Ejemplo n.º 1
0
 def parse_sub_cinema(self, response):
     """
     Finish a cinema item using the data found on a sub-cinema page.

     The partially filled item is taken from ``response.meta['cinema']``.
     Its ``names`` field is overwritten with a single standardized name
     read from this page, then the item is handed to
     ``self.parse_seat_number_list`` and yielded.
     """
     item = response.meta['cinema']
     # The sub cinema is listed under its own name, taken from the
     # first <h4> text inside the "more-anchor-01" block.
     heading_xpath = '//div[@id="more-anchor-01"]/h4/text()'
     sub_name = response.xpath(heading_xpath).extract_first()
     item['names'] = [standardize_cinema_name(sub_name)]
     # NOTE(review): presumably fills seat/screen data into the item in
     # place; the helper is defined elsewhere -- confirm.
     self.parse_seat_number_list(response, item)
     yield item
 def parse_county(self, response):
     """
     Yield one request per cinema link found on a county page.

     Links are selected with ``self.cinema_xpath``. Each link's text is
     standardized and checked with ``self.is_cinema_crawl``; links that
     fail the check are skipped. For the rest, the href is made absolute,
     passed through ``self.adjust_cinema_url`` and requested with
     ``self.parse_cinema`` as callback. ``county_name`` (copied from the
     incoming response meta) and ``cinema_name`` travel in the request
     meta.
     """
     for link in response.xpath(self.cinema_xpath):
         raw_name = link.xpath('./text()').extract_first()
         name = standardize_cinema_name(raw_name)
         if self.is_cinema_crawl(name):
             href = link.xpath('./@href').extract_first()
             target = self.adjust_cinema_url(response.urljoin(href))
             request = scrapy.Request(target, callback=self.parse_cinema)
             request.meta.update(
                 county_name=response.meta['county_name'],
                 cinema_name=name)
             yield request
Ejemplo n.º 3
0
 def parse_cinema(self, response):
     """
     Build a ``CinemaItem`` from a cinema page.

     The item gets a standardized name from the page heading, an empty
     ``screens`` mapping, ``county`` and ``site`` from the response meta,
     the fixed company ``'TOHO'`` and this spider's name as ``source``.

     If the page links to detail pages, one request per link is yielded
     (callback ``self.parse_sub_cinema``), each carrying its own deep
     copy of the item in ``meta['cinema']``. Otherwise the item is passed
     to ``self.parse_seat_number_list`` and yielded directly.
     """
     heading_xpath = ('//h1[@class="c-page_heading is-lv-01"]'
                      '/span/text()')
     heading = response.xpath(heading_xpath).extract_first()
     item = CinemaItem()
     item['names'] = [standardize_cinema_name(heading)]
     item['screens'] = {}
     item['county'] = response.meta['county']
     item['company'] = 'TOHO'
     item['source'] = self.name
     item['site'] = response.meta['site']
     # Some cinemas only link to detail pages that must be followed.
     detail_links = response.xpath(
         '//section[@class="about"]//a[@class="link bold"]/@href').extract()
     if not detail_links:
         # No detail pages: this page itself is used for the seat data.
         self.parse_seat_number_list(response, item)
         yield item
         return
     for href in detail_links:
         request = scrapy.Request(response.urljoin(href),
                                  callback=self.parse_sub_cinema)
         # Deep copy so each sub-cinema callback mutates its own item.
         request.meta['cinema'] = copy.deepcopy(item)
         yield request
Ejemplo n.º 4
0
 def replace_cinema_name(self, cinema_name):
     """Replace the ``cinema_name`` value with the standardized name."""
     standardized = standardize_cinema_name(cinema_name)
     self.replace_value('cinema_name', standardized)
Ejemplo n.º 5
0
 def add_cinema_name(self, cinema_name):
     """Add the standardized name as a ``cinema_name`` value."""
     standardized = standardize_cinema_name(cinema_name)
     self.add_value('cinema_name', standardized)