def parse(self, response):
    """Crawl the theater list page and yield one request per target cinema."""
    for link in response.xpath('//div[@class="theater_info"]//li/a'):
        link_url = link.xpath('./@href').extract_first()
        name = link.xpath('./text()').extract_first()
        if not name:
            # partner theater element is different
            name = ''.join(link.xpath('.//text()').extract())
        else:
            # only regular theaters carry relative urls that need joining
            link_url = response.urljoin(link_url)
        loader = ShowingLoader(response=response)
        loader.add_cinema_name(name)
        name = loader.get_output_value('cinema_name')
        loader.add_cinema_site(link_url, name)
        loader.add_value('source', self.name)
        if not self.is_cinema_crawl([name]):
            continue
        req = scrapy.Request(link_url, callback=self.parse_cinema)
        req.meta["data_proto"] = loader.load_item()
        yield req
def parse(self, response):
    """Crawl the theater list (footer links) and yield per-cinema requests."""
    # partner cinema is not included
    links = response.xpath('//footer/p[position()>=2 and position() <=3]//a')
    for link in links:
        link_url = link.xpath('./@href').extract_first()
        name = link.xpath('./text()').extract_first()
        loader = ShowingLoader(response=response)
        loader.add_cinema_name(name)
        name = loader.get_output_value('cinema_name')
        loader.add_cinema_site(link_url, name)
        loader.add_value('source', self.name)
        if not self.is_cinema_crawl([name]):
            continue
        # english slug is the last path segment, query string stripped
        name_en = link_url.split('/')[-1].split('?')[0]
        req = scrapy.Request(link_url, callback=self.parse_main_page)
        req.meta["data_proto"] = loader.load_item()
        req.meta["cinema_name_en"] = name_en
        # each cinema needs an independent cookie session
        req.meta["dont_merge_cookies"] = True
        yield req
def parse(self, response):
    """Crawl the theater list and yield one schedule request per cinema."""
    for theater in response.xpath('//div[@class="theater-list__inner"]'):
        # forum site have multiple cinema on one site, so we need to
        # specify cinema name on schedule page
        name = theater.xpath('./h4/text()').extract_first()
        loader = ShowingLoader(response=response)
        loader.add_cinema_name(name)
        name = loader.get_output_value('cinema_name')
        if not self.is_cinema_crawl([name]):
            continue
        cinema_url = theater.xpath('./p/a/@href').extract_first()
        loader.add_cinema_site(response.urljoin(cinema_url), name)
        loader.add_value('source', self.name)
        schedule_url = self.generate_cinema_schedule_url(cinema_url, self.date)
        req = scrapy.Request(schedule_url, callback=self.parse_cinema)
        req.meta["data_proto"] = loader.load_item()
        yield req
def parse(self, response):
    """Crawl the theater list and yield one schedule request per cinema."""
    for entry in response.xpath('//section[@class="rcol searchTheater"]//li'):
        # skip area header rows
        if entry.xpath('./@class').extract_first() == "area":
            continue
        cinema_url = entry.xpath('./a/@href').extract_first()
        icon = entry.xpath('./img/@src').extract_first()
        name = entry.xpath('./a/img/@alt').extract_first()
        # brand prefix is only encoded in the icon image name
        if icon is not None:
            if "icon_uc_ss.gif" in icon:
                name = "ユナイテッド・シネマ" + name
            elif "icon_cpx_ss.gif" in icon:
                name = "シネプレックス" + name
        loader = ShowingLoader(response=response)
        loader.add_cinema_name(name)
        name = loader.get_output_value('cinema_name')
        loader.add_cinema_site(response.urljoin(cinema_url), name)
        loader.add_value('source', self.name)
        if not self.is_cinema_crawl([name]):
            continue
        name_en = cinema_url.split('/')[-2]
        schedule_url = self.generate_cinema_schedule_url(name_en, self.date)
        req = scrapy.Request(schedule_url, callback=self.parse_cinema)
        req.meta["data_proto"] = loader.load_item()
        yield req
def parse_cinema(self, response):
    """Parse a cinema schedule page and yield every showing result."""
    loader = ShowingLoader(response=response)
    loader.add_value(None, response.meta["data_proto"])
    results = []
    for movie in response.xpath('//div[@class="wrapFilm"]'):
        self.parse_movie(response, movie, loader, results)
    # parse_movie may append None placeholders; yield only real results
    yield from (r for r in results if r)
def parse_shechedule(self, response):
    """Parse a schedule page and yield every showing result.

    NOTE: method name typo ("shechedule") is kept — it is referenced as a
    request callback elsewhere, so renaming would break callers.
    """
    loader = ShowingLoader(response=response)
    loader.add_value(None, response.meta["data_proto"])
    results = []
    for movie in response.xpath('//div[@class="scheduleBox"]'):
        self.parse_movie(response, movie, loader, results)
    yield from (r for r in results if r)
def parse_cinema(self, response):
    """Parse a cinema timetable page and yield every showing result."""
    loader = ShowingLoader(response=response)
    loader.add_value(None, response.meta["data_proto"])
    results = []
    for movie in response.xpath('//div[@id="timetable"]/article'):
        self.parse_movie(response, movie, loader, results)
    yield from (r for r in results if r)
def parse_cinema(self, response):
    """Parse a cinema schedule page and yield every showing result."""
    loader = ShowingLoader(response=response)
    loader.add_value(None, response.meta["data_proto"])
    results = []
    sections = response.xpath('//section[@data-accordion-group="movie"]')
    for movie in sections:
        self.parse_movie(response, movie, loader, results)
    yield from (r for r in results if r)
def parse_cinema(self, response):
    """
    cinema home page

    we have to pass this page to get independent cookie for each cinema
    """
    loader = ShowingLoader(response=response)
    loader.add_value(None, response.meta["data_proto"])
    results = []
    titles = response.xpath('//div[@class="cinemaTitle elp"]')
    sections = response.xpath('//div[@class="theaterListWrap"]')
    # title and schedule-section nodes are parallel lists; pair them up
    for movie_pair in zip(titles, sections):
        self.parse_movie(response, movie_pair, loader, results)
    yield from (r for r in results if r)
def parse_sub_cinema(self, response, sub_cinema, showing_url_parameter,
                     result_list):
    """Collect showings for one sub-cinema entry into result_list."""
    code = sub_cinema['code']
    showing_url_parameter['site_cd'] = code
    loader = ShowingLoader(response=response)
    loader.add_cinema_name(sub_cinema['name'])
    loader.add_cinema_site(
        TohoUtil.generate_cinema_homepage_url(code), sub_cinema['name'])
    loader.add_value('source', self.name)
    for movie in sub_cinema['list']:
        self.parse_movie(response, movie, showing_url_parameter,
                         loader, result_list)
def parse_cinema(self, response):
    """Parse a JSON schedule response and yield every showing result."""
    try:
        schedule_data = json.loads(response.text)
    except json.JSONDecodeError:
        return
    if not schedule_data:
        return
    if 'data' not in schedule_data or 'movie' not in schedule_data['data']:
        return
    loader = ShowingLoader(response=response)
    loader.add_value(None, response.meta["data_proto"])
    results = []
    movies = schedule_data['data']['movie']
    # a single movie is serialized as a dict rather than a one-item list
    if isinstance(movies, dict):
        movies = [movies]
    for movie in movies:
        self.parse_movie(response, movie, loader, results)
    yield from (r for r in results if r)
def parse(self, response):
    """Crawl the theater list and yield one schedule request per cinema."""
    links = response.xpath('//div[@class="LNbowlingList LNshopList"]//a')
    for link in links:
        county = link.xpath('./text()').extract_first()
        name = county + "コロナシネマワールド"
        loader = ShowingLoader(response=response)
        loader.add_cinema_name(name)
        name = loader.get_output_value('cinema_name')
        if not self.is_cinema_crawl([name]):
            continue
        cinema_url = link.xpath('./@href').extract_first()
        loader.add_cinema_site(cinema_url, name)
        loader.add_value('source', self.name)
        name_en = cinema_url.split('/')[-2]
        schedule_url = self.generate_cinema_schedule_url(name_en, self.date)
        req = scrapy.Request(schedule_url, callback=self.parse_cinema)
        req.meta["data_proto"] = loader.load_item()
        yield req
def parse_screen(self, response, curr_screen, data_proto, result_list):
    """Parse one screen's table row and its showings into result_list."""
    loader = ShowingLoader(response=response)
    loader.add_value(None, data_proto.load_item())
    loader.add_screen_name(
        curr_screen.xpath('./tr/td[1]/text()').extract_first())
    for showing in curr_screen.xpath('./tr/td[2]/a'):
        self.parse_showing(response, showing, loader, result_list)
def parse(self, response):
    """Crawl the theater list and yield one schedule request per cinema."""
    for link in response.xpath('//section[@id="theatres"]//a'):
        cinema_url = link.xpath('./@href').extract_first()
        name = link.xpath('./text()').extract_first()
        # every theater except ムービル carries the brand prefix
        if name != "ムービル":
            name = "109シネマズ" + name
        loader = ShowingLoader(response=response)
        loader.add_cinema_name(name)
        name = loader.get_output_value('cinema_name')
        loader.add_cinema_site(response.urljoin(cinema_url), name)
        loader.add_value('source', self.name)
        if not self.is_cinema_crawl([name]):
            continue
        name_en = cinema_url.split('/')[-2]
        schedule_url = self.generate_cinema_schedule_url(name_en, self.date)
        req = scrapy.Request(schedule_url, callback=self.parse_cinema)
        req.meta["data_proto"] = loader.load_item()
        yield req
def parse(self, response):
    """Crawl the theater list and yield one JSON-schedule request per cinema."""
    for entry in response.xpath('//li[@class="clearfix"]'):
        name = entry.xpath(
            './p[@class="theaterName"]/a/text()').extract_first()
        loader = ShowingLoader(response=response)
        loader.add_cinema_name(name)
        name = loader.get_output_value('cinema_name')
        if not self.is_cinema_crawl([name]):
            continue
        cinema_url = entry.xpath(
            './p[@class="theaterName"]/a/@href').extract_first()
        loader.add_cinema_site(response.urljoin(cinema_url), name)
        loader.add_value('source', self.name)
        name_en = cinema_url.split('/')[-1]
        json_url = self.generate_cinema_schedule_url(name_en, self.date)
        req = scrapy.Request(json_url, callback=self.parse_cinema)
        req.meta["data_proto"] = loader.load_item()
        yield req
def parse(self, response):
    """Crawl the theater list and yield one request per cinema."""
    links = response.xpath('//div[contains(@class,"area")]//dd//a')
    for link in links:
        # forum site have multiple cinema on one site, so we need to
        # specify cinema name on schedule page
        city = link.xpath('./text()').extract_first()
        name = "イオンシネマ" + city
        loader = ShowingLoader(response=response)
        loader.add_cinema_name(name)
        name = loader.get_output_value('cinema_name')
        if not self.is_cinema_crawl([name]):
            continue
        cinema_url = response.urljoin(link.xpath('./@href').extract_first())
        loader.add_cinema_site(cinema_url, name)
        loader.add_value('source', self.name)
        req = scrapy.Request(cinema_url, callback=self.parse_cinema)
        req.meta["data_proto"] = loader.load_item()
        yield req
def parse_screen(self, response, curr_screen, data_proto, result_list):
    """Parse one screen section and its showings into result_list."""
    loader = ShowingLoader(response=response)
    loader.add_value(None, data_proto.load_item())
    # screen name text may be split across nested nodes
    name_parts = curr_screen.xpath(
        './li[@class="theatre"]/a//text()').extract()
    loader.add_screen_name(''.join(name_parts))
    # first <li> is the screen header; the rest are showings
    for showing in curr_screen.xpath('./li')[1:]:
        self.parse_showing(response, showing, loader, result_list)
def parse_screen(self, response, curr_screen, data_proto, result_list):
    """Parse one screen section and its showings into result_list."""
    loader = ShowingLoader(response=response)
    loader.add_value(None, data_proto.load_item())
    alt_text = curr_screen.xpath('./p/a/img/@alt').extract_first()
    # normalize to "screen<number>" using the digits in the alt text
    loader.add_screen_name('screen' + re.findall(r'\d+', alt_text)[0])
    for showing in curr_screen.xpath('./ol/li'):
        self.parse_showing(response, showing, loader, result_list)
def parse_movie(self, response, curr_movie, data_proto, result_list):
    """
    parse movie showing data
    """
    loader = ShowingLoader(response=response)
    loader.add_value(None, data_proto.load_item())
    loader.add_title(
        title=curr_movie.xpath('./h3/span/a[1]/text()').extract_first())
    if not self.is_movie_crawl(loader.get_title_list()):
        return
    for screen in curr_movie.xpath('./ul/li'):
        self.parse_screen(response, screen, loader, result_list)
def parse_movie(self, response, curr_movie, data_proto, result_list):
    """
    parse movie showing data
    """
    loader = ShowingLoader(response=response)
    loader.add_value(None, data_proto.load_item())
    loader.add_title(
        title=curr_movie.xpath('./h2/text()').extract_first(),
        title_en=curr_movie.xpath('./h2/span/text()').extract_first())
    if not self.is_movie_crawl(loader.get_title_list()):
        return
    for showing in curr_movie.xpath('.//ul[@class="timetable"]/li'):
        self.parse_showing(response, showing, loader, result_list)
def parse_screen(self, response, curr_screen, showing_url_parameter,
                 data_proto, result_list):
    """Parse one screen's JSON entry and its showings into result_list."""
    showing_url_parameter['theater_cd'] = curr_screen['theaterCd']
    showing_url_parameter['screen_cd'] = curr_screen['code']
    loader = ShowingLoader(response=response)
    loader.add_value(None, data_proto.load_item())
    loader.add_screen_name(curr_screen['ename'])
    for showing in curr_screen['list']:
        # filter empty showing
        if not showing['unsoldSeatInfo']:
            continue
        self.parse_showing(response, showing, showing_url_parameter,
                           loader, result_list)
def parse_screen(self, response, curr_screen, data_proto, result_list):
    """Parse one screen's JSON entry and its showings into result_list."""
    loader = ShowingLoader(response=response)
    loader.add_value(None, data_proto.load_item())
    loader.add_screen_name(curr_screen['name'])
    showings = curr_screen['time']
    # a single showing is serialized as a dict rather than a one-item list
    if isinstance(showings, dict):
        showings = [showings]
    for showing in showings:
        self.parse_showing(response, showing, loader, result_list)
def parse_movie(self, response, curr_movie, showing_url_parameter,
                data_proto, result_list):
    """
    parse movie showing data

    movie may have different versions
    """
    loader = ShowingLoader(response=response)
    loader.add_value(None, data_proto.load_item())
    loader.add_title(title=curr_movie['name'], title_en=curr_movie['ename'])
    if not self.is_movie_crawl(loader.get_title_list()):
        return
    showing_url_parameter['movie_cd'] = curr_movie['code']
    for screen in curr_movie['list']:
        self.parse_screen(response, screen, showing_url_parameter,
                          loader, result_list)
def parse_movie(self, response, curr_movie, data_proto, result_list):
    """
    parse movie showing data
    """
    # title may be a link or plain heading text
    title = (curr_movie.xpath('./h4/a/text()').extract_first()
             or curr_movie.xpath('./h4/text()').extract_first())
    loader = ShowingLoader(response=response)
    loader.add_value(None, data_proto.load_item())
    loader.add_title(title=title)
    if not self.is_movie_crawl(loader.get_title_list()):
        return
    for showing in curr_movie.xpath('.//table//tr'):
        self.parse_showing(response, showing, loader, result_list)
def parse_movie(self, response, curr_movie, data_proto, result_list):
    """
    parse movie showing data
    """
    loader = ShowingLoader(response=response)
    loader.add_value(None, data_proto.load_item())
    loader.add_title(title=curr_movie.xpath(
        './/div[@class="MovieTitle1"]//a/text()').extract_first())
    if not self.is_movie_crawl(loader.get_title_list()):
        return
    # showing cells are identified by their mouseover handler
    showings = curr_movie.xpath('.//td[contains(@onmouseover,"eventover")]')
    for showing in showings:
        self.parse_showing(response, showing, loader, result_list)
def parse_movie(self, response, curr_movie, data_proto, result_list):
    """
    parse movie showing data

    curr_movie is a tuple
    """
    title_section, detail_section = curr_movie
    loader = ShowingLoader(response=response)
    loader.add_value(None, data_proto.load_item())
    loader.add_title(title=''.join(title_section.xpath('./text()').extract()))
    if not self.is_movie_crawl(loader.get_title_list()):
        return
    for screen in detail_section.xpath('.//table'):
        self.parse_screen(response, screen, loader, result_list)
def parse_movie(self, response, curr_movie, data_proto, result_list):
    """
    parse movie showing data
    """
    loader = ShowingLoader(response=response)
    loader.add_value(None, data_proto.load_item())
    loader.add_title(title=curr_movie['name'])
    if not self.is_movie_crawl(loader.get_title_list()):
        return
    screens = curr_movie['screen']
    # a single screen is serialized as a dict rather than a one-item list
    if isinstance(screens, dict):
        screens = [screens]
    for screen in screens:
        self.parse_screen(response, screen, loader, result_list)
def parse_movie(self, response, curr_movie, data_proto, result_list):
    """
    parse movie showing data
    """
    # japanese title sits just before the <span> holding the english title;
    # when that node is absent, fall back to the paragraph's own text
    title = curr_movie.xpath(
        './div[1]/p[1]/span/preceding::*[1]/text()').extract_first()
    if not title:
        title = curr_movie.xpath('./div[1]/p[1]/text()').extract_first()
    title_en = curr_movie.xpath('./div[1]/p[1]/span/text()').extract_first()
    loader = ShowingLoader(response=response)
    loader.add_value(None, data_proto.load_item())
    loader.add_title(title=title, title_en=title_en)
    if not self.is_movie_crawl(loader.get_title_list()):
        return
    for showing in curr_movie.xpath('./div[2]/div'):
        self.parse_showing(response, showing, loader, result_list)
def parse_showing(self, response, curr_showing, data_proto, result_list):
    """Parse one showing cell into a showing item, booking item, or request.

    Appends to result_list either a showing item (when booking data is not
    crawled), a booking item (free-seat / sold-out / not-sold showings), or
    a scrapy.Request that continues the booking crawl for normal showings.
    """
    def parse_time(time_str):
        """Return (hour, minute) parsed from a 'HH:MM'-style string."""
        # BUGFIX: previously normalized the closed-over `start_time` instead
        # of the parameter, so parsing `end_time` returned the start time.
        time_str = unicodedata.normalize('NFKC', time_str)
        time = time_str.split(":")
        return (int(time[0]), int(time[1]))

    # showing section passed in may be unusable and need to be filtered
    time_section = curr_showing.xpath('./div[@class="time"]')
    if not time_section:
        return
    showing_data_proto = ShowingLoader(response=response)
    showing_data_proto.add_value(None, data_proto.load_item())
    start_time = time_section.xpath('./span/span/text()').extract_first()
    start_hour, start_minute = parse_time(start_time)
    showing_data_proto.add_value(
        'start_time', self.get_time_from_text(start_hour, start_minute))
    end_time = time_section.xpath('./span/text()').extract_first()
    end_hour, end_minute = parse_time(end_time)
    showing_data_proto.add_value(
        'end_time', self.get_time_from_text(end_hour, end_minute))
    screen_name = curr_showing.xpath('./div[2]/a/text()').extract_first()
    showing_data_proto.add_screen_name(screen_name)
    # when site ordering is stopped stop crawling
    site_status = curr_showing.xpath('./a/span[2]/text()').extract_first()
    if site_status == '予約停止中':
        return
    # handle free order seat type showings
    seat_type = curr_showing.xpath(
        './div[@class="icon"]//img/@alt').extract_first()
    showing_data_proto.add_value('seat_type',
                                 AeonUtil.standardize_seat_type(seat_type))
    # query screen number from database
    showing_data_proto.add_total_seat_count()
    # check whether need to continue crawl booking data or stop now
    if not self.crawl_booking_data:
        result_list.append(showing_data_proto.load_item())
        return
    booking_data_proto = init_show_booking_loader(response=response)
    booking_data_proto.add_value('showing', showing_data_proto.load_item())
    book_status = curr_showing.xpath('./a/span/text()').extract_first()
    booking_data_proto.add_book_status(book_status, util=AeonUtil)
    book_status = booking_data_proto.get_output_value('book_status')
    seat_type = showing_data_proto.get_output_value('seat_type')
    if (seat_type == 'FreeSeat' or book_status in ['SoldOut', 'NotSold']):
        # sold out or not sold: book count is known without a further request
        total_seat_count = showing_data_proto.get_output_value(
            'total_seat_count')
        book_seat_count = (total_seat_count
                           if book_status == 'SoldOut' else 0)
        booking_data_proto.add_value('book_seat_count', book_seat_count)
        booking_data_proto.add_time_data()
        result_list.append(booking_data_proto.load_item())
        return
    else:
        # normal, generate request to showing page
        showing_request = self.generate_agreement_request(
            response=response, curr_showing=curr_showing)
        # go to schedule page again to generate independent cookie
        # for each showing
        schedule_url = response.meta['schedule_url']
        request = scrapy.Request(schedule_url,
                                 dont_filter=True,
                                 callback=self.parse_new_cookie)
        request.meta["data_proto"] = booking_data_proto.load_item()
        request.meta["showing_request"] = showing_request
        (performance_id, _, _) = self.extract_showing_parameters(curr_showing)
        # separate cookiejar per performance keeps sessions independent
        request.meta["cookiejar"] = performance_id
        result_list.append(request)
def parse_showing(self, response, curr_showing, data_proto, result_list):
    """Parse one showing cell into a showing item, booking item, or request.

    Appends to result_list either a showing item (when booking data is not
    crawled), a booking item (sold-out / not-sold showings), or a
    scrapy.Request that crawls the order page for normal showings.
    """
    showing_data_proto = ShowingLoader(response=response)
    showing_data_proto.add_value(None, data_proto.load_item())
    # strip the trailing character from the time text before parsing
    start_time = curr_showing.xpath(
        './div/text()').extract_first()[:-1]
    start_hour, start_minute = self.parse_time(start_time)
    showing_data_proto.add_value('start_time', self.get_time_from_text(
        start_hour, start_minute))
    # end time not displayed in schedule page
    showing_data_proto.add_value('seat_type', 'NormalSeat')
    # query screen number from database
    showing_data_proto.add_total_seat_count()
    # check whether need to continue crawl booking data or stop now
    if not self.crawl_booking_data:
        result_list.append(showing_data_proto.load_item())
        return
    booking_data_proto = init_show_booking_loader(response=response)
    booking_data_proto.add_value('showing', showing_data_proto.load_item())
    # book status is encoded in the div's css class
    book_status = curr_showing.xpath('./div/@class').extract_first()
    booking_data_proto.add_book_status(book_status, util=KinezoUtil)
    book_status = booking_data_proto.get_output_value('book_status')
    if book_status in ['SoldOut', 'NotSold']:
        # sold out or not sold: book count is known without a further request
        total_seat_count = showing_data_proto.get_output_value(
            'total_seat_count')
        book_seat_count = (
            total_seat_count if book_status == 'SoldOut' else 0)
        booking_data_proto.add_value('book_seat_count', book_seat_count)
        booking_data_proto.add_time_data()
        result_list.append(booking_data_proto.load_item())
        return
    else:
        # normal, need to crawl book number on order page
        url = curr_showing.xpath('./@href').extract_first()
        url = response.urljoin(url)
        request = scrapy.Request(url,
                                 callback=self.parse_normal_showing)
        request.meta["data_proto"] = booking_data_proto.load_item()
        result_list.append(request)