Ejemplo n.º 1
0
 def parse_normal_showing(self, response):
     """
     count booked seats on the order page and yield the booking item
     """
     # booked seats are the elements whose alt attribute holds the marker
     booked_seats = response.css('[alt~="購入済(選択不可)"]')
     loader = init_show_booking_loader(
         response=response, item=response.meta["data_proto"])
     loader.add_value('book_seat_count', len(booked_seats))
     loader.add_time_data()
     yield loader.load_item()
Ejemplo n.º 2
0
 def parse_normal_showing(self, response):
     """
     count booked seats on the order page and yield the booking item
     """
     loader = init_show_booking_loader(
         response=response, item=response.meta["data_proto"])
     # every image whose src contains "seat_no.gif" counts as a booked seat
     booked_seats = response.xpath('//img[contains(@src,"seat_no.gif")]')
     loader.add_value('book_seat_count', len(booked_seats))
     loader.add_time_data()
     yield loader.load_item()
 def parse_normal_showing(self, response):
     """
     count booked seats on the order page and yield the booking item
     """
     # some cinemas are free seat ordered, so data may not be crawled
     booked_seats = response.xpath('//img[contains(@src,"seat_102.gif")]')
     booked_seat_count = len(booked_seats)
     loader = init_show_booking_loader(response=response,
                                       item=response.meta["data_proto"])
     loader.add_value('book_seat_count', booked_seat_count)
     loader.add_time_data()
     yield loader.load_item()
Ejemplo n.º 4
0
    def parse_showing(self, response, curr_showing, data_proto, result_list):
        """
        build showing (and optionally booking) data for one schedule entry

        appends to result_list either a showing item, a finished booking
        item, or a request for the order page handled by
        parse_normal_showing
        """
        def parse_time(time_str):
            # "HH:MM" -> (hour, minute)
            parts = time_str.split(":")
            return int(parts[0]), int(parts[1])

        showing_loader = ShowingLoader(response=response)
        showing_loader.add_value(None, data_proto.load_item())
        showing_loader.add_screen_name(
            curr_showing.xpath('./th/div/text()').extract_first())
        start_text = curr_showing.xpath(
            './td[@class="time"]/div/text()').extract_first()
        showing_loader.add_value(
            'start_time', self.get_time_from_text(*parse_time(start_text)))
        # the first character of the end time text is dropped before parsing
        end_text = curr_showing.xpath(
            './td[@class="time"]/div/span/text()').extract_first()
        showing_loader.add_value(
            'end_time', self.get_time_from_text(*parse_time(end_text[1:])))
        showing_loader.add_value('seat_type', 'NormalSeat')

        # query screen number from database
        showing_loader.add_total_seat_count()
        if not self.crawl_booking_data:
            # booking data is not wanted, the showing itself is the result
            result_list.append(showing_loader.load_item())
            return

        booking_loader = init_show_booking_loader(response=response)
        booking_loader.add_value('showing', showing_loader.load_item())
        raw_status = curr_showing.xpath(
            './/img[contains(@src,"icon_seat_vacant")]/@alt').extract_first()
        booking_loader.add_book_status(raw_status, util=KoronaUtil)
        book_status = booking_loader.get_output_value('book_status')

        if book_status not in ('SoldOut', 'NotSold'):
            # normal, need to crawl book number on order page
            order_url = curr_showing.xpath(
                './td[@class="btnReservation"]/div/a/@href').extract_first()
            request = scrapy.Request(order_url,
                                     callback=self.parse_normal_showing)
            request.meta["data_proto"] = booking_loader.load_item()
            result_list.append(request)
            return

        # sold out means every seat is booked, not sold means none is
        total_seat_count = showing_loader.get_output_value(
            'total_seat_count')
        if book_status == 'SoldOut':
            book_seat_count = total_seat_count
        else:
            book_seat_count = 0
        booking_loader.add_value('book_seat_count', book_seat_count)
        booking_loader.add_time_data()
        result_list.append(booking_loader.load_item())
Ejemplo n.º 5
0
 def parse_normal_showing(self, response):
     """
     compute booked seats from seat info embedded in javascript and
     yield the booking item
     """
     loader = init_show_booking_loader(response=response,
                                       item=response.meta["data_proto"])
     # seat numbers are read from the inline script mentioning "seat_info"
     seat_script = response.xpath(
         '//script[contains(., "seat_info")]/text()').extract_first()
     total_match = re.search(r'"total_seats":"(\d+)"', seat_script)
     total_seat_count = int(total_match.group(1))
     unsold_match = re.search(r'"unsold_seat_number":"(\d+)"', seat_script)
     unsold_seat_count = int(unsold_match.group(1))
     # booked seats are whatever is not reported as unsold
     loader.add_value('book_seat_count',
                      total_seat_count - unsold_seat_count)
     loader.add_time_data()
     yield loader.load_item()
Ejemplo n.º 6
0
 def parse_showing_seat_json(self, response):
     """
     derive booked seat count from the empty seat json and yield the
     booking item; yields nothing when the response is not valid json
     """
     try:
         empty_seats = json.loads(response.text)
     except json.JSONDecodeError:
         return
     loader = init_show_booking_loader(response=response,
                                       item=response.meta["data_proto"])
     empty_seat_count = len(empty_seats)
     total_seat_count = loader.get_output_value(
         'showing')['total_seat_count']
     # each json entry is treated as one empty seat
     loader.add_value('book_seat_count',
                      total_seat_count - empty_seat_count)
     loader.add_time_data()
     yield loader.load_item()
Ejemplo n.º 7
0
    def parse_showing(self, response, curr_showing, data_proto, result_list):
        """
        build showing (and optionally booking) data for one schedule entry

        appends to result_list either a showing item, a finished booking
        item, or a request for the order page handled by
        parse_normal_showing
        """
        def parse_time(time_str):
            # "HH:MM" -> (hour, minute)
            parts = time_str.split(":")
            return int(parts[0]), int(parts[1])

        showing_loader = ShowingLoader(response=response)
        showing_loader.add_value(None, data_proto.load_item())
        showing_loader.add_screen_name(
            curr_showing.xpath('./p/text()').extract_first())
        start_text = curr_showing.xpath(
            './/span[@class="strong fontXL"]/text()').extract_first()
        showing_loader.add_value(
            'start_time', self.get_time_from_text(*parse_time(start_text)))
        # the first character of the end time text is dropped before parsing
        end_text = curr_showing.xpath(
            './/span[@class="strong fontXL"]/../text()').extract_first()
        showing_loader.add_value(
            'end_time', self.get_time_from_text(*parse_time(end_text[1:])))
        showing_loader.add_value('seat_type', 'NormalSeat')

        # query screen number from database
        showing_loader.add_total_seat_count()
        if not self.crawl_booking_data:
            # booking data is not wanted, the showing itself is the result
            result_list.append(showing_loader.load_item())
            return

        booking_loader = init_show_booking_loader(response=response)
        booking_loader.add_value('showing', showing_loader.load_item())
        raw_status = curr_showing.xpath('.//img/@src').extract_first()
        booking_loader.add_book_status(raw_status, util=MovixUtil)
        book_status = booking_loader.get_output_value('book_status')

        if book_status not in ('SoldOut', 'NotSold'):
            # normal, need to crawl book number on order page;
            # the order url is the first quoted argument of the onclick call
            onclick_script = curr_showing.xpath('./@onclick').extract_first()
            order_url = re.findall(r'\(\'(.+?)\'\,', onclick_script)[0]
            request = scrapy.Request(order_url,
                                     callback=self.parse_normal_showing)
            request.meta["data_proto"] = booking_loader.load_item()
            result_list.append(request)
            return

        # sold out means every seat is booked, not sold means none is
        total_seat_count = showing_loader.get_output_value(
            'total_seat_count')
        if book_status == 'SoldOut':
            book_seat_count = total_seat_count
        else:
            book_seat_count = 0
        booking_loader.add_value('book_seat_count', book_seat_count)
        booking_loader.add_time_data()
        result_list.append(booking_loader.load_item())
Ejemplo n.º 8
0
    def parse_showing(self, response, curr_showing,
                      showing_url_parameter, data_proto, result_list):
        """
        build showing (and optionally booking) data for one schedule entry

        curr_showing is indexed by key here; showing_url_parameter is
        updated in place with the showing code. appends to result_list
        either a showing item, a finished booking item, or a request
        handled by parse_normal_showing
        """
        def parse_time(time_str):
            """
            ex. "24:40"
            """
            parts = time_str.split(":")
            return int(parts[0]), int(parts[1])

        showing_url_parameter['showing_cd'] = curr_showing['code']
        showing_loader = ShowingLoader(response=response)
        showing_loader.add_value(None, data_proto.load_item())
        # time like 24:40 can not be directly parsed,
        # so we need to shift time properly
        showing_loader.add_value(
            'start_time',
            self.get_time_from_text(*parse_time(curr_showing['showingStart'])))
        showing_loader.add_value(
            'end_time',
            self.get_time_from_text(*parse_time(curr_showing['showingEnd'])))
        showing_loader.add_value('seat_type', 'NormalSeat')

        # query screen number from database
        showing_loader.add_total_seat_count()
        if not self.crawl_booking_data:
            # booking data is not wanted, the showing itself is the result
            result_list.append(showing_loader.load_item())
            return

        booking_loader = init_show_booking_loader(response=response)
        booking_loader.add_value('showing', showing_loader.load_item())
        raw_status = curr_showing['unsoldSeatInfo']['unsoldSeatStatus']
        booking_loader.add_book_status(raw_status, util=TohoUtil)
        book_status = booking_loader.get_output_value('book_status')

        if book_status not in ('SoldOut', 'NotSold'):
            # normal, need to crawl book number on order page
            order_url = self.generate_showing_url(**showing_url_parameter)
            request = scrapy.Request(order_url,
                                     callback=self.parse_normal_showing)
            request.meta["data_proto"] = booking_loader.load_item()
            result_list.append(request)
            return

        # sold out means every seat is booked, not sold means none is
        total_seat_count = showing_loader.get_output_value(
            'total_seat_count')
        if book_status == 'SoldOut':
            book_seat_count = total_seat_count
        else:
            book_seat_count = 0
        booking_loader.add_value('book_seat_count', book_seat_count)
        booking_loader.add_time_data()
        result_list.append(booking_loader.load_item())
Ejemplo n.º 9
0
 def parse_seat_json_api(self, response):
     """
     count booked seats of every seat kind and yield the booking item
     """
     loader = init_show_booking_loader(response=response,
                                       item=response.meta["data_proto"])
     # one query per seat kind: normal, wheelchair and executive seats
     booked_seat_queries = (
         '//a[@class="seat seat-none"]',
         '//a[@class="seat wheelseat-none"]',
         '//a[@class="seat executive-none"]',
     )
     booked_seat_count = sum(
         len(response.xpath(query)) for query in booked_seat_queries)
     loader.add_value('book_seat_count', booked_seat_count)
     loader.add_time_data()
     yield loader.load_item()
    def parse_showing(self, response, curr_showing, data_proto, result_list):
        """
        build showing (and optionally booking) data for one schedule entry

        curr_showing is indexed by key here. appends to result_list either
        a showing item, a finished booking item, or a request handled by
        parse_pre_ordering
        """
        def parse_time(time_str):
            # first two characters are the hour, the rest is the minute
            return int(time_str[:2]), int(time_str[2:])

        showing_loader = ShowingLoader(response=response)
        showing_loader.add_value(None, data_proto.load_item())
        showing_loader.add_value(
            'start_time',
            self.get_time_from_text(*parse_time(curr_showing['start_time'])))
        showing_loader.add_value(
            'end_time',
            self.get_time_from_text(*parse_time(curr_showing['end_time'])))
        showing_loader.add_value('seat_type', 'NormalSeat')
        # TODO get seat type right now

        # query screen number from database
        showing_loader.add_total_seat_count()
        if not self.crawl_booking_data:
            # booking data is not wanted, the showing itself is the result
            result_list.append(showing_loader.load_item())
            return

        booking_loader = init_show_booking_loader(response=response)
        booking_loader.add_value('showing', showing_loader.load_item())
        booking_loader.add_book_status(curr_showing['available'],
                                       util=CinemaSunshineUtil)
        book_status = booking_loader.get_output_value('book_status')

        if book_status not in ('SoldOut', 'NotSold'):
            # normal, need to crawl book number on order page
            request = scrapy.Request(curr_showing['url'],
                                     callback=self.parse_pre_ordering)
            request.meta["data_proto"] = booking_loader.load_item()
            request.meta["dont_merge_cookies"] = True
            result_list.append(request)
            return

        # sold out means every seat is booked, not sold means none is
        total_seat_count = showing_loader.get_output_value(
            'total_seat_count')
        if book_status == 'SoldOut':
            book_seat_count = total_seat_count
        else:
            book_seat_count = 0
        booking_loader.add_value('book_seat_count', book_seat_count)
        booking_loader.add_time_data()
        result_list.append(booking_loader.load_item())
Ejemplo n.º 11
0
    def parse_normal_showing(self, response):
        """
        fill start/end time of the showing from the order page, count
        booked seats and yield the booking item
        """
        loader = init_show_booking_loader(
            response=response, item=response.meta["data_proto"])
        # screen time text is split on "-" into start and end parts
        time_parts = response.xpath(
            '//span[@class="screenTime"]/text()').extract_first().split('-')

        start_hour, start_minute = self.parse_time(time_parts[0].strip())
        loader.get_output_value('showing')['start_time'] = (
            self.get_time_from_text(start_hour, start_minute))

        end_hour, end_minute = self.parse_time(time_parts[1].strip())
        loader.get_output_value('showing')['end_time'] = (
            self.get_time_from_text(end_hour, end_minute))

        booked_seats = response.xpath('//li[@class="seatSell seatOff"]')
        loader.add_value('book_seat_count', len(booked_seats))
        loader.add_time_data()
        yield loader.load_item()
Ejemplo n.º 12
0
    def parse_showing(self, response, curr_showing, data_proto, result_list):
        """
        build showing (and optionally booking) data for one schedule entry

        appends to result_list either a showing item, a finished booking
        item, or a request for the order page handled by
        parse_normal_showing
        """
        showing_loader = ShowingLoader(response=response)
        showing_loader.add_value(None, data_proto.load_item())
        # the last character of the start time text is dropped before parsing
        start_text = curr_showing.xpath('./div/text()').extract_first()
        start_hour, start_minute = self.parse_time(start_text[:-1])
        showing_loader.add_value(
            'start_time', self.get_time_from_text(start_hour, start_minute))
        # end time not displayed in schedule page

        showing_loader.add_value('seat_type', 'NormalSeat')

        # query screen number from database
        showing_loader.add_total_seat_count()
        if not self.crawl_booking_data:
            # booking data is not wanted, the showing itself is the result
            result_list.append(showing_loader.load_item())
            return

        booking_loader = init_show_booking_loader(response=response)
        booking_loader.add_value('showing', showing_loader.load_item())
        raw_status = curr_showing.xpath('./div/@class').extract_first()
        booking_loader.add_book_status(raw_status, util=KinezoUtil)
        book_status = booking_loader.get_output_value('book_status')

        if book_status not in ('SoldOut', 'NotSold'):
            # normal, need to crawl book number on order page
            order_url = response.urljoin(
                curr_showing.xpath('./@href').extract_first())
            request = scrapy.Request(order_url,
                                     callback=self.parse_normal_showing)
            request.meta["data_proto"] = booking_loader.load_item()
            result_list.append(request)
            return

        # sold out means every seat is booked, not sold means none is
        total_seat_count = showing_loader.get_output_value(
            'total_seat_count')
        if book_status == 'SoldOut':
            book_seat_count = total_seat_count
        else:
            book_seat_count = 0
        booking_loader.add_value('book_seat_count', book_seat_count)
        booking_loader.add_time_data()
        result_list.append(booking_loader.load_item())
Ejemplo n.º 13
0
    def parse_showing_json(self, response):
        """
        extract showing info from json data

        the response text is javascript; tabs and line breaks are removed
        and the code in front of the WMC_E_DATA assignment is stripped so
        the remainder can be decoded, then every seat flagged '3' in the
        first FLAG seat map is counted as booked and the booking item is
        yielded
        """
        # TODO D-Box seat need check if handled right
        # NOTE: the fourth positional argument of re.sub is count, not
        # flags, so a positional re.DOTALL would cap the replacements at
        # 16 and apply no flag at all; flags are passed by keyword instead.
        # The first pattern contains no ".", so it needs no flag.
        script_text = re.sub(r'[\t\r\n]', '', response.text)
        script_text = re.sub(r'if\( typeof.+?{}}WMC_E_DATA = ', '',
                             script_text, flags=re.DOTALL)
        json_data = demjson.decode(script_text)
        result = init_show_booking_loader(response=response,
                                          item=response.meta["data_proto"])

        seat_rows = json_data['SeatMaps']['FLAG'][0]
        booked_seat_count = sum(
            1 for seat_row in seat_rows for flag in seat_row if flag == '3')
        result.add_value('book_seat_count', booked_seat_count)
        result.add_time_data()
        yield result.load_item()
Ejemplo n.º 14
0
 def parse_normal_showing(self, response):
     """
     compute total seat count from the seat block of the order page and
     yield a request for the seat json handled by parse_showing_seat_json
     """
     seat_block = response.xpath('//div[@class="cinema_seets step1"]')
     li_count = len(seat_block.xpath('.//li'))
     # li elements marked "none" or used as row heads are not seats
     none_li_count = len(
         seat_block.xpath('.//li[contains(@class,"none")]'))
     row_head_li_count = len(
         seat_block.xpath('.//li[contains(@class,"seet_row_head")]'))
     total_seat_count = li_count - (none_li_count + row_head_li_count)
     loader = init_show_booking_loader(response=response,
                                       item=response.meta["data_proto"])
     loader.get_output_value(
         'showing')['total_seat_count'] = total_seat_count
     # empty seat is generated by json api, so we need another request
     # extract json url from javascript
     ajax_script = response.xpath(
         '//script[contains(.,"ajax")]/text()').extract_first()
     url_match = re.search(r"url: \"(.+)\"", ajax_script)
     tail = url_match.group(1)
     data_match = re.search(r"data: \"(.+)\"", ajax_script)
     parameters = data_match.group(1)
     seat_json_url = self.generate_seat_json_url(
         tail=tail, parameters=parameters)
     request = scrapy.Request(seat_json_url,
                              callback=self.parse_showing_seat_json)
     request.meta["data_proto"] = loader.load_item()
     yield request
Ejemplo n.º 15
0
    def parse_showing(self, response, curr_showing, data_proto, result_list):
        """
        build showing (and optionally booking) data for one schedule entry

        appends to result_list either a showing item, a finished booking
        item, or a request for the order page handled by
        parse_normal_showing
        """
        def parse_time(time_str):
            # "HH:MM" -> (hour, minute)
            parts = time_str.split(":")
            return int(parts[0]), int(parts[1])

        showing_loader = ShowingLoader(response=response)
        showing_loader.add_value(None, data_proto.load_item())
        start_text = curr_showing.xpath(
            './span[@class="start-time digit"]/text()').extract_first()
        showing_loader.add_value(
            'start_time', self.get_time_from_text(*parse_time(start_text)))
        end_text = curr_showing.xpath(
            './span[@class="end-time digit"]/text()').extract_first()
        showing_loader.add_value(
            'end_time', self.get_time_from_text(*parse_time(end_text)))
        # TODO cinema name extract failed
        # TODO extract name may be different from real name
        theater_name = curr_showing.xpath(
            './span[@class="movie-info-theater"]/text()').extract_first()
        # if extract cinema name from showing info, use this one
        if theater_name:
            showing_loader.replace_cinema_name(theater_name)

        # CANNOTSOLVE we cannot get screen name from site for
        # sold out and not sold showings so we have to give it a special
        # screen name
        screen_name = "unknown"
        purchase_url = curr_showing.xpath(
            './span[@class="purchase-block"]/a/@href').extract_first()
        if purchase_url:
            # extract screen name by url parameter, last match wins
            screen_numbers = re.findall(r'&sc=(\d+)&', purchase_url)
            if screen_numbers:
                screen_name = "シアター" + screen_numbers[-1]
        showing_loader.add_screen_name(screen_name)
        showing_loader.add_value('seat_type', 'NormalSeat')

        # query screen number from database
        showing_loader.add_total_seat_count()
        if not self.crawl_booking_data:
            # booking data is not wanted, the showing itself is the result
            result_list.append(showing_loader.load_item())
            return

        booking_loader = init_show_booking_loader(response=response)
        booking_loader.add_value('showing', showing_loader.load_item())
        raw_status = curr_showing.xpath(
            './span[@class="purchase-block"]/a/@class').extract_first()
        booking_loader.add_book_status(raw_status, util=ForumUtil)
        book_status = booking_loader.get_output_value('book_status')

        if book_status not in ('SoldOut', 'NotSold'):
            # normal, need to crawl book number on order page
            order_url = curr_showing.xpath(
                './span[@class="purchase-block"]/a/@href').extract_first()
            request = scrapy.Request(order_url,
                                     callback=self.parse_normal_showing)
            request.meta["data_proto"] = booking_loader.load_item()
            result_list.append(request)
            return

        # sold out means every seat is booked, not sold means none is
        total_seat_count = showing_loader.get_output_value(
            'total_seat_count')
        if book_status == 'SoldOut':
            book_seat_count = total_seat_count
        else:
            book_seat_count = 0
        booking_loader.add_value('book_seat_count', book_seat_count)
        booking_loader.add_time_data()
        result_list.append(booking_loader.load_item())
Ejemplo n.º 16
0
    def parse_showing(self, response, curr_showing, data_proto, result_list):
        """
        build showing (and optionally booking) data for one schedule entry

        nothing is appended to result_list when the entry has no time
        section or when site ordering is stopped; otherwise a showing
        item, a finished booking item, or a request handled by
        parse_new_cookie is appended
        """
        def parse_time(time_str):
            """
            convert time text like "10:30" to (hour, minute)
            """
            # normalize the argument itself: reading start_time from the
            # enclosing scope would make every call return the start time,
            # even when the end time is being parsed
            time_str = unicodedata.normalize('NFKC', time_str)
            time = time_str.split(":")
            # keep digits only so decoration around the time text
            # (presumably a separator in front of the end time, TODO
            # confirm against the site) does not break the int conversion
            hour = ''.join(ch for ch in time[0] if ch.isdigit())
            minute = ''.join(ch for ch in time[1] if ch.isdigit())
            return (int(hour), int(minute))

        # showing section passed in may be unusable and need to be filtered
        time_section = curr_showing.xpath('./div[@class="time"]')
        if not time_section:
            return
        showing_data_proto = ShowingLoader(response=response)
        showing_data_proto.add_value(None, data_proto.load_item())
        start_time = time_section.xpath('./span/span/text()').extract_first()
        start_hour, start_minute = parse_time(start_time)
        showing_data_proto.add_value(
            'start_time', self.get_time_from_text(start_hour, start_minute))
        end_time = time_section.xpath('./span/text()').extract_first()
        try:
            end_hour, end_minute = parse_time(end_time)
        except (TypeError, IndexError, ValueError):
            # end time text is missing or not parseable; the end time is
            # left unset rather than stored with a wrong value
            pass
        else:
            showing_data_proto.add_value(
                'end_time', self.get_time_from_text(end_hour, end_minute))
        screen_name = curr_showing.xpath('./div[2]/a/text()').extract_first()
        showing_data_proto.add_screen_name(screen_name)
        # when site ordering is stopped stop crawling
        site_status = curr_showing.xpath('./a/span[2]/text()').extract_first()
        if site_status == '予約停止中':
            return
        # handle free order seat type showings
        seat_type = curr_showing.xpath(
            './div[@class="icon"]//img/@alt').extract_first()
        showing_data_proto.add_value('seat_type',
                                     AeonUtil.standardize_seat_type(seat_type))

        # query screen number from database
        showing_data_proto.add_total_seat_count()
        # check whether need to continue crawl booking data or stop now
        if not self.crawl_booking_data:
            result_list.append(showing_data_proto.load_item())
            return

        booking_data_proto = init_show_booking_loader(response=response)
        booking_data_proto.add_value('showing', showing_data_proto.load_item())
        book_status = curr_showing.xpath('./a/span/text()').extract_first()
        booking_data_proto.add_book_status(book_status, util=AeonUtil)
        book_status = booking_data_proto.get_output_value('book_status')
        seat_type = showing_data_proto.get_output_value('seat_type')
        if seat_type == 'FreeSeat' or book_status in ('SoldOut', 'NotSold'):
            # free seat, sold out or not sold: no order page is crawled,
            # only a sold out showing counts every seat as booked
            total_seat_count = showing_data_proto.get_output_value(
                'total_seat_count')
            book_seat_count = (total_seat_count
                               if book_status == 'SoldOut' else 0)
            booking_data_proto.add_value('book_seat_count', book_seat_count)
            booking_data_proto.add_time_data()
            result_list.append(booking_data_proto.load_item())
            return

        # normal, generate request to showing page
        showing_request = self.generate_agreement_request(
            response=response, curr_showing=curr_showing)
        # go to schedule page again to generate independent cookie
        # for each showing
        schedule_url = response.meta['schedule_url']
        request = scrapy.Request(schedule_url,
                                 dont_filter=True,
                                 callback=self.parse_new_cookie)
        request.meta["data_proto"] = booking_data_proto.load_item()
        request.meta["showing_request"] = showing_request
        (performance_id, _,
         _) = self.extract_showing_parameters(curr_showing)
        # one cookie jar per performance id
        request.meta["cookiejar"] = performance_id
        result_list.append(request)
Ejemplo n.º 17
0
    def parse_showing(self, response, curr_showing, data_proto, result_list):
        """
        build showing (and optionally booking) data for one schedule entry

        appends to result_list either a showing item, a finished booking
        item, or a request handled by parse_4dx_confirm_page or
        parse_normal_showing
        """
        def parse_time(time_str):
            # "HH:MM" -> (hour, minute)
            parts = time_str.split(":")
            return int(parts[0]), int(parts[1])

        showing_loader = ShowingLoader(response=response)
        showing_loader.add_value(None, data_proto.load_item())
        start_text = curr_showing.xpath(
            './div/ol/li[@class="startTime"]/text()').extract_first()
        showing_loader.add_value(
            'start_time', self.get_time_from_text(*parse_time(start_text)))
        # the first character of the end time text is dropped before parsing
        end_text = curr_showing.xpath(
            './div/ol/li[@class="endTime"]/text()').extract_first()
        showing_loader.add_value(
            'end_time', self.get_time_from_text(*parse_time(end_text[1:])))
        # handle free order seat type showings
        seat_icon_src = curr_showing.xpath(
            './div/ul/li[@class="seatIcon"]/img/@src').extract_first()
        showing_loader.add_value(
            'seat_type', UnitedUtil.standardize_seat_type(seat_icon_src))

        # query screen number from database
        showing_loader.add_total_seat_count()
        if not self.crawl_booking_data:
            # booking data is not wanted, the showing itself is the result
            result_list.append(showing_loader.load_item())
            return

        booking_loader = init_show_booking_loader(response=response)
        booking_loader.add_value('showing', showing_loader.load_item())
        raw_status = curr_showing.xpath(
            './div/ul/li[@class="uolIcon"]//img[1]/@src').extract_first()
        booking_loader.add_book_status(raw_status, util=UnitedUtil)
        book_status = booking_loader.get_output_value('book_status')
        seat_type = showing_loader.get_output_value('seat_type')

        if seat_type != 'FreeSeat' and book_status not in ('SoldOut',
                                                           'NotSold'):
            # normal, need to crawl book number on order page
            # we will visit schedule page again to generate independent
            # cookie as same cookie will lead to confirm page
            order_url = curr_showing.xpath(
                './div/ul/li[@class="uolIcon"]/a/@href').extract_first()
            # determine if next page is 4dx confirm page by title
            title = showing_loader.get_output_value('title')
            callback = (self.parse_4dx_confirm_page if '4DX' in title
                        else self.parse_normal_showing)
            request = scrapy.Request(order_url, callback=callback)
            request.meta["data_proto"] = booking_loader.load_item()
            # use independent cookie to avoid affecting each other
            request.meta["cookiejar"] = order_url
            result_list.append(request)
            return

        # free seat, sold out or not sold: only a sold out showing counts
        # every seat as booked
        total_seat_count = showing_loader.get_output_value(
            'total_seat_count')
        if book_status == 'SoldOut':
            book_seat_count = total_seat_count
        else:
            book_seat_count = 0
        booking_loader.add_value('book_seat_count', book_seat_count)
        booking_loader.add_time_data()
        result_list.append(booking_loader.load_item())