Exemple #1
0
    def parse(self, response):
        """Parse a fare-search result page and yield one item per direct flight.

        When the page contains the captcha prompt, the original query is
        re-submitted through ``self.transit`` and nothing else is parsed.
        Otherwise each un-classed row of the ``tbodyOnward`` table is read:
        rows reporting stops are skipped, and the fare that is emitted is the
        first one offering at least three seats, or the last fare seen when
        none does.
        """
        if 'Enter the captcha code' in response.body:
            # The IP has been blocked and the site demands a captcha (it keeps
            # asking even after a correct answer), so pause and re-queue.
            time.sleep(2)
            self.log('please input captcha code', 20)
            query = response.meta.get('params')
            yield scrapy.Request(
                url=self.start_urls[0] + query,
                callback=self.transit,
                meta={'params': query, 'flag': 1},
                dont_filter=True)
            return

        def hidden_value(node, field):
            # Value of the <input name=field> found directly under ``node``.
            return node.xpath('input[@name="%s"]/@value' % field).extract_first()

        for row in response.xpath('//tbody[@id="tbodyOnward"]/tr[not(@class)]'):
            fare_cells = row.xpath('td[contains(@class, "px")]')
            if len(fare_cells) == 0:
                continue
            first_cell = fare_cells[0]
            if int(hidden_value(first_cell, 'stops')):
                # Non-zero stop count: not a direct flight.
                continue

            duration = twUtil.format_duration(
                row.xpath('td/span[@class="f_time"]/text()').extract_first())
            flightNumber = hidden_value(first_cell, 'flightNumber')
            airline = hidden_value(first_cell, 'carrierCode')
            depTime = twUtil.format_time(
                hidden_value(first_cell, 'scheduledDepartureDateTime'))
            arrTime = twUtil.format_time(
                hidden_value(first_cell, 'scheduledArrivalTime'))
            dep = hidden_value(first_cell, 'origin')
            dest = hidden_value(first_cell, 'destination')
            carrier = hidden_value(first_cell, 'airlineCode')
            dep_seg = dataUtil.format_seg_time(depTime)
            dest_seg = dataUtil.format_seg_time(arrTime)

            # Defaults used when no cabin on this row has a fare.
            seats = 0
            fare = 0
            tax = 0
            cabin = None
            currency = None

            # Walk the cabins in page order looking for the lowest usable fare.
            for cell in fare_cells:
                try:
                    span = cell.xpath('label/span')
                    if not len(span):
                        continue
                except:  # no ticket for this cabin
                    traceback.print_exc()
                    continue
                fare = hidden_value(span, 'fare')
                seats = hidden_value(span, 'numberOfSeats')
                tax_part = hidden_value(span, 'tax')
                surcharge = hidden_value(span, 'surcharge')
                cabin = hidden_value(span, 'fareClass')
                currency = hidden_value(span, 'currency')
                tax = float(tax_part) + float(surcharge)
                if int(seats) >= 3:
                    break

            # Segment description; assembled here but not attached to the item.
            seg = {
                'flightNumber': flightNumber,
                'aircraftType': '',
                'number': 1,
                'airline': airline,
                'dep': dep,
                'dest': dest,
                'duration': duration,
                'departureTime': dep_seg,
                'destinationTime': dest_seg,
                'depTerminal': '',
                'seats': seats,
            }

            item = LmdSpidersItem()
            item.update(dict(
                maxSeats=seats,
                flightNumber=flightNumber,
                depTime=depTime,
                arrTime=arrTime,
                depAirport=dep,
                arrAirport=dest,
                cabin=cabin,
                currency=currency,
                carrier=carrier,
                isChange=1,
                getTime=time.time(),
                adultPrice=float(fare) + tax,
                adultTax=tax,
                netFare=fare,
                fromCity=self.portCitys.get(dep, dep),
                toCity=self.portCitys.get(dest, dest),
                segments='[]',
            ))
            yield item
Exemple #2
0
 def parse(self, response):
     """Parse a JSON fare response and yield one item per non-stop flight.

     Sets ``self.isOK`` to False when the body is not valid JSON and to
     True otherwise. When the route (``airportInfo``) cannot be read the
     callback logs the reason and stops instead of continuing with an
     unknown origin/destination. A flight whose fares are missing or all
     unavailable is emitted with a zero price, zero seats and 'USD'.
     """
     try:
         content = json.loads(response.body)
     except Exception as e:
         self.log(e, 20)
         self.isOK = False
         return
     self.isOK = True
     try:
         dep, arr = jsonpath(content, '$..airportInfo')[0].replace(' ', '').split('-')
     except Exception:
         # Without the route nothing below can be built, so always stop here
         # (previously an unrecognised error code fell through and crashed
         # later on the unbound ``dep``/``arr``).
         if content.get('code') == 400521204:
             self.log('error_no_routes_exists', 20)
             print(content.get('infoList'))
         else:
             self.log('airportInfo missing or malformed in response', 20)
         return

     def seat_count(fare):
         # 'limitedSeats' starts with the number of seats left; when the
         # field is empty the original code reports 9.
         seat_str = fare.get('limitedSeats')
         return 9 if not seat_str else int(seat_str.split(' ')[0])

     date = jsonpath(content, '$..selectedDate')[0]
     flights = jsonpath(content, '$..cards')[0]
     keys = ['Anytime']
     for flight in flights:
         if jsonpath(flight, '$..numberOfStops')[0]:
             continue
         flightNumber = self.carrier + flight.get('flightNumbers')
         # Departure / arrival as "date HH:MM:00" strings, then as timestamps.
         dep_time_str = '%s %s:00' % (date, flight.get('departureTime'))
         arr_time_str = '%s %s:00' % (date, flight.get('arrivalTime'))
         dep_time = time.mktime(time.strptime(dep_time_str, '%Y-%m-%d %H:%M:%S'))
         arr_time = time.mktime(time.strptime(arr_time_str, '%Y-%m-%d %H:%M:%S'))

         # Defaults for a flight with no bookable fare.
         netfare = 0
         seat = 0
         currency = 'USD'
         product_id = ''
         seg = [[0, 0]]

         price = None  # first available fare in response order
         for fare in flight.get('fares') or []:
             if fare.get('reasonIfUnavailable'):
                 continue
             key = fare.get('fareDescription')
             if key in keys:
                 seg[keys.index(key)] = [
                     float(jsonpath(fare, '$..amount')[0]),
                     seat_count(fare),
                     jsonpath(fare, '$..productId')[0],
                 ]
             if not price:
                 price = fare

         if price:
             netfare = float(jsonpath(price, '$..amount')[0])
             currency = jsonpath(price, '$..currencyCode')[0]
             product_id = jsonpath(price, '$..productId')[0]
             seat = seat_count(price)

         item = LmdSpidersItem()
         item.update(dict(
             flightNumber=flightNumber,
             depAirport=dep,
             arrAirport=arr,
             carrier=self.carrier,
             depTime=dep_time,
             arrTime=arr_time,
             currency=currency,
             segments=json.dumps(seg),
             isChange=1,
             getTime=time.time(),
             fromCity=self.portCitys.get(dep, dep),
             toCity=self.portCitys.get(arr, arr),
             cabin='W',
             adultPrice=netfare,
             netFare=netfare,
             adultTax=0,
             maxSeats=seat,
             info=product_id
         ))
         yield item
Exemple #3
0
    def parse(self, response):
        """Extract flights from the page's inline ``flightResults`` variable.

        The first element of the JSON assigned to ``var flightResults`` is
        iterated and one item is yielded per flight that has a ``happy``,
        ``happlus`` or ``prime`` fare (tried in that order). If the variable
        is missing or unparsable the page is logged and nothing is yielded.
        An unknown currency marker triggers a notification e-mail and falls
        back to 'CNY'.
        """
        html_content = response.body.decode("utf-8")

        try:
            flight_results = re.search(r"var flightResults = (.*);",
                                       html_content)
            flight_data = json.loads(flight_results.group(1))[0]
        except Exception:
            # No usable payload on this page: log it at level 30 and stop.
            self.log(html_content, 30)
            return

        currency_flag = re.search(r"'initCurrency', '(.*)'",
                                  html_content).group(1)
        if currency_flag not in self.currency_cache:
            pubUtil.send_email('new Currency from MM!', currency_flag)
        currency = self.currency_cache.get(currency_flag, 'CNY')

        for flights in flight_data:
            flightNumber = flights.get('flightNumber')
            depTime = time.mktime(
                time.strptime(flights.get("departureTime"), '%Y/%m/%d %H:%M:%S'))
            arrTime = time.mktime(
                time.strptime(flights.get("arrivalTime"), '%Y/%m/%d %H:%M:%S'))
            depAirport = flights.get("originCode")
            arrAirport = flights.get("destinationCode")
            adultTax = float(flights.get("taxAdult"))
            carrier = flightNumber[0:2]

            # Use the first fare type that is present, in preference order.
            fares = flights.get("fares") or {}
            detail_message = (fares.get("happy")
                              or fares.get('happlus')
                              or fares.get("prime"))
            if not detail_message:
                # Skip this flight rather than crash and lose the others.
                self.log('no fare found for %s' % flightNumber, 20)
                continue
            netFare = detail_message.get("fare")
            maxSeats = detail_message.get("seat")
            cabin = detail_message.get("bookingClass")
            adultPrice = netFare + adultTax

            item = LmdSpidersItem()
            item.update(
                dict(
                    flightNumber=flightNumber,
                    depAirport=depAirport,
                    arrAirport=arrAirport,
                    carrier=carrier,
                    depTime=depTime,
                    arrTime=arrTime,
                    currency=currency,
                    isChange=1,
                    getTime=time.time(),
                    fromCity=self.portCitys.get(depAirport, depAirport),
                    toCity=self.portCitys.get(arrAirport, arrAirport),
                    adultPrice=adultPrice,
                    netFare=netFare,
                    maxSeats=maxSeats,
                    adultTax=adultTax,
                    cabin=cabin,
                ))
            yield item
Exemple #4
0
    def parse(self, response):
        """Yield one item per flight found in a JSON itinerary response.

        ``self.is_ok`` is set to True on entry. A body that is not JSON is
        printed and dropped. When the response carries no ``taxAd`` value the
        searched route and date are logged as having no data. For every
        flight the cheapest class other than 'F' supplies the fare, cabin,
        currency and seat count; the per-adult tax is ``taxAd + fuelAd``.
        """
        self.is_ok = True
        try:
            result = json.loads(response.text)
        except Exception as err:
            print(err)
            print(response.text)
            print(response.status)
            return

        try:
            tax_ad = jsonpath(result, '$..taxAd')[0]
        except Exception as err:
            self.log(err, 20)
            # Prefer the parameters echoed by the server, else what was sent.
            params = result.get('param') or response.meta.get('payload')
            route = (params.get('depCity1'),
                     params.get('arrCity1'),
                     params.get('depDate1'))
            self.log('%s->%s on %s no data' % route, 20)
            return

        adult_tax = tax_ad + jsonpath(result, '$..fuelAd')[0]

        # Itineraries
        for itinerary in result.get('listItineraryFare'):
            dep_airport = itinerary.get('depCity')
            arr_airport = itinerary.get('arrCity')
            from_city = self.port_city.get(dep_airport, dep_airport)
            to_city = self.port_city.get(arr_airport, arr_airport)

            for flight in itinerary.get('listFlight'):
                dep_stamp = flight.get('depDate') + flight.get('depTime')
                arr_stamp = flight.get('arrDate') + flight.get('arrTime')
                dep_time = time.mktime(time.strptime(dep_stamp, '%Y%m%d%H%M'))
                arr_time = time.mktime(time.strptime(arr_stamp, '%Y%m%d%H%M'))

                # Split "XX0123" into letters and digits, dropping leading zeros.
                number_match = re.match(r'([A-Z]+)(\d+)', flight.get('flightNo'))
                letters, digits = number_match.groups()
                flight_number = letters + digits.lstrip('0')

                # Find the lowest-priced class.
                net_fare, cabin, currency, seats = 0, '', '', 0
                for booking_class in flight.get('listCls'):
                    # Ignore the "1+1 EVENT" promotional tickets.
                    if booking_class.get('cls') == 'F':
                        continue
                    candidate = booking_class.get('priceAd')
                    if not net_fare or net_fare > candidate:
                        net_fare = candidate
                        cabin = booking_class.get('cls')
                        currency = booking_class.get('currency')
                        seats = booking_class.get('avail')

                fields = dict(
                    flightNumber=flight_number,  # flight number
                    depTime=dep_time,  # departure time
                    arrTime=arr_time,  # arrival time
                    fromCity=from_city,  # departure city
                    toCity=to_city,  # arrival city
                    depAirport=dep_airport,  # departure airport
                    arrAirport=arr_airport,  # arrival airport
                    currency=currency,  # currency
                    adultPrice=net_fare + adult_tax,  # adult price
                    adultTax=adult_tax,  # tax
                    netFare=net_fare,  # net fare
                    maxSeats=seats,  # bookable seats
                    cabin=cabin,  # cabin
                    carrier=flight_number[:2],  # airline
                    isChange=1,  # connection flag: 1 = direct, 2 = connecting
                    segments="[]",  # per-leg details for connecting flights
                    getTime=time.time(),
                )
                item = LmdSpidersItem()
                item.update(fields)
                yield item
Exemple #5
0
    def parse(self, response):
        """Yield one item per journey in the search response.

        The body is evaluated as a Python literal, with JSON's ``null``,
        ``true`` and ``false`` mapped to '', 'true' and 'false'. If that
        fails the request headers are refreshed via ``self.get_headers()``
        and the same POST is re-issued. For each journey the first fare
        whose ``ProductClass`` is not 'WC' supplies the price, seats and
        cabin; the 'Middle' and 'Plus' package prices go into ``segments``.
        """
        # Names that eval() resolves when the body contains JSON literals.
        null = ''
        true = 'true'
        false = 'false'
        try:
            # SECURITY NOTE(review): eval() executes whatever the remote
            # server returns. json.loads() would be the safe parser, but it
            # yields None/True/False instead of the string values above, so
            # switching needs the downstream consumers checked first.
            data_dict = eval(response.text)
        except Exception:
            logging.info('pls update headers')
            data_post = response.meta.get('data_post')
            self.get_headers()
            yield scrapy.Request(
                method='POST',
                url=self.start_urls[1],
                headers=self.custom_settings.get('DEFAULT_REQUEST_HEADERS'),
                body=json.dumps(data_post),
                meta={'data_post': data_post},
                dont_filter=True,
                callback=self.parse)
            return

        currency = data_dict.get('CurrencyCode')
        # A response without 'Journeys' simply yields nothing.
        for journey in data_dict.get('Journeys') or []:
            depAirport = journey.get('DepartureStation')
            arrAirport = journey.get('ArrivalStation')
            carrier = journey.get('CarrierCode')
            flightNumber = carrier + journey.get('FlightNumber')
            depTime = time.mktime(
                time.strptime(journey.get('STD'), '%Y-%m-%dT%H:%M:%S'))
            arrTime = time.mktime(
                time.strptime(journey.get('STA'), '%Y-%m-%dT%H:%M:%S'))
            fares = jsonpath(journey, '$..Fares')[0]
            lowFare = dict(
                adultPrice=0,
                netFare=0,
                maxSeats=0,
                adultTax=0,
                cabin='',
            )
            # Package prices; entries stay 0 when a package is not offered.
            price_dict = {'Basic': 0, 'Middle': 0, 'Plus': 0, 'SUPER_ECO': 0}
            for fare in fares:
                # Skip the Wizz Club ('WC') price; removing this check would
                # report the member discount price instead.
                if fare.get('ProductClass') == 'WC':
                    continue

                paxFareTypes = fare.get('PaxFares')[0].get('PaxFareTypes')
                for paxfare in paxFareTypes:
                    package_name = paxfare.get('PaxFareClass')
                    package_price = max(jsonpath(paxfare, '$..Amount'))
                    price_dict[package_name] = package_price

                # The first fare type supplies the headline price.
                paxfare = paxFareTypes[0]
                price = max(jsonpath(paxfare, '$..Amount'))
                netfare = paxfare.get('PureFarePriceAmount')
                lowFare['adultPrice'] = price
                lowFare['netFare'] = netfare
                lowFare['maxSeats'] = fare.get('AvailableCount')
                lowFare['cabin'] = fare.get('ProductClass')
                lowFare['adultTax'] = price - netfare
                # Only the first non-WC fare is used.
                break

            # [price, seats] pairs for the Middle and Plus packages.
            segments = [[price_dict.get('Middle'),
                         lowFare.get('maxSeats')],
                        [price_dict.get('Plus'),
                         lowFare.get('maxSeats')]]

            item = LmdSpidersItem()
            item.update(
                dict(
                    flightNumber=flightNumber,
                    depAirport=depAirport,
                    arrAirport=arrAirport,
                    carrier=carrier,
                    depTime=depTime,
                    arrTime=arrTime,
                    currency=currency,
                    segments=json.dumps(segments),
                    isChange=1,
                    getTime=time.time(),
                    fromCity=self.portCitys.get(depAirport, depAirport),
                    toCity=self.portCitys.get(arrAirport, arrAirport),
                ))
            item.update(lowFare)
            yield item
Exemple #6
0
    def parse(self, response):
        """Yield one item per bookable flight link (``//div/a``) on the page.

        Anchors without a ``data-is-super`` attribute, or with it set to
        'true', are skipped. Flight number, airports and times come from the
        ``data-journeykey`` attribute, the cabin from the fourth field of
        ``data-farekey``, and the price from ``data-price-format`` (a
        one-character currency symbol followed by the amount). The seat
        count defaults to 9 when ``data-free-places`` is absent or not a
        number. Sets ``self.isOK`` to True.
        """
        self.isOK = True
        self.log('data is parseing.....', 20)
        for a in response.xpath('//div/a'):
            try:
                flag = a.xpath('./@data-is-super').extract()[0]
            except IndexError:
                # Not a flight link.
                continue
            if flag == 'true':
                continue

            jour_key = a.xpath('./@data-journeykey').extract()[0]
            # Build real lists: filter() returns an iterator on Python 3 and
            # cannot be indexed, which broke the cabin lookup below.
            s = [part for part in re.split(r'[~|\s]+', jour_key) if part]
            carrier, number, dep, dep_date, dep_time, arr, arr_date, arr_time = s
            flightNumber = carrier + number
            dep_ts = self.dt_to_ts('%s %s' % (dep_date, dep_time))
            arr_ts = self.dt_to_ts('%s %s' % (arr_date, arr_time))

            seats_str = a.xpath('./@data-free-places').extract()
            try:
                seats = int(seats_str[0])
            except (IndexError, ValueError):
                seats = 9
            jour_fare = a.xpath('./@data-journeyfare').extract()[0]
            fare_dict = json.loads(jour_fare)[0]
            tax = fare_dict.get('tax')
            netFare = fare_dict.get('farePrice')
            price_str = a.xpath('./@data-price-format').extract()[0]
            currency = self.currency_cache.get(price_str[0], 'EUR')
            price = float(price_str[1:])

            fare_key = a.xpath('./@data-farekey').extract()[0]
            s_f = [part for part in re.split(r'[~|\s]+', fare_key) if part]
            cabin = s_f[3]

            # NOTE(review): this segment is assembled but never attached to
            # the item, which is emitted with an empty segments list. It is
            # kept so the attribute lookups still run as before.
            seg_dep = a.xpath('./@data-dept-date').extract()[0] + ':00'
            seg_arr = a.xpath('./@data-date').extract()[0] + ':00'
            segment = dict(
                flightNumber=flightNumber,
                aircraftType='',
                number=1,
                airline=carrier,
                dep=dep,
                dest=arr,
                departureTime=seg_dep,
                destinationTime=seg_arr,
                depTerminal='',
                seats=seats,
                duration=dataUtil.gen_duration(dep_ts, arr_ts),
            )

            item = LmdSpidersItem()
            item.update(
                dict(
                    flightNumber=flightNumber,
                    depAirport=dep,
                    arrAirport=arr,
                    carrier=carrier,
                    depTime=dep_ts,
                    arrTime=arr_ts,
                    currency=currency,
                    segments=json.dumps([]),
                    isChange=1,
                    getTime=time.time(),
                    fromCity=self.portCitys.get(dep, dep),
                    toCity=self.portCitys.get(arr, arr),
                    cabin=cabin,
                    adultPrice=price,
                    netFare=netFare,
                    adultTax=tax,
                    maxSeats=seats,
                ))
            yield item
Exemple #7
0
    def parse(self, response):
        """Turn an availability response into one item per usable journey.

        If the ``availabilityv2`` node cannot be read, the error and body
        are printed and the same POST is sent again. Journeys typed
        'Connect' or having more than two legs are skipped. The first fare
        listed for a journey supplies the price, seats and cabin; a fixed
        amount (350 when the journey is flagged international, otherwise
        225) is added to every fare amount. ``segments`` holds
        ``[price, seats]`` for the fare codes whose second character is '0'
        or 'U', or ``[-1, -1]`` where no such fare has seats.
        """
        payload = json.loads(response.body)
        try:
            availability = jsonpath(payload, '$..availabilityv2')[0]
        except Exception as err:
            print(err)
            print(response.body)
            retry_body = response.meta.get('post_data')
            yield scrapy.Request(
                url=self.start_urls,
                method="POST",
                body=json.dumps(retry_body),
                meta={'post_data': retry_body},
                dont_filter=True,
            )
            return
        if not availability:  # no flights on that day
            return

        currency = availability.get('currencyCode')

        # Index the priced fares by their key for lookup per journey.
        fares = availability.get('faresAvailable')
        price_by_key = {}
        for entry in fares or []:
            price_by_key[entry['key']] = entry['value']

        for journey in jsonpath(availability, '$..journeysAvailable')[0]:
            if journey.get('flightType') == 'Connect':  # drop non-direct flights
                continue
            legs = jsonpath(journey, '$..legs')[0]
            leg_count = len(legs)
            if leg_count > 2:  # drop multi-stop flights
                continue
            if leg_count == 2:
                stop_cities = jsonpath(legs[1], '$..origin')[0]
            else:
                stop_cities = ''

            designator = journey.get('designator')
            time_format = '%Y-%m-%dT%H:%M:%S'
            dep_time = time.mktime(
                time.strptime(designator.get('departure'), time_format))
            arr_time = time.mktime(
                time.strptime(designator.get('arrival'), time_format))
            dep = designator.get('origin')
            arr = designator.get('destination')

            identifier = journey.get('segments')[0].get('identifier')
            carrier = identifier.get('carrierCode')
            flight_number = carrier + identifier.get('identifier')

            is_inter = jsonpath(journey, '$..international')[0]
            fail_count = 0 if is_inter else 99
            # International routes add 350, domestic routes add 225.
            surcharge = 350 if is_inter else 225

            journey_fares = journey.get('fares')
            if fares and journey_fares:
                cheapest = journey_fares[0]
                cheapest_prices = price_by_key.get(cheapest.get('key'))
                net_fare = jsonpath(cheapest_prices, '$..discountedFare')[0]
                adult_price = jsonpath(cheapest_prices, '$..fareAmount')[0]
                seats = jsonpath(cheapest, '$..availableCount')[0]
                cabin = jsonpath(cheapest, '$..fareCode')[0]
                adult_price += surcharge

                # Bundle prices -- known to be problematic, not yet resolved.
                bundle_codes = ['0', 'U']
                segments = [[-1, -1] for _ in bundle_codes]
                for option in journey_fares:
                    option_key = option.get('key')
                    detail = option.get('value')
                    code = detail.get('fareCode')[1]
                    if code not in bundle_codes:
                        continue
                    available = detail.get('availableCount')
                    if not available:
                        continue
                    bundle_price = jsonpath(price_by_key.get(option_key),
                                            '$..fareAmount')[0]
                    bundle_price += surcharge
                    segments[bundle_codes.index(code)] = [bundle_price,
                                                          available]
            else:
                # Nothing priced for this journey.
                adult_price = 0
                net_fare = 0
                seats = 0
                cabin = ''
                segments = []

            fields = {
                'flightNumber': flight_number,
                'depAirport': dep,
                'arrAirport': arr,
                'carrier': carrier,
                'depTime': dep_time,
                'arrTime': arr_time,
                'currency': currency,
                'segments': json.dumps(segments),
                'isChange': 1,
                'getTime': time.time(),
                'fromCity': self.port_citys.get(dep, dep),
                'toCity': self.port_citys.get(arr, arr),
                'adultPrice': adult_price,
                'netFare': net_fare,
                'adultTax': adult_price - net_fare,
                'maxSeats': seats,
                'cabin': cabin,
                'stopCities': stop_cities,
                'failCount': fail_count,
            }
            item = LmdSpidersItem()
            item.update(fields)
            yield item
Exemple #8
0
    def parse(self, response):
        """Yield one item per journey listed under the requested date.

        Sets ``self.isOK`` to True. A response whose ``code`` is '33020' is
        logged as having no tickets. For each journey the cheapest entry of
        its ``ta1`` fare list supplies the price, cabin and seat count (a
        seat value of u'充足' is reported as 10). Journeys without any fare
        are logged and skipped so the remaining journeys are still emitted.
        """
        data = json.loads(response.body)
        self.isOK = True
        code = data.get('code')
        if '33020' == code:
            self.log('no ticket', 20)
            return
        dt = response.meta.get('data').get('beginDate')
        journeys = jsonpath(data, '$..%s' % dt)[0]
        for journey in journeys:
            dep_airport = journey.get('beginCity')
            arr_airport = journey.get('endCity')
            from_city = self.port_city.get(dep_airport, dep_airport)
            to_city = self.port_city.get(arr_airport, arr_airport)
            fn = journey.get('flightNum')

            dep_time_str = '%s %s' % (journey.get('beginDate'),
                                      journey.get('beginDateTime'))
            arr_time_str = '%s %s' % (journey.get('endDate'),
                                      journey.get('endDateTime'))
            dep_time = time.mktime(
                time.strptime(dep_time_str, '%Y-%m-%d %H:%M:%S'))
            arr_time = time.mktime(
                time.strptime(arr_time_str, '%Y-%m-%d %H:%M:%S'))

            # Pick the lowest-priced fare. A running price of 0 counts as
            # "not set yet", exactly as in the original selection rule.
            low_price = 0
            low_fare = None
            for fare in journey.get('ta1') or []:
                this_price = int(fare.get('flightPrice'))
                if not low_price or (low_price > this_price):
                    low_fare = fare
                    low_price = this_price
            if low_fare is None:
                # No fare at all: previously this crashed on None.get().
                self.log('no fare found for %s' % fn, 20)
                continue

            adult_price = low_price
            cabin = low_fare.get('bookingClass')
            seats = low_fare.get('seat')
            if u'充足' == seats:
                seats = 10
            else:
                seats = int(seats)

            item = LmdSpidersItem()
            item.update(
                dict(
                    flightNumber=fn,  # flight number
                    depTime=dep_time,  # departure time
                    arrTime=arr_time,  # arrival time
                    fromCity=from_city,  # departure city
                    toCity=to_city,  # arrival city
                    depAirport=dep_airport,  # departure airport
                    arrAirport=arr_airport,  # arrival airport
                    currency='CNY',  # currency
                    adultPrice=adult_price,  # adult price
                    adultTax=0,  # tax
                    netFare=adult_price,  # net fare
                    maxSeats=seats,  # bookable seats
                    cabin=cabin,  # cabin
                    carrier=fn[:2],  # airline
                    isChange=1,  # connection flag: 1 = direct, 2 = connecting
                    segments="[]",  # per-leg details for connecting flights
                    getTime=time.time(),
                ))
            yield item
Exemple #9
0
    def parse(self, response):
        """Yield one item per flight in ``SearchAirlineFlightsResult``.

        When the response carries a falsy ``pSessionID``, ``self.isOK`` is
        set to False and the request is re-issued; otherwise ``self.isOK``
        is set to True. Flights with more than one segment are skipped. The
        fare comes from the first bucket present among PromoFlight,
        EconomyFlight, BusinessFlight and BusinessFlexiFlight; a flight with
        none of them is skipped. The seat count is always ``self.seats``.
        Airports missing from ``self.portCitys`` fall back to the airport
        code as the city.
        """
        response_dict = json.loads(response.body)
        flights = response_dict.get('SearchAirlineFlightsResult')
        if not response_dict.get('pSessionID', 1):
            self.isOK = False
            logging.info('ip is invalid...please change the ip')
            data = response.meta
            yield scrapy.Request(
                method='POST',
                url=self.start_urls,
                body=json.dumps(data),
                callback=self.parse,
                meta=data,
                dont_filter=True,
                errback=lambda x: self.download_errback(x, data))
        else:
            self.isOK = True
        if not flights:
            return
        for flight in flights:
            if flight.get('TotalSegmentsWithStopOver') > 1:
                continue
            depTime = dataUtil.str_to_stamp(
                flight.get('DepDate') + flight.get('DepTime'))
            arrTime = dataUtil.str_to_stamp(
                flight.get('ArrDate') + flight.get('ArrTime'))
            depAirport = flight.get('DepCity')
            arrAirport = flight.get('ArrCity')
            flightNumber = flight.get('MACode') + flight.get('FlightNo')
            currency = flight.get('Currency')
            segmentInfo = flight.get('SegmentInformation')[0]
            cabin = segmentInfo.get('SegBookingClass')
            carrier = flight.get('MACode')
            segment = {}
            segment['flightNumber'] = segmentInfo.get('MACode') + flight.get(
                'FlightNo')
            segment['aircraftType'] = segmentInfo.get('AirCraft')
            segment['number'] = 1
            segment['airline'] = segmentInfo.get('OprAirlineCode')
            segment['dep'] = segmentInfo.get('DepCity')
            segment['dest'] = segmentInfo.get('ArrCity')
            segment['duration'] = dataUtil.format_duration(
                segmentInfo.get('TDuration'))
            segment['departureTime'] = dataUtil.str_date_format(
                segmentInfo.get('DepDate') + segmentInfo.get('DepTime'))
            segment['destinationTime'] = dataUtil.str_date_format(
                segmentInfo.get('ArrDate') + segmentInfo.get('ArrTime'))

            # Take the first fare bucket that is present, in this order.
            lowFlight = (flight.get('PromoFlight')
                         or flight.get('EconomyFlight')
                         or flight.get('BusinessFlight')
                         or flight.get('BusinessFlexiFlight'))
            if not lowFlight:
                # Nothing to price: skip instead of crashing on the lookups
                # below and losing the remaining flights.
                logging.info('no fare bucket for %s', flightNumber)
                continue

            segment['depTerminal'] = jsonpath(lowFlight, '$..TerminalCode')
            tax = jsonpath(lowFlight, '$..TaxPerPax')[0]
            netFare = jsonpath(lowFlight, '$..PricePerPax')[0]
            maxseats = self.seats
            segment['seats'] = maxseats
            item = LmdSpidersItem()
            item['maxSeats'] = maxseats
            item['flightNumber'] = flightNumber
            item['depTime'] = depTime
            item['arrTime'] = arrTime
            item['depAirport'] = depAirport
            item['arrAirport'] = arrAirport
            item['currency'] = currency
            item['cabin'] = cabin
            item['carrier'] = carrier
            item['segments'] = json.dumps([segment])
            item['isChange'] = 1
            item['getTime'] = time.time()
            item['adultPrice'] = netFare + tax
            item['adultTax'] = tax
            item['netFare'] = netFare
            item['fromCity'] = self.portCitys.get(depAirport, depAirport)
            item['toCity'] = self.portCitys.get(arrAirport, arrAirport)
            yield item
Exemple #10
0
    def parse(self, response):
        """Turn a JSON availability response into one item per direct journey.

        When the body cannot be decoded or contains no ``Journeys`` entry,
        the body is inspected: an "access denied" page clears ``self.isOK``
        and re-issues the same POST (taken from ``response.meta['data_list']``);
        anything else is logged and dropped.

        For every journey with a single segment, the first fare that is
        available and whose seat count is either unknown/zero or at least 3
        becomes the item's price.  Up to two further fares (positions 1 and 2
        of ``JourneyFare``) are serialised into the item's ``segments`` field.

        Yields:
            ``scrapy.Request`` for a retry, or ``LmdSpidersItem`` instances.
        """
        try:
            response_dict = json.loads(response.body)
            journeys = jsonpath(response_dict, '$..Journeys')[0]
        except Exception:
            # Body is not JSON, or the 'Journeys' lookup produced nothing
            # that can be indexed.
            if response.body.lower().find(
                    '<title>access denied</title>') == -1:
                logging.info(response.body)
                return
            logging.info('access denied!!!')
            self.isOK = False
            data_list = response.meta['data_list']
            yield scrapy.Request(
                method='POST',
                url=self.start_url,
                headers={'Cookie': self.cookie},
                body=json.dumps(data_list),
                meta={'data_list': data_list},
                callback=self.parse,
                dont_filter=True,
                errback=lambda x: self.download_errback(x, data_list))
            return

        self.isOK = True
        for journey in journeys:
            segments = journey.get('Segments')
            if len(segments) > 1:
                # Journeys made of several segments are skipped.
                continue
            dep = journey.get('DepartureStation')
            arr = journey.get('ArrivalStation')
            dep_time = vyUtil.date_to_stamp(journey.get("STD"))
            arr_time = vyUtil.date_to_stamp(journey.get("STA"))

            fares = journey.get('JourneyFare')

            # Find the lowest-priced cabin that still has tickets: the first
            # available fare whose seat count is missing/zero or >= 3.
            chosen = None
            seats = 1
            for candidate in fares:
                if not candidate.get('IsFareAvailable'):
                    continue
                seats = candidate.get('AvailableCount')
                if not seats or seats >= 3:
                    chosen = candidate
                    break
            if chosen is None:
                continue
            # A missing/zero count on the chosen fare is reported as 9 seats.
            seats = seats or 9
            currency = chosen.get('CurrencyCode')
            cabin = chosen.get('ProductClass')
            price = chosen.get('Amount')

            seg = segments[0]
            carrier = seg.get('CarrierCode')
            flightNumber = carrier + seg.get('FlightNumber')
            if not self.filter_number(flightNumber):
                continue

            flightkey = seg.get('SegmentSellKey')
            info = dict(flightkey=flightkey,
                        farekey=chosen.get('JourneyFareKey'))

            # Build the 'segments' payload: two slots, each defaulting to
            # [0, 0].
            price_a = [[0, 0] for _ in range(2)]

            # If the operator is not Vueling (or WonderFly), the seat count
            # is 0 and the extra fare slots stay at their defaults.
            opt_by = journey.get('OperatedBy')
            if opt_by not in ['Vueling', 'WonderFly']:
                print(opt_by)
                seats = 0
            else:
                # Only as many extra fares as there are slots are considered;
                # an unbounded fares[1:] would raise IndexError on a third
                # active extra fare.
                for slot, extra in enumerate(fares[1:1 + len(price_a)]):
                    if not extra.get('Active'):
                        continue
                    extra_seats = extra.get('AvailableCount')
                    extra_info = json.dumps(
                        dict(flightkey=flightkey,
                             farekey=extra.get('JourneyFareKey')))
                    price_a[slot] = [extra.get('Amount'),
                                     extra_seats or 9,
                                     extra_info]

            item = LmdSpidersItem()
            item['maxSeats'] = seats
            item['flightNumber'] = flightNumber
            item['depTime'] = dep_time
            item['arrTime'] = arr_time
            item['depAirport'] = dep
            item['arrAirport'] = arr
            item['currency'] = currency
            item['cabin'] = cabin
            item['carrier'] = carrier
            item['segments'] = json.dumps(price_a)
            item['isChange'] = 1
            item['getTime'] = time.time()
            item['adultPrice'] = price
            item['netFare'] = price
            item['fromCity'] = self.portCitys[dep]
            item['toCity'] = self.portCitys[arr]
            item['info'] = json.dumps(info)
            yield item
Exemple #11
0
    def parse(self, response):
        """Turn a JSON availability response into one item per direct journey.

        The same POST (rebuilt from ``response.meta['data_dict']``) is
        re-issued when the response has no journeys and a non-zero ``Code``,
        when a journey's dates cannot be parsed, or when the selected fare's
        price equals its tax.  In the latter two cases ``self.isOK`` is
        cleared and the remaining journeys of this response are abandoned.

        Journeys with more than one ``TravelInfos`` entry are skipped.  The
        cheapest of ``FlyFare``, ``FlyBagFare`` and ``FlyBagEatFare`` (by
        ``FareAmount``, earlier one winning ties) supplies seats, price and
        tax; a journey without ``FlyFare`` is emitted with all three set to 0.

        Any other exception is printed together with the response body and
        swallowed.

        Yields:
            ``scrapy.Request`` for a retry, or ``LmdSpidersItem`` instances.
        """
        def retry_request():
            # Rebuild the request that produced this response.
            # NOTE(review): ``self.start_urls`` is passed as the URL as in the
            # original code - confirm it is a single string on this spider.
            data_dict = response.meta.get('data_dict')
            return scrapy.Request(
                method='POST',
                url=self.start_urls,
                body=json.dumps(data_dict),
                meta={'data_dict': data_dict},
                dont_filter=True,
                callback=self.parse,
                errback=lambda x: self.download_errback(x, data_dict))

        try:
            response_dict = json.loads(response.body)
            journeys = jsonpath(response_dict, '$..Journeys')
            code = response_dict.get('Code')
            if not journeys:
                if not int(code):
                    # Code 0 with no journeys: nothing to do, no retry.
                    return
                logging.info('not journeys... ')
                print(response.body)
                yield retry_request()
                return

            currency = jsonpath(response.meta, '$..Currency')[0]
            for journey in journeys[0]:
                infos = journey.get('TravelInfos')
                if len(infos) > 1:
                    continue
                info = infos[0]
                dep = info.get('DepartureAirportCode')
                arr = info.get('ArrivalAirportCode')
                try:
                    depTime = ddUtil.str_to_stamp(info.get('DepartureDate'))
                    arrTime = ddUtil.str_to_stamp(info.get('ArrivalDate'))
                except Exception:
                    logging.info('the format of date is error!')
                    self.isOK = False
                    data_info = response.meta.get('data_dict').get(
                        'GetAvailabilityDetail')
                    print(data_info.get('DepartureAirport'),
                          data_info.get('ArrivalAirport'),
                          data_info.get('BoardDate'))
                    yield retry_request()
                    break

                flightNumber = info.get('FlightNumber')
                aircraft = info.get('EquipmentType')
                cabin = info.get('ClassCode')
                duration = ddUtil.format_duration(info.get('TravelDuration'))
                carrier = info.get('CarrierCode')

                lowfare = journey.get('FlyFare')
                if not lowfare:
                    seats, price, tax = 0, 0, 0
                else:
                    # A candidate without 'FareAmount' must never win, so its
                    # price defaults to infinity rather than a finite sentinel
                    # that a real price could exceed.
                    for candidate in (journey.get('FlyBagFare'),
                                      journey.get('FlyBagEatFare')):
                        if candidate and lowfare.get(
                                'FareAmount') > candidate.get(
                                    'FareAmount', float('inf')):
                            lowfare = candidate
                    seats = lowfare.get('AvailableCount')
                    price = lowfare.get('FareAmount')
                    tax = jsonpath(lowfare, '$..Amount')[0]
                    if price == tax:
                        logging.info('price == tax')
                        self.isOK = False
                        yield retry_request()
                        break

                self.isOK = True
                segment = dict(
                    flightNumber=flightNumber,
                    aircraftType=aircraft,
                    number=1,
                    airline=carrier,
                    dep=dep,
                    dest=arr,
                    duration=duration,
                    departureTime=dataUtil.format_seg_time(depTime),
                    destinationTime=dataUtil.format_seg_time(arrTime),
                    depTerminal='',
                    seats=seats)

                item = LmdSpidersItem()
                item.update(
                    dict(
                        carrier=carrier,
                        maxSeats=seats,
                        flightNumber=flightNumber,
                        depTime=depTime,
                        arrTime=arrTime,
                        depAirport=dep,
                        arrAirport=arr,
                        cabin=cabin,
                        currency=currency,
                        isChange=1,
                        getTime=time.time(),
                        adultPrice=price,
                        adultTax=tax,
                        netFare=price - tax,
                        fromCity=self.portCitys.get(dep, dep),
                        toCity=self.portCitys.get(arr, arr),
                        segments=json.dumps([segment]),
                    ))

                yield item
        except Exception:
            # ``Exception`` (not a bare except) so that GeneratorExit raised at
            # a ``yield`` when the generator is closed is not reported as an
            # error.
            traceback.print_exc()
            print(response.body)