Ejemplo n.º 1
0
    def parse(self, response):
        """Parse a fare-search page and yield one item per direct flight.

        The page is expected to embed a JSON blob between ``config : `` and
        ``, pageEngine``; flights are read from its ``Availability`` node.

        Side effects on the spider:
          * ``image_verify_flag`` is set on HTTP 405.
          * ``js_verify_flag`` is set when the embedded config cannot be
            found or carries an empty ``siteConfiguration``.
          * ``isOK`` records whether the response could be used.
          * ``task`` receives ``response.meta['invalid']`` when there is no
            ``Availability`` node.

        Itineraries with more than one segment are skipped.
        """
        # Image verification required.
        if response.status == 405:
            logging.info('# need image verify')
            self.image_verify_flag = True
            self.isOK = False
            return
        self.isOK = True

        # JS verification.
        # NOTE(review): str.find() returns -1 (truthy) when the text is
        # missing, so this branch only runs when the body *starts with*
        # 'automatically'. Left untouched because the intended test cannot
        # be determined from this method -- confirm and fix.
        if not response.body.find('automatically'):
            logging.info('# line:145 need js verify')
            self.js_verify_flag = True
            return

        config_match = re.search(
            r"""config : (.*), pageEngine""", response.body, flags=re.DOTALL)
        if config_match is None:
            logging.info('# line:156 response data error')
            logging.info('# line:156 need js verify')
            self.js_verify_flag = True
            self.isOK = False
            return

        json_dict = json.loads(config_match.group(1).decode("utf8", "ignore"))

        try:
            valid_airline = jsonpath(json_dict, '$..siteConfiguration')[0]

            # JS verification: the site configuration is empty.
            if not valid_airline:
                logging.info('# need js verify')
                self.js_verify_flag = True
                return

            availability = jsonpath(json_dict, '$..Availability')

            # Invalid route: nothing to parse.
            if not availability:
                self.task.append(response.meta.get('invalid'))
                return

            json_dict = availability[0]
        except Exception:
            logging.info('# response data error')
            # print_exc() writes the traceback itself and returns None, so it
            # must not be wrapped in a print.
            traceback.print_exc()
            self.isOK = False
            return

        currency = json_dict.get('currencyBean').get('code')
        self.isOK = True
        flight_list = json_dict.get('proposedBounds')[0].get(
            'proposedFlightsGroup')
        price_list = json_dict.get('recommendationList')
        time_format = '%b %d, %Y %I:%M:%S %p'

        for flights in flight_list:
            segments = flights.get('segments')

            # Connecting itinerary.
            if len(segments) > 1:
                logging.info('# is change' +
                             json.dumps(response.meta.get('invalid')))
                continue

            flight = segments[0]
            carrier = flight.get('airline').get('code')
            flight_number = carrier + flight.get('flightNumber')

            dep_port = flight.get('beginLocation').get('locationCode')
            arr_port = flight.get('endLocation').get('locationCode')

            from_city = self.portCitys.get(dep_port, dep_port)
            to_city = self.portCitys.get(arr_port, arr_port)

            # Example value: "Mar 20, 2019 12:05:00 PM".
            dt_stamp = time.mktime(
                time.strptime(flight.get('beginDate'), time_format))
            at_stamp = time.mktime(
                time.strptime(flight.get('endDate'), time_format))

            flight_id = flight.get('id')

            # Split the recommendations into those priced for this flight and
            # the rest. The rest becomes the search list for the next flight;
            # building a new list avoids removing from a list that is being
            # iterated, which silently skips elements.
            price_info_list = []
            unmatched = []
            for price_json in price_list:
                seat_info = price_json.get('bounds')[0].get(
                    'flightGroupList')[0]
                if flight_id != seat_info.get('flightId'):
                    unmatched.append(price_json)
                    continue
                price_info = price_json.get('recoAmount')
                net_fare = price_info.get('amountWithoutTax')
                tax = price_info.get('tax')
                price_info_list.append([
                    net_fare + tax,
                    net_fare,
                    tax,
                    seat_info.get('numberOfSeatsLeft'),
                    seat_info.get('rbd'),
                ])
            price_list = unmatched

            if not price_info_list:
                # NOTE(review): this stops processing the remaining flights
                # as well; kept from the original -- confirm it is intended.
                print(response.meta.get('invalid'))
                logging.info('# price error')
                return

            # The first matching recommendation is used as the price.
            adult_price, net_fare, tax, seat, cabin = price_info_list[0]

            item = SpidersHynItem()
            item.update(
                dict(
                    f=flight_number,
                    d=dt_stamp,
                    a=at_stamp,
                    fc=from_city,
                    tc=to_city,
                    c=currency,
                    ap=adult_price,
                    at=tax,
                    n=net_fare,
                    m=seat,
                    cb=cabin,
                    cr=carrier,
                    i=1,
                    s='[]',
                    g=time.time(),
                    da=dep_port,
                    aa=arr_port,
                ))

            yield item
Ejemplo n.º 2
0
    def parse(self, response):
        """Parse a flight-search JSON response and yield one item per flight.

        Flights are read from ``SearchAirlineFlightsResult``. When that list
        is missing or empty, ``response.meta['invalid']`` is appended to
        ``self.task`` and nothing is yielded. Flights whose ``TotalNoStops``
        is not 0 are skipped, as are flights that expose none of the fare
        buckets ``PromoFlight``, ``EconomyFlight``, ``BusinessFlight`` or
        ``BusinessFlexiFlight``.
        """
        try:
            json_dict = json.loads(response.text)
        except (TypeError, ValueError):
            logging.info('###no data')
            return

        # No flights that day: mark the task invalid.
        data_list = json_dict.get('SearchAirlineFlightsResult')
        if not data_list:
            logging.info("no flight")
            self.task.append(response.meta.get('invalid'))
            return

        for data in data_list:
            # Connecting flight.
            if data.get('TotalNoStops') != 0:
                logging.info('is change')
                continue

            # Another airline: the task is marked invalid, but the flight is
            # still emitted (the early return was disabled in the original).
            carrier = data.get('MACode')
            if carrier != 'JT':
                logging.info("### other airline%s", carrier)
                self.task.append(response.meta.get('invalid'))

            dep_time = dataUtil.str_to_stamp(
                data.get('DepDate') + data.get('DepTime'))
            arr_time = dataUtil.str_to_stamp(
                data.get('ArrDate') + data.get('ArrTime'))
            dep_airport = data.get('DepCity')
            arr_airport = data.get('ArrCity')
            flight_number = data.get('MACode') + data.get('FlightNo')
            currency = data.get('Currency')
            cabin = data.get('SegmentInformation')[0].get('SegBookingClass')

            # Take the first fare bucket that is present, in this order.
            low_flight = (data.get('PromoFlight')
                          or data.get('EconomyFlight')
                          or data.get('BusinessFlight')
                          or data.get('BusinessFlexiFlight'))
            if not low_flight:
                # Nothing to price; looking values up would fail anyway.
                logging.info('### no fare for %s', flight_number)
                continue

            # Coerce once so that the total is an arithmetic sum even when
            # the API delivers the amounts as strings.
            tax = float(jsonpath.jsonpath(low_flight, '$..TaxPerPax')[0])
            net_fare = float(
                jsonpath.jsonpath(low_flight, '$..PricePerPax')[0])

            item = SpidersHynItem()
            item['m'] = int(self.ADT)
            item['f'] = flight_number
            item['d'] = dep_time
            item['a'] = arr_time
            item['da'] = dep_airport
            item['aa'] = arr_airport
            item['c'] = currency
            item['cb'] = cabin
            item['cr'] = carrier
            item['s'] = '[]'
            item['i'] = 1
            item['g'] = time.time()
            item['ap'] = net_fare + tax
            item['at'] = tax
            item['n'] = net_fare
            item['fc'] = self.port_city.get(dep_airport, dep_airport)
            item['tc'] = self.port_city.get(arr_airport, arr_airport)

            yield item
Ejemplo n.º 3
0
    def parse(self, response):
        """Parse a trip-search JSON response and yield one item per flight.

        Flights with ``stops`` greater than 0 are skipped. The net fare comes
        from the first of the ``e_``, ``d_`` and ``y_`` fare buckets present
        on the flight. For priced flights the tax is taken from
        ``self.tax_dict`` via ``ze_get_tax`` (skipped when the spider has a
        ``local`` attribute) and otherwise fetched with ``get_net_tax``.

        Side effects on the spider:
          * ``task`` receives ``response.meta['invalid']`` when there are no
            flights.
          * ``tax_dict`` is updated with a newly fetched tax.
          * ``session_flag`` is set, the same task is re-requested and
            parsing stops when the tax fetch returns ``None``.
        """
        data_dict = json.loads(response.body)

        flight_list = jsonpath(data_dict, '$..trips')[0][0]

        task_data = response.meta.get('task_data')
        # No flights.
        if not flight_list:
            self.task.append(response.meta.get('invalid'))
            return

        # Currency unit.
        currency = jsonpath(data_dict, '$..bookingCurrencyCode')[0]
        flight_key = jsonpath(data_dict, '$..flightSearchAuthKey')[0]

        temp_info = {
            'flight_key': flight_key,
            'fare_key': '',
        }
        first_leg = flight_list[0].get('legs')[0]
        dep_airport = first_leg.get('departureStation')
        arr_airport = first_leg.get('arrivalStation')
        # Route key used for the tax dictionary.
        air_line = dep_airport + arr_airport
        is_local = hasattr(self, 'local')
        time_format = '%Y%m%d%H%M%S'

        for data in flight_list:
            # Connecting flight.
            if int(data.get('stops')) > 0:
                continue

            carrier = data.get('carrierCode')
            flight_number = carrier + data.get('flightNumber')
            dep_time = time.mktime(
                time.strptime(data.get('standardTimeOfDeparture'),
                              time_format))
            arr_time = time.mktime(
                time.strptime(data.get('standardTimeOfArrival'),
                              time_format))
            dep_city = self.port_city.get(dep_airport, dep_airport)
            arr_city = self.port_city.get(arr_airport, arr_airport)
            max_seats = self.ADT

            sell_key = data.get('sellKey')
            cabin = 'X'
            net_fare = 0
            # Reset so that a flight without a fare does not report the fare
            # key of the previously processed flight.
            temp_info['fare_key'] = ''
            # Use the first fare bucket that is present, in this order.
            for bucket in ('e', 'd', 'y'):
                if bucket + '_amount' in data:
                    net_fare = float(data.get(bucket + '_amount'))
                    temp_info['fare_key'] = (
                        data.get(bucket + '_sellKey') + '|' + sell_key)
                    cabin = data.get(bucket + '_classOfService')
                    break

            # Compare by value: an identity test against the literal 0 is
            # true for every float, including 0.0.
            if net_fare != 0:
                if is_local:
                    # Local runs always fetch the tax over the network.
                    adult_tax = -1
                else:
                    adult_tax = ze_get_tax(air_line, self.tax_dict, currency)

                if adult_tax == -1:
                    flight_code = jsonpath(data_dict, '$..flightCodeShare')[0]
                    headers = response.request.headers
                    logging.info('# airline not in tax dict: ' + air_line +
                                 task_data.get('date'))

                    # Fetch the tax over the network.
                    adult_tax = get_net_tax(self.ADT, temp_info, headers,
                                            flight_code)

                    # The fetch failed: request this task again and stop.
                    if adult_tax is None:
                        self.session_flag = True
                        yield scrapy.Request(
                            self.start_urls[0],
                            headers=self.headers,
                            body=task_data.get('body'),
                            callback=self.parse,
                            dont_filter=True,
                            meta=response.meta,
                            errback=self.errback,
                            method='POST')
                        return

                    # Store the tax when the route is new or the currency
                    # matches the stored one; otherwise only report it.
                    self.log('got new tax : %s' % adult_tax, 20)
                    tax, cur = self.tax_dict.get(air_line) or [-1, '']
                    if currency == cur or tax == -1:
                        self.tax_dict.update(
                            {air_line: [adult_tax, currency]})
                        if is_local:
                            self.log('%s -> %s' % (tax, adult_tax), 20)
                            update_tax_json(self.tax_dict)
                    else:
                        print('%s->%s in %s' % (cur, currency, air_line))
                        logging.info('# currency error, local update ')

                adult_price = net_fare + adult_tax
            else:
                adult_price = 0
                adult_tax = 0
                max_seats = 0

            item = SpidersHynItem()
            item.update(
                dict(f=flight_number,
                     d=dep_time,
                     a=arr_time,
                     fc=dep_city,
                     tc=arr_city,
                     c=currency,
                     ap=adult_price,
                     at=adult_tax,
                     n=net_fare,
                     m=max_seats,
                     cb=cabin,
                     cr=carrier,
                     i=1,
                     s='[]',
                     g=time.time(),
                     da=dep_airport,
                     aa=arr_airport,
                     info=json.dumps(temp_info)))

            yield item
Ejemplo n.º 4
0
    def parse(self, response):
        """Parse an itinerary JSON response and yield one item per entry.

        When the body is not valid JSON, ``self.flag`` is set and nothing is
        yielded. When ``itinerary`` is missing or empty,
        ``response.meta['invalid']`` is appended to ``self.task``.

        An entry whose ``minAvail`` is below ``self.ADT`` is emitted with
        seats and prices set to 0; otherwise ``price`` is divided by
        ``self.ADT`` for the per-adult price.
        """
        try:
            json_dict = json.loads(response.body)
        except (TypeError, ValueError):
            print(' except second request error')
            self.flag = True
            return

        # No flights that day (a missing key is treated like an empty list).
        itinerary = json_dict.get('itinerary')
        if not itinerary:
            logging.info("no flight")
            self.task.append(response.meta.get('invalid'))
            return

        adult_count = int(self.ADT)
        time_format = '%Y/%m/%d%H:%M'

        for data in itinerary:
            item = SpidersHynItem()
            outbound = data.get('outbound')
            schedule = outbound['schedule']

            # Example value: "TOM 4238" -- the second token is used.
            item["f"] = self.carrier + outbound.get('flightno').split(' ')[1]
            item['d'] = time.mktime(
                time.strptime(
                    schedule['departureDate'] + schedule['departureTime'],
                    time_format))
            item['a'] = time.mktime(
                time.strptime(
                    schedule.get('arrivalDate') + schedule.get('arrivalTime'),
                    time_format))

            item['da'] = outbound.get('departureAirportData').get('id')
            item['aa'] = outbound.get('arrivalAirportData').get('id')

            item['fc'] = self.portCitys.get(item['da'], item['da'])
            item['tc'] = self.portCitys.get(item['aa'], item['aa'])
            item['m'] = int(data.get('minAvail'))

            # Set on both branches so every emitted item carries the field.
            item['c'] = self.currency
            if item['m'] < adult_count:
                # Not enough seats for the requested number of adults.
                item['m'] = 0
                item['ap'] = 0
                item['at'] = 0
                item['n'] = 0
            else:
                item['ap'] = float(data.get('price')) / adult_count
                item['n'] = float(data.get('pricePP'))
                item['at'] = 0

            item['cb'] = self.cabin
            item['cr'] = self.carrier
            item['i'] = self.isChange
            item['s'] = '[]'
            item['g'] = time.time()

            yield item
Ejemplo n.º 5
0
    def parse(self, response):
        """Parse a booking JSON response and yield one item per direct flight.

        Handling before the flight list is read:
          * ``errorid`` present and not 73003: either the task is marked
            invalid (``response.meta['invalid']`` appended to ``self.task``)
            or, after a 2 second pause, the same POST is issued again.
          * ``SUBERROR_ITEMS`` number 931 or 977: the task is marked invalid.
          * ``SUBERROR_ITEMS`` number 979 or 866: the response text is
            printed and parsing stops.
          * no ``list_flight`` node: the response text is printed and
            parsing stops.

        Flights whose ``availabilityInfo.stops`` is not 0 are skipped.
        """
        response_dict = json.loads(response.text)

        data_dict = response_dict.get('data').get('booking').get('mouta')
        # Error payload in the JSON (the original comment attributes this
        # to the IP being blocked).
        request_error_code = jsonpath(response_dict, '$..errorid')
        if request_error_code and request_error_code[0] != 73003:
            fligth_list = jsonpath(data_dict, '$..list_flight')
            # No flights.
            # NOTE(review): a truthy ``list_flight`` is treated as "no
            # flight" here; kept from the original -- confirm the intent.
            if fligth_list or request_error_code[0] == 2130258:
                self.task.append(response.meta.get('invalid'))
                return

            # NOTE(review): time.sleep() blocks the whole process while it
            # waits; presumably meant as a back-off before retrying.
            time.sleep(2)
            meta = response.meta
            parse_task_data = meta.get('parse_task_data')
            # Data used to mark the task invalid later on.
            invalid = {
                'date': parse_task_data.get('date'),
                'depAirport': parse_task_data.get('dep'),
                'arrAirport': parse_task_data.get('arr'),
                'mins': self.custom_settings.get('INVALID_TIME')
            }

            yield scrapy.Request(
                parse_task_data.get('start_url'),
                body=meta.get('post_data'),
                callback=self.parse,
                dont_filter=True,
                meta={
                    'invalid': invalid,
                    'parse_task_data': parse_task_data,
                    'post_data': meta.get('post_data')
                },
                method='POST',
                errback=self.errback,
            )
            return

        # No flights that day.
        number_code = jsonpath(response_dict, '$..SUBERROR_ITEMS')
        if number_code:
            sub_error = number_code[0][0].get('NUMBER')
            if sub_error in (931, 977):
                self.task.append(response.meta.get('invalid'))
                return
            if sub_error in (979, 866):
                print(response.text)
                return

        # Flight list. Without one there is nothing to iterate, so stop here
        # instead of falling through to a loop over an unbound name.
        flight_nodes = jsonpath(data_dict, '$..list_flight')
        if not flight_nodes:
            print(response.text)
            return
        flight_list = flight_nodes[0]

        adult_count = int(self.ADT)
        time_format = '%Y%m%d%H%M'

        for data in flight_list:
            # Connecting flight.
            if data.get('availabilityInfo').get('stops') != 0:
                continue

            # First segment of the flight.
            segment = data.get('segments')[0]
            # Another airline: the task is marked invalid, but the flight is
            # still emitted.
            carrier = segment.get('airline').get('code')
            if carrier != 'A5':
                self.task.append(response.meta.get('invalid'))

            flight_number = carrier + segment.get('flight_number')
            dep_time = time.mktime(
                time.strptime(
                    segment.get('b_date_date') + segment.get('b_date_time'),
                    time_format))
            arr_time = time.mktime(
                time.strptime(
                    segment.get('e_date_date') + segment.get('e_date_time'),
                    time_format))
            dep_airport = segment.get('b_location').get('location_code')
            arr_airport = segment.get('e_location').get('location_code')
            dep_city = self.port_city.get(dep_airport, dep_airport)
            arr_city = self.port_city.get(arr_airport, arr_airport)

            price_dict = data.get('fares')[0]

            currency = price_dict.get('currency_code')
            max_seats = int(price_dict.get('lsaNbrSeat'))

            # No seats left means no price; otherwise the total amount is
            # split across the adults and also used as the net fare.
            if max_seats < 1:
                adult_price = 0
            else:
                adult_price = (
                    float(price_dict.get('totalAmount')) / adult_count)
            net_fare = adult_price

            item = SpidersHynItem()
            item.update(
                dict(
                    f=flight_number,
                    d=dep_time,
                    a=arr_time,
                    fc=dep_city,
                    tc=arr_city,
                    c=currency,
                    ap=adult_price,
                    at=0,
                    n=net_fare,
                    m=max_seats,
                    cb='X',
                    cr=carrier,
                    i=1,
                    s='[]',
                    g=time.time(),
                    da=dep_airport,
                    aa=arr_airport,
                ))

            yield item
Ejemplo n.º 6
0
    def parse(self, response):
        """Parse a flight-search JSON response and yield one item per flight.

        ``self.isOK`` is set to False when the body is not valid JSON or the
        response carries error code ``PREVENT0001``, and to True once the
        flight list has been read. Error code ``PipSHP0001`` appends
        ``response.meta['invalid']`` to ``self.task``.

        Flights with a non-zero ``stopAirportsSize`` and flights without an
        ``ADT`` price entry are skipped.
        """
        try:
            json_dict = json.loads(response.text.encode('utf-8'))
        except (AttributeError, TypeError, ValueError):
            logging.info('# ip deny')
            self.isOK = False
            return

        common_res = json_dict.get('commonRes')
        if not common_res.get('isOk'):
            error_code = common_res.get('code')

            if error_code == 'PipSHP0001':
                logging.info('# no flight' +
                             json.dumps(response.meta.get('invalid')))
                self.task.append(response.meta.get('invalid'))
                return
            if error_code == 'PREVENT0001':
                logging.info('# need login')
                self.isOK = False
                return
            # NOTE(review): any other error code falls through to normal
            # parsing, as in the original -- confirm this is intended.

        flight_list = json_dict.get('goFlightInfo').get('flightInfo')
        self.isOK = True
        time_format = '%Y-%m-%d %H:%M'

        for flight in flight_list:
            flight_info = flight.get('flightSegs')[0]
            # Connecting flight.
            if flight_info.get('stopAirportsSize') != 0:
                logging.info('# is change')
                continue

            flight_number = flight_info.get('flightNo')

            dep_port = flight.get('orgAirport').get('airportCode')
            arr_port = flight.get('dstAirport').get('airportCode')

            from_city = self.portCitys.get(dep_port, dep_port)
            to_city = self.portCitys.get(arr_port, arr_port)
            carrier = flight_number[:2]
            dt_stamp = time.mktime(
                time.strptime(flight.get('departTime'), time_format))
            at_stamp = time.mktime(
                time.strptime(flight.get('arrivalTime'), time_format))

            brand = flight_info.get('brandSeg')[0]

            # First price entry for passenger type ADT.
            price_info = next(
                (entry for entry in brand.get('price')
                 if entry.get('psgType') == 'ADT'),
                None)
            if price_info is None:
                # Without that entry there is nothing to report for this
                # flight, so skip it rather than fail on the lookups below.
                logging.info('###### price error')
                continue

            net_fare = price_info.get('price')
            currency = price_info.get('currency')
            cabin = brand.get('cabinCode')

            seat_str = brand.get('remaindNum')
            if seat_str == u'>10\u5f20':
                seat = 10
            else:
                # NOTE(review): assumes the seat count is the single
                # character at index 2 of the text -- confirm the format.
                seat = int(seat_str[2])

            item = SpidersHynItem()
            item.update(
                dict(
                    f=flight_number,
                    d=dt_stamp,
                    a=at_stamp,
                    fc=from_city,
                    tc=to_city,
                    c=currency,
                    ap=net_fare,
                    at=0,
                    n=net_fare,
                    m=seat,
                    cb=cabin,
                    cr=carrier,
                    i=1,
                    s='[]',
                    g=time.time(),
                    da=dep_port,
                    aa=arr_port,
                ))

            yield item
Ejemplo n.º 7
0
    def parse(self, response):
        """Turn an ``OutAvailability`` JSON response into flight items.

        When ``OutAvailability`` is missing or empty,
        ``response.meta['invalid']`` is appended to ``self.task`` and nothing
        is yielded. Entries with a non-empty ``Stops`` list are skipped.
        Entries with fewer seats than ``self.ADT`` are emitted with zeroed
        seat and price fields and an empty currency. Entries whose ``Price``
        does not start with ``R`` are logged at level 30 and skipped.
        """
        payload = json.loads(response.text)

        # No flights that day.
        availability = payload.get('OutAvailability')
        if not availability:
            logging.info("no flight")
            self.task.append(response.meta.get('invalid'))
            return

        stamp_format = '%m/%d/%Y %H:%M'

        for entry in availability:
            # Connecting flight.
            if len(entry.get('Stops')) > 0:
                continue

            origin = entry.get('DepartureAirport')
            destination = entry.get('ArrivalAirport')
            number = entry.get('FlightNumber')
            origin_city = self.portCitys.get(origin, origin)
            destination_city = self.portCitys.get(destination, destination)
            airline = number[:2]
            booking_class = entry.get('ClassCode')

            # Only the first token of the date text is the date itself.
            departure_clock = entry.get('DepartureTime')
            departure_day = entry.get('DepartureDateText').split(' ')[0]
            departure_stamp = time.mktime(time.strptime(
                '%s %s' % (departure_day, departure_clock), stamp_format))
            arrival_clock = entry.get('ArrivalTime')
            arrival_day = entry.get('ArrivalDateText').split(' ')[0]
            arrival_stamp = time.mktime(time.strptime(
                '%s %s' % (arrival_day, arrival_clock), stamp_format))

            seats_left = int(entry.get('SeatCount'))
            if seats_left >= int(self.ADT):
                # Amounts carry a one-character currency prefix.
                price_text = entry.get('Price')
                total = float(price_text[1:])
                tax_amount = float(entry.get('TaxValue')[1:])
                fare_amount = float(entry.get('FareValue')[1:])
                # Only amounts prefixed with 'R' are accepted.
                if price_text[0] != 'R':
                    self.log(
                        "currency error: %s->%s at %s Price: %s" %
                        (origin, destination, departure_day, price_text), 30)
                    continue
                currency_code = self.currency
            else:
                # Not enough seats for the requested number of adults.
                seats_left = 0
                total = 0
                tax_amount = 0
                fare_amount = 0
                currency_code = ''

            item = SpidersHynItem()
            item.update({
                'f': number,
                'd': departure_stamp,
                'a': arrival_stamp,
                'fc': origin_city,
                'tc': destination_city,
                'c': currency_code,
                'ap': total,
                'at': tax_amount,
                'n': fare_amount,
                'm': seats_left,
                'cb': booking_class,
                'cr': airline,
                'i': 1,
                's': '[]',
                'g': time.time(),
                'da': origin,
                'aa': destination,
            })

            yield item
Ejemplo n.º 8
0
    def parse(self, response):
        """Parse an outbound-flights JSON response and yield one item each.

        When the body is not valid JSON, ``self.session_flag`` is set and the
        same POST (body taken from ``response.meta['pay_load']``) is issued
        again. A missing ``outboundFlights`` list yields nothing.

        For each flight the fares flagged ``wdc`` are ignored; the remaining
        fares are scanned and the selected one provides net fare, tax and
        seat count.
        """
        logging.debug('parse %s', response.body)

        try:
            data_dict = json.loads(response.text)
        except (TypeError, ValueError):
            self.session_flag = True
            logging.info('parse request error')
            yield scrapy.Request(method='POST',
                                 url=self.start_urls[0],
                                 body=response.meta.get('pay_load'),
                                 meta=response.meta,
                                 dont_filter=True,
                                 callback=self.parse)
            return

        currency = data_dict.get('currencyCode')
        # Airport -> city mapping; airports without an entry map to
        # themselves. Replaces the hard-coded 'SHA'/'BJK' placeholders.
        port_city = getattr(self, 'port_city', None) or {}

        for data in data_dict.get('outboundFlights') or []:
            dep_airport = data.get('departureStation')
            arr_airport = data.get('arrivalStation')
            carrier = data.get('carrierCode')
            flight_number = carrier + data.get('flightNumber')
            dep_time = time.mktime(
                time.strptime(data.get('departureDateTime'),
                              '%Y-%m-%dT%H:%M:%S'))
            arr_time = time.mktime(
                time.strptime(data.get('arrivalDateTime'),
                              '%Y-%m-%dT%H:%M:%S'))
            fares = jsonpath(data, '$..fares')[0]

            adult_price = 0.0
            adult_tax = 0.0
            net_fare = 0.0
            max_seats = 0.0
            for fare in fares:
                # Skip fares flagged ``wdc`` (club price per the original
                # comment); drop this check to use them.
                if fare.get('wdc') is True:
                    continue

                # NOTE(review): the current best (discounted fare + fee) is
                # compared with the candidate's *full base* price; kept from
                # the original -- confirm this is the intended comparison.
                if (adult_price != 0 and
                        adult_price < fare.get('fullBasePrice').get('amount')):
                    continue

                net_fare = fare.get('discountedFarePrice').get('amount')
                adult_tax = fare.get('administrationFeePrice').get('amount')
                adult_price = net_fare + adult_tax
                max_seats = fare.get('availableCount')

            item = SpidersHynItem()
            item.update(dict(
                f=flight_number,
                d=dep_time,
                a=arr_time,
                fc=port_city.get(dep_airport, dep_airport),
                tc=port_city.get(arr_airport, arr_airport),
                c=currency,
                ap=adult_price,
                at=adult_tax,
                n=net_fare,
                m=max_seats,
                cb='X',
                cr=carrier,
                i=1,
                s='[]',
                g=time.time(),
                da=dep_airport,
                aa=arr_airport,
            ))

            yield item
Ejemplo n.º 9
0
    def parse(self, response):
        """Parse a flight-results HTML page and yield one item per flight row.

        Rows are the ``row rowsMarket1`` divs plus the ``rowsMarket1 row
        govNext `` divs inside the ``sortThisTable`` container. When that
        container is missing, ``self.proxy_flag`` is set and nothing is
        yielded.

        Rows without a ``bfsFlightInfo`` input append
        ``response.meta['invalid']`` to ``self.task``. Rows with more than
        one flight-number header, and rows whose price does not start with
        ``$``, are skipped.
        """

        def clean(value):
            # Strip layout whitespace and switch to double quotes so the
            # attribute value can be parsed as JSON.
            return (value.replace('\n', '').replace('\t', '')
                    .replace('\'', '\"'))

        soup = BeautifulSoup(response.body, "lxml")
        # Container div holding the flight rows.
        result = soup.find('div', class_="sortThisTable")
        task_data = response.meta.get('invalid')
        if result is None:
            logging.info("# access denied")
            self.proxy_flag = True
            if 'access' not in str(response.body):
                logging.info('# server error')
                # NOTE(review): time.sleep() blocks the whole process for two
                # minutes; presumably a back-off -- confirm it is acceptable.
                time.sleep(120)

            # Return for now; expired cookies still need to be handled.
            return

        flight_list = result.find_all('div', class_="row rowsMarket1")
        # Rows for flights on the following day.
        flight_list.extend(
            result.find_all('div', class_="rowsMarket1 row govNext "))

        for data in flight_list:
            number_headers = data.find_all(
                'div', class_="fi-header-text text-uppercase text-right")
            flight = data.find_all('input', class_="bfsFlightInfo")
            if len(flight) == 0:
                logging.info(
                    '# no flight: ' + task_data.get('depAirport') +
                    task_data.get('arrAirport') + task_data.get('date'))
                self.task.append(response.meta.get('invalid'))
                continue

            # Connecting flight: checked before any further parsing of a row
            # that is going to be skipped anyway.
            if len(number_headers) > 1:
                logging.info(
                    '# is change: ' + task_data.get('depAirport') +
                    task_data.get('arrAirport') + task_data.get('date'))
                continue

            # Example value once parsed: {u'std': u'2019-01-27T14:30:00...',
            # u'asc': u'ACY', u'cc': u'NK', u'dsc': u'FLL', u'fn': u'262'}
            flight_info_number = json.loads(
                clean(flight[0]['value'])[1:-1].replace(' ', ''))

            fare_input = data.find_all(
                'div', class_="standardFare radio")[0]('input')
            flight_info = clean(
                fare_input[0]['value']).split('~~')[-2].split('~')

            carrier = flight_info_number.get('cc')
            flight_number = carrier + flight_info_number.get('fn')
            dep_time = time.mktime(
                time.strptime(flight_info[1], '%m/%d/%Y %H:%M'))
            arr_time = time.mktime(
                time.strptime(flight_info[3], '%m/%d/%Y %H:%M'))
            dep_airport = flight_info[0]
            arr_airport = flight_info[2]

            price_str = data('em')[-1].text
            # Check the currency symbol before parsing the number so that an
            # unsupported price format is skipped instead of raising.
            if price_str[0] != '$':
                logging.info('# other currency')
                continue
            currency = 'USD'
            net_fare = float(price_str[1:])

            item = SpidersHynItem()
            item.update(dict(
                f=flight_number,
                d=dep_time,
                a=arr_time,
                fc=self.port_city.get(dep_airport, dep_airport),
                tc=self.port_city.get(arr_airport, arr_airport),
                c=currency,
                ap=net_fare,
                at=0,
                n=net_fare,
                m=self.ADT,
                cb='X',
                cr=carrier,
                i=1,
                # Already a JSON string; encoding it again would wrap it in
                # an extra pair of quotes.
                s='[]',
                g=time.time(),
                da=dep_airport,
                aa=arr_airport
            ))

            yield item
Ejemplo n.º 10
0
    def parse(self, response):
        """Parse a JSON flight-search response and yield one item per direct flight.

        Side effects on the spider:

        * ``self.isOK`` is set to False when the IP is denied (error code
          ``'9990'``) or the seat lookup fails, and to True once a flight
          list is available;
        * ``self.task`` receives the task dict (``response.meta['invalid']``)
          when the response carries no flights;
        * ``self.session_flag`` is set to True when the seat lookup returns
          no seat number.

        For each direct flight the first non-zero fare among ``fare01`` ..
        ``fare03`` is used, and two blocking HTTP requests are made to obtain
        the remaining seat count.

        :param response: response object exposing ``text`` and ``meta``.
        :return: generator of ``SpidersHynItem``.
        """
        json_dict = json.loads(response.text)
        # IP has been blocked
        if json_dict.get('errorcode') == '9990':
            logging.info('# ip deny')
            self.isOK = False
            time.sleep(2)
            return
        flight_list = json_dict.get('goflight')

        # No flights for this task
        if not flight_list:
            self.task.append(response.meta.get('invalid'))
            return

        currency = json_dict.get('cur')
        order_id = json_dict.get('orderid')
        ip_proxies = {"http": response.meta.get('proxy')}
        self.isOK = True
        # Fare package names, indexed by fare number - 1.
        cabin_list = ['SAVER', 'SMART', 'PLUS']

        for flights in flight_list:

            flight = flights.get('flight')[0]
            # Connecting flight
            if flight.get('stops') != '0':
                logging.info('# is change')
                continue

            flight_number = flight.get('fltno')

            dep_port = flight.get('ori')
            arr_port = flight.get('dest')

            from_city = self.portCitys.get(dep_port, dep_port)
            to_city = self.portCitys.get(arr_port, arr_port)
            carrier = flight_number[:2]
            dt_date = flight.get('fltdate')
            dt_stamp = time.mktime(
                time.strptime('%s%s' % (dt_date, flight.get('oritime')),
                              '%Y%m%d%H:%M'))
            # The arrival time is combined with the DEPARTURE date ...
            at_stamp = time.mktime(
                time.strptime('%s%s' % (dt_date, flight.get('desttime')),
                              '%Y%m%d%H:%M'))

            # ... so an arrival earlier than the departure means next day.
            if at_stamp < dt_stamp:
                at_stamp += 24 * 3600

            # Pick the first of the three fare packages with a non-zero price.
            net_fare = 0
            cabin_count = None
            for i in range(1, 4):
                price_str = flights.get('fare0' + str(i))
                # A missing fare key is treated like an unavailable fare.
                if price_str is None or price_str == '0':
                    continue
                cabin_count = str(i)
                net_fare = int(price_str)
                break

            if net_fare == 0:
                logging.info('# price is 0')
                continue

            fare_key = 'fare0' + cabin_count
            tax = sum(int(flights.get(fare_key + suffix))
                      for suffix in ('cn', 'yq', 'tax'))

            querystring = {
                "gocode": "0" + cabin_count,
                "goflightno": flight_number,
                "orderid": order_id
            }

            # Fetch the number of remaining seats
            try:
                # The first response is discarded.
                # NOTE(review): presumably this call selects the fare on the
                # server side before seats are queried - confirm.
                requests.get(self.info_url,
                             proxies=ip_proxies,
                             params=querystring,
                             headers=self.headers,
                             timeout=self.timeout)
                res = requests.get(self.seat_url + order_id,
                                   proxies=ip_proxies,
                                   headers=self.headers,
                                   timeout=self.timeout)
                seat_raw = json.loads(res.text).get('flights')[0].get('num')

                # Session expired.  This must be tested BEFORE int():
                # int(None) raises, which used to be swallowed as a generic
                # seat error and left this branch unreachable.
                if seat_raw is None:
                    self.session_flag = True
                    return

                seat = int(seat_raw)

            except Exception:
                logging.info('# get seat error')
                self.isOK = False
                return

            cabin = cabin_list[int(cabin_count) - 1]
            if tax == 0:
                logging.info('## tax error %s', flights)
            price = net_fare + tax

            info = {
                'session_id': self.session_id,
                'order_id': order_id,
                'go_code': '0' + cabin_count
            }

            item = SpidersHynItem()
            item.update(
                dict(f=flight_number,
                     d=dt_stamp,
                     a=at_stamp,
                     fc=from_city,
                     tc=to_city,
                     c=currency,
                     ap=price,
                     at=tax,
                     n=net_fare,
                     m=seat,
                     cb=cabin,
                     cr=carrier,
                     i=1,
                     s='[]',
                     g=time.time(),
                     da=dep_port,
                     aa=arr_port,
                     info=json.dumps(info)))

            yield item
Ejemplo n.º 11
0
    def parse(self, response):
        """Parse a JSON departures response and yield one item per direct TR flight.

        Side effects on the spider:

        * ``self.proxy_flag`` is reset to False once a departures list has
          been extracted;
        * ``self.task`` receives the task dict (``response.meta['invalid']``)
          when the departures list is empty.

        Connecting journeys, flights whose number does not start with ``TR``
        and flights whose price cannot be read are skipped.

        :param response: response object exposing ``body``, ``text`` and
            ``meta``.
        :return: generator of ``SpidersHynItem``.
        """
        try:
            data_dict = json.loads(response.body)
        except (TypeError, ValueError):
            # Body is not JSON: nothing can be extracted from it.
            logging.info('# response is not json')
            return

        try:
            flight_list = jsonpath(data_dict, '$..departures')[0]
        except (TypeError, IndexError, KeyError):
            # The lookup produced nothing subscriptable, i.e. no departures.
            logging.info('# invalid airport')
            logging.info('# flight_list %s', response.text)
            return

        self.proxy_flag = False
        datas = response.meta.get('invalid')

        def route_label():
            # Built lazily so the task dict is only read when we log.
            return (datas.get('depAirport') + datas.get('arrAirport') +
                    datas.get('date'))

        if not flight_list:
            logging.info('# no flight: ' + route_label())
            self.task.append(response.meta.get('invalid'))
            return

        for data in flight_list:

            # Connecting journey: more than one leg
            legs = data.get('legs')
            if len(legs) > 1:
                logging.info('is change')
                continue
            flight_info = legs[0]
            flight_number = str(flight_info.get('flightNumber')).replace(
                ' ', '')
            carrier = flight_number[0:2]

            if carrier != 'TR':
                logging.info('# other airline')
                continue

            # Timestamps look like 2018-12-31T00:55:00
            dep_time = time.mktime(
                time.strptime(flight_info.get('departureDateTime'),
                              '%Y-%m-%dT%H:%M:%S'))
            arr_time = time.mktime(
                time.strptime(flight_info.get('arrivalDateTime'),
                              '%Y-%m-%dT%H:%M:%S'))
            dep_airport = flight_info.get('departure')
            arr_airport = flight_info.get('arrival')

            price_info_list = data.get('fareClasses')
            try:
                # The first fare class provides the quoted price; an empty or
                # missing list is handled like an unreadable price.
                price_info = price_info_list[0]
                adult_price = float(price_info.get('price').get('amount'))
            except (AttributeError, IndexError, TypeError, ValueError):
                logging.info('### price error: ' + route_label())
                continue

            currency = price_info.get('price').get('currency')
            net_fare = adult_price
            cabin = price_info.get('productCode')
            max_seats = int(data.get('journeyInfo').get('seatLeft'))
            adult_tax = 0
            is_change = 1

            # The fare class named 'FlyBag', if present, feeds the segments
            # field as [amount, seats]; otherwise [[0, 0]] is emitted.
            bag_fare = next(
                (fare for fare in price_info_list
                 if fare.get('name') == 'FlyBag'), None)
            try:
                if bag_fare is not None:
                    segments = [[bag_fare.get('price').get('amount'),
                                 max_seats]]
                else:
                    segments = [[0, 0]]
            except AttributeError:
                # The FlyBag entry has no usable 'price' mapping.
                logging.info('### segments price error: ' + route_label())
                continue

            item = SpidersHynItem()
            item.update(
                dict(f=flight_number,
                     d=dep_time,
                     a=arr_time,
                     fc=self.port_city.get(dep_airport, dep_airport),
                     tc=self.port_city.get(arr_airport, arr_airport),
                     c=currency,
                     ap=adult_price,
                     at=adult_tax,
                     n=net_fare,
                     m=max_seats,
                     cb=cabin,
                     cr=carrier,
                     i=is_change,
                     s=json.dumps(segments),
                     g=time.time(),
                     da=dep_airport,
                     aa=arr_airport))

            yield item
Ejemplo n.º 12
0
    def parse(self, response):
        """Parse a JSON availability response and yield one item per direct flight.

        The response embeds a second JSON document under the key
        ``generatedJSonByPrice``; flights are read from its ``list_flight``
        entries.  When that key is absent the task dict
        (``response.meta['invalid']``) is appended to ``self.task``.

        Entries with more than one ``list_segment`` match are treated as
        connecting and skipped.  When no seats are left the prices are
        reported as 0; otherwise the total price is divided by ``self.ADT``.

        :param response: response object exposing ``body``, ``text`` and
            ``meta``.
        :return: generator of ``SpidersHynItem``.
        """
        try:
            response_dict = json.loads(response.body)
        except (TypeError, ValueError):
            # Body is not JSON; record what came back and give up.
            logging.info('# response is not json: %s', response.text)
            return

        # Queried once and reused for both the presence test and the payload.
        price_json = jsonpath(response_dict, '$..generatedJSonByPrice')
        if not price_json:
            self.task.append(response.meta.get('invalid'))
            return

        flight_dict = json.loads(price_json[0])

        flight_list = jsonpath(flight_dict, '$..list_flight')
        if not flight_list:
            # A falsy result cannot be iterated; stop instead of raising.
            # NOTE(review): possibly this should also re-queue the task like
            # the no-flight case above - confirm.
            logging.info('# no list_flight in response')
            return

        for data in flight_list:
            # Connecting flight: more than one segment list
            flight_info_change = jsonpath(data, '$..list_segment')
            if not flight_info_change or len(flight_info_change) > 1:
                continue

            flight_info = flight_info_change[0][0]

            carrier = flight_info.get('airline').get('code')
            flight_number = carrier + flight_info.get('flight_number')

            # Date and time strings are concatenated and parsed as one stamp.
            dep_time = time.mktime(
                time.strptime(
                    flight_info.get('b_date_date') +
                    flight_info.get('b_date_time'), '%Y%m%d%H%M'))
            arr_time = time.mktime(
                time.strptime(
                    flight_info.get('e_date_date') +
                    flight_info.get('e_date_time'), '%Y%m%d%H%M'))
            dep_airport = flight_info.get('b_location').get('location_code')
            arr_airport = flight_info.get('e_location').get('location_code')

            dep_city = self.port_city.get(dep_airport, dep_airport)
            arr_city = self.port_city.get(arr_airport, arr_airport)

            adult_tax = 0

            # First price entry of the first price list
            list_price = jsonpath(data, '$..list_price')[0][0]
            max_seats = int(list_price.get('lsaNbrSeat'))
            currency = list_price.get('currency_code')
            if max_seats < 1:
                # No seats left: report zero prices.
                adult_price = 0
            else:
                adult_price = float(list_price.get('totalPrice')) / self.ADT
            net_fare = adult_price

            is_change = 1
            segments = '[]'
            cabin = list_price.get('rbdFlight')

            item = SpidersHynItem()
            item.update(
                dict(f=flight_number,
                     d=dep_time,
                     a=arr_time,
                     fc=dep_city,
                     tc=arr_city,
                     c=currency,
                     ap=adult_price,
                     at=adult_tax,
                     n=net_fare,
                     m=max_seats,
                     cb=cabin,
                     cr=carrier,
                     i=is_change,
                     s=segments,
                     g=time.time(),
                     da=dep_airport,
                     aa=arr_airport))

            yield item