def parse(self, response):
    """Yield one ``LmdSpidersItem`` per non-stop flight in a JSON fare response.

    ``response.body`` is decoded as JSON; ``self.isOK`` is set to ``False``
    when decoding fails and to ``True`` otherwise. The departure and arrival
    codes come from the first ``airportInfo`` value (spaces removed, split on
    ``'-'``). When that value cannot be read, ``infoList`` is printed and
    nothing is yielded.

    For every card whose ``numberOfStops`` is falsy, the first fare without a
    ``reasonIfUnavailable`` supplies price, currency, seats and product id.
    An available fare described as ``'Anytime'`` is also stored in
    ``segments`` as ``[amount, seats, productId]``. A flight with no fares,
    or with no available fare, is emitted with price 0, 0 seats and ``'USD'``.
    """
    try:
        content = json.loads(response.body)
    except Exception as e:
        self.log(e, 20)
        self.isOK = False
        return
    self.isOK = True

    try:
        dep, arr = jsonpath(content, '$..airportInfo')[0].replace(' ', '').split('-')
    except Exception:
        if content.get('code') == 400521204:
            self.log('error_no_routes_exists', 20)
        print(content.get('infoList'))
        # Without a route nothing below can be built, so always stop here.
        return

    def seats_left(fare):
        # The first space-separated token of ``limitedSeats`` is the seat
        # count; a missing/empty value is reported as 9.
        seat_str = fare.get('limitedSeats')
        return 9 if not seat_str else int(seat_str.split(' ')[0])

    date = jsonpath(content, '$..selectedDate')
    date = date[0]
    flights = jsonpath(content, '$..cards')[0]
    time_format = '%Y-%m-%d %H:%M:%S'
    for flight in flights:
        # Only flights with zero stops are kept.
        if jsonpath(flight, '$..numberOfStops')[0]:
            continue
        flightNumber = self.carrier + flight.get('flightNumbers')
        # Departure / arrival time as strings.
        dep_time_str = '%s %s:00' % (date, flight.get('departureTime'))
        arr_time_str = '%s %s:00' % (date, flight.get('arrivalTime'))
        dep_time = time.mktime(time.strptime(dep_time_str, time_format))
        arr_time = time.mktime(time.strptime(arr_time_str, time_format))

        product_id = ''
        keys = ['Anytime']
        seg = [[0, 0]]
        price = None
        for fare in flight.get('fares') or []:
            if fare.get('reasonIfUnavailable'):
                continue
            key = fare.get('fareDescription')
            if key in keys:
                index = keys.index(key)
                flag_netfare = float(jsonpath(fare, '$..amount')[0])
                flag_seat = seats_left(fare)
                flag_product_id = jsonpath(fare, '$..productId')[0]
                seg[index] = [flag_netfare, flag_seat, flag_product_id]
            if not price:
                # The first available fare is the one that gets reported.
                price = fare

        if not price:
            # No fares at all, or every fare unavailable: emit a zero-price
            # placeholder instead of failing on the missing fare.
            netfare = 0
            seat = 0
            currency = 'USD'
        else:
            netfare = float(jsonpath(price, '$..amount')[0])
            currency = jsonpath(price, '$..currencyCode')[0]
            product_id = jsonpath(price, '$..productId')[0]
            seat = seats_left(price)

        item = LmdSpidersItem()
        item.update(dict(
            flightNumber=flightNumber,
            depAirport=dep,
            arrAirport=arr,
            carrier=self.carrier,
            depTime=dep_time,
            arrTime=arr_time,
            currency=currency,
            segments=json.dumps(seg),
            isChange=1,
            getTime=time.time(),
            fromCity=self.portCitys.get(dep, dep),
            toCity=self.portCitys.get(arr, arr),
            cabin='W',
            adultPrice=netfare,
            netFare=netfare,
            adultTax=0,
            maxSeats=seat,
            info=product_id,
        ))
        yield item
def parse(self, response):
    """Yield one ``LmdSpidersItem`` per flight embedded in the page script.

    The page is decoded as UTF-8 and the JSON assigned to
    ``var flightResults`` is extracted; its first element is the list of
    flights. If that cannot be found or decoded, the page is logged at level
    30 and nothing is yielded.

    The currency is looked up in ``self.currency_cache`` using the value of
    the page's ``'initCurrency'`` marker, defaulting to ``'CNY'``. An unknown
    marker value triggers ``pubUtil.send_email``; a missing marker is logged.

    For each flight the fare bucket is the first truthy one of ``happy``,
    ``happlus``, ``prime``. Flights without any bucket are logged and skipped.
    """
    html_content = response.body.decode("utf-8")

    try:
        flight_results = re.search(r"var flightResults = (.*);", html_content)
        flight_data = json.loads(flight_results.group(1))[0]
    except (AttributeError, ValueError, LookupError, TypeError):
        # No match (AttributeError on None), invalid JSON, or an unexpected
        # top-level shape.
        self.log(html_content, 30)
        return

    currency_match = re.search(r"'initCurrency', '(.*)'", html_content)
    if currency_match is None:
        # Same fallback as for an unknown currency flag.
        self.log("initCurrency marker not found, falling back to 'CNY'", 30)
        currency = 'CNY'
    else:
        currency_flag = currency_match.group(1)
        if currency_flag not in self.currency_cache:
            pubUtil.send_email('new Currency from MM!', currency_flag)
        currency = self.currency_cache.get(currency_flag, 'CNY')

    time_format = '%Y/%m/%d %H:%M:%S'
    for flights in flight_data:
        flightNumber = flights.get('flightNumber')
        depTime = time.mktime(
            time.strptime(flights.get("departureTime"), time_format))
        arrTime = time.mktime(
            time.strptime(flights.get("arrivalTime"), time_format))
        depAirport = flights.get("originCode")
        arrAirport = flights.get("destinationCode")
        adultTax = float(flights.get("taxAdult"))
        # The first two characters of the flight number are used as carrier.
        carrier = flightNumber[0:2]

        fares = flights.get("fares") or {}
        detail_message = (fares.get("happy")
                          or fares.get('happlus')
                          or fares.get("prime"))
        if not detail_message:
            self.log('no fare bucket for flight %s, skipped' % flightNumber, 30)
            continue

        netFare = detail_message.get("fare")
        maxSeats = detail_message.get("seat")
        cabin = detail_message.get("bookingClass")
        adultPrice = netFare + adultTax

        item = LmdSpidersItem()
        item.update(dict(
            flightNumber=flightNumber,
            depAirport=depAirport,
            arrAirport=arrAirport,
            carrier=carrier,
            depTime=depTime,
            arrTime=arrTime,
            currency=currency,
            isChange=1,
            getTime=time.time(),
            fromCity=self.portCitys.get(depAirport, depAirport),
            toCity=self.portCitys.get(arrAirport, arrAirport),
            adultPrice=adultPrice,
            netFare=netFare,
            maxSeats=maxSeats,
            adultTax=adultTax,
            cabin=cabin,
        ))
        yield item
def parse(self, response):
    """Turn a JSON itinerary response into ``LmdSpidersItem`` objects.

    Sets ``self.is_ok`` to ``True``. If the body is not JSON, the error, the
    text and the status are printed and nothing is yielded. If no ``taxAd``
    value is present, the queried route and date (from the response's
    ``param`` or, failing that, the request's ``payload`` meta) are logged
    and nothing is yielded.

    Otherwise one item is yielded per flight in every ``listItineraryFare``
    entry, priced with the cheapest booking class other than ``'F'`` plus
    the sum of ``taxAd`` and ``fuelAd``.
    """
    self.is_ok = True

    try:
        payload = json.loads(response.text)
    except Exception as err:
        print(err)
        print(response.text)
        print(response.status)
        return

    try:
        tax_amount = jsonpath(payload, '$..taxAd')[0]
    except Exception as err:
        self.log(err, 20)
        query = payload.get('param') or response.meta.get('payload')
        self.log(
            '%s->%s on %s no data' % (
                query.get('depCity1'),
                query.get('arrCity1'),
                query.get('depDate1'),
            ),
            20)
        return

    total_tax = tax_amount + jsonpath(payload, '$..fuelAd')[0]
    stamp_format = '%Y%m%d%H%M'

    # Flights
    for itinerary in payload.get('listItineraryFare'):
        origin = itinerary.get('depCity')
        destination = itinerary.get('arrCity')
        origin_city = self.port_city.get(origin, origin)
        destination_city = self.port_city.get(destination, destination)

        for leg in itinerary.get('listFlight'):
            departure_text = leg.get('depDate') + leg.get('depTime')
            arrival_text = leg.get('arrDate') + leg.get('arrTime')
            departure_ts = time.mktime(
                time.strptime(departure_text, stamp_format))
            arrival_ts = time.mktime(
                time.strptime(arrival_text, stamp_format))

            # Carrier letters followed by the number without leading zeros.
            prefix, digits = re.match(
                r'([A-Z]+)(\d+)', leg.get('flightNo')).groups()
            number = prefix + digits.lstrip('0')

            # Find the lowest price.
            best_fare, best_cabin, best_currency, best_seats = 0, '', '', 0
            for booking_class in leg.get('listCls'):
                # Ignore the 1+1 EVENT promotional tickets.
                if booking_class.get('cls') == 'F':
                    continue
                candidate = booking_class.get('priceAd')
                if best_fare and not best_fare > candidate:
                    continue
                best_fare = candidate
                best_cabin = booking_class.get('cls')
                best_currency = booking_class.get('currency')
                best_seats = booking_class.get('avail')

            item = LmdSpidersItem()
            item.update({
                'flightNumber': number,                 # flight number
                'depTime': departure_ts,                # departure time
                'arrTime': arrival_ts,                  # arrival time
                'fromCity': origin_city,                # departure city
                'toCity': destination_city,             # arrival city
                'depAirport': origin,                   # departure airport
                'arrAirport': destination,              # arrival airport
                'currency': best_currency,              # currency
                'adultPrice': best_fare + total_tax,    # adult price
                'adultTax': total_tax,                  # tax
                'netFare': best_fare,                   # net fare
                'maxSeats': best_seats,                 # bookable seats
                'cabin': best_cabin,                    # cabin
                'carrier': number[:2],                  # airline
                'isChange': 1,                          # 1 = direct, 2 = connecting
                'segments': "[]",                       # per-flight info when connecting
                'getTime': time.time(),
            })
            yield item
def parse(self, response):
    """Yield one ``LmdSpidersItem`` per regular fare anchor in the page.

    Sets ``self.isOK`` to ``True`` and walks every ``//div/a`` element.
    Anchors without a ``data-is-super`` attribute, or with it set to
    ``'true'``, are skipped. The ``data-journeykey`` attribute is split into
    carrier, number, departure airport/date/time and arrival
    airport/date/time. Tax and net fare come from the first entry of the
    JSON in ``data-journeyfare``. The price is ``data-price-format`` without
    its first character, which selects the currency from
    ``self.currency_cache`` (default ``'EUR'``). The cabin is the fourth
    field of ``data-farekey``. Seats default to 9 when ``data-free-places``
    is missing or not an integer.
    """
    self.isOK = True
    self.log('data is parseing.....', 20)

    def split_key(raw):
        # Fields are separated by runs of '~', '|' or whitespace; empty
        # pieces are dropped. A real list is returned (not a lazy ``filter``
        # object) so the result can be indexed on Python 3 as well.
        return [piece for piece in re.split(r'[~|\s]+', raw) if piece]

    for a in response.xpath('//div/a'):
        flags = a.xpath('./@data-is-super').extract()
        if not flags or flags[0] == 'true':
            continue

        jour_key = a.xpath('./@data-journeykey').extract()[0]
        (carrier, number, dep, dep_date, dep_time,
         arr, arr_date, arr_time) = split_key(jour_key)
        flightNumber = carrier + number
        dep_ts = self.dt_to_ts('%s %s' % (dep_date, dep_time))
        arr_ts = self.dt_to_ts('%s %s' % (arr_date, arr_time))

        try:
            seats = int(a.xpath('./@data-free-places').extract()[0])
        except (IndexError, ValueError, TypeError):
            seats = 9

        jour_fare = a.xpath('./@data-journeyfare').extract()[0]
        fare_dict = json.loads(jour_fare)[0]
        tax = fare_dict.get('tax')
        netFare = fare_dict.get('farePrice')

        # First character is the currency symbol, the rest is the amount.
        price_str = a.xpath('./@data-price-format').extract()[0]
        currency = self.currency_cache.get(price_str[0], 'EUR')
        price = float(price_str[1:])

        fare_key = a.xpath('./@data-farekey').extract()[0]
        cabin = split_key(fare_key)[3]

        item = LmdSpidersItem()
        item.update(dict(
            flightNumber=flightNumber,
            depAirport=dep,
            arrAirport=arr,
            carrier=carrier,
            depTime=dep_ts,
            arrTime=arr_ts,
            currency=currency,
            segments=json.dumps([]),
            isChange=1,
            getTime=time.time(),
            fromCity=self.portCitys.get(dep, dep),
            toCity=self.portCitys.get(arr, arr),
            cabin=cabin,
            adultPrice=price,
            netFare=netFare,
            adultTax=tax,
            maxSeats=seats,
        ))
        yield item
def parse(self, response):
    """Yield one ``LmdSpidersItem`` per journey in the evaluated response.

    The response text is evaluated as a Python expression with ``null``,
    ``true`` and ``false`` bound to ``''``, ``'true'`` and ``'false'``. If
    evaluation fails, ``self.get_headers()`` is called and the original POST
    (``response.meta['data_post']``) is re-issued against
    ``self.start_urls[1]``.

    For each journey, the first fare whose ``ProductClass`` is not ``'WC'``
    supplies price, net fare, seats and cabin; tax is price minus net fare.
    The ``Middle`` and ``Plus`` package prices of that fare are written to
    ``segments`` as ``[price, seats]`` pairs. A journey without usable fares
    is emitted with zero prices.
    """
    # These names let eval() accept the JSON literals null/true/false.
    null = ''
    true = 'true'
    false = 'false'
    try:
        # SECURITY(review): eval() executes text received from the remote
        # server. It is kept because the null/true/false mapping above
        # differs from json.loads(); replace it with a real JSON parser once
        # downstream consumers are confirmed not to rely on that mapping.
        data_dict = eval(response.text)
    except Exception:
        logging.info('pls update headers')
        data_post = response.meta.get('data_post')
        self.get_headers()
        yield scrapy.Request(
            method='POST',
            url=self.start_urls[1],
            headers=self.custom_settings.get('DEFAULT_REQUEST_HEADERS'),
            body=json.dumps(data_post),
            meta={'data_post': data_post},
            dont_filter=True,
            callback=self.parse)
        return

    journeys = data_dict.get('Journeys')
    if journeys is None:
        logging.info('response has no Journeys')
        return
    currency = data_dict.get('CurrencyCode')
    time_format = '%Y-%m-%dT%H:%M:%S'

    for journey in journeys:
        depAirport = journey.get('DepartureStation')
        arrAirport = journey.get('ArrivalStation')
        carrier = journey.get('CarrierCode')
        flightNumber = carrier + journey.get('FlightNumber')
        depTime = time.mktime(time.strptime(journey.get('STD'), time_format))
        arrTime = time.mktime(time.strptime(journey.get('STA'), time_format))

        # jsonpath() returns False when nothing matches; treat that like an
        # empty fare list so the zero-price placeholder below is emitted.
        fare_matches = jsonpath(journey, '$..Fares')
        fares = (fare_matches[0] if fare_matches else None) or []

        lowFare = dict(
            adultPrice=0,
            netFare=0,
            maxSeats=0,
            adultTax=0,
            cabin='',
        )
        # Package prices: define the price table first.
        price_dict = {'Basic': 0, 'Middle': 0, 'Plus': 0, 'SUPER_ECO': 0}
        for fare in fares:
            # Skip the 'WC' product class (original note: wizz club price;
            # removing this check would use the member discount price).
            if fare.get('ProductClass') == 'WC':
                continue
            paxFareTypes = fare.get('PaxFares')[0].get('PaxFareTypes')
            for paxfare in paxFareTypes:
                package_name = paxfare.get('PaxFareClass')
                price_dict[package_name] = max(jsonpath(paxfare, '$..Amount'))
            # The first fare type is the one reported as the low fare.
            paxfare = paxFareTypes[0]
            price = max(jsonpath(paxfare, '$..Amount'))
            netfare = paxfare.get('PureFarePriceAmount')
            lowFare['adultPrice'] = price
            lowFare['netFare'] = netfare
            lowFare['maxSeats'] = fare.get('AvailableCount')
            lowFare['cabin'] = fare.get('ProductClass')
            lowFare['adultTax'] = price - netfare
            # Only the first non-'WC' fare is used.
            break

        segments = [
            [price_dict.get('Middle'), lowFare.get('maxSeats')],
            [price_dict.get('Plus'), lowFare.get('maxSeats')],
        ]
        item = LmdSpidersItem()
        item.update(dict(
            flightNumber=flightNumber,
            depAirport=depAirport,
            arrAirport=arrAirport,
            carrier=carrier,
            depTime=depTime,
            arrTime=arrTime,
            currency=currency,
            segments=json.dumps(segments),
            isChange=1,
            getTime=time.time(),
            fromCity=self.portCitys.get(depAirport, depAirport),
            toCity=self.portCitys.get(arrAirport, arrAirport),
        ))
        item.update(lowFare)
        yield item
def parse(self, response):
    """Yield one ``LmdSpidersItem`` per direct journey in an availability reply.

    The body is decoded as JSON and the first ``availabilityv2`` value is
    used. If it cannot be read, the error and body are printed and the POST
    stored in ``response.meta['post_data']`` is re-issued. A falsy
    availability value yields nothing.

    Journeys of type ``'Connect'`` or with more than two legs are skipped.
    The journey's first fare gives net fare, price, seats and cabin; the
    price is raised by 350 when the journey is flagged international and by
    225 otherwise. Fares whose ``fareCode`` second character is ``'0'`` or
    ``'U'`` fill the matching ``segments`` slot with ``[price, seats]``.
    """
    payload = json.loads(response.body)
    try:
        availability = jsonpath(payload, '$..availabilityv2')[0]
    except Exception as err:
        print(err)
        print(response.body)
        retry_body = response.meta.get('post_data')
        yield scrapy.Request(
            url=self.start_urls,
            method="POST",
            body=json.dumps(retry_body),
            meta={'post_data': retry_body},
            dont_filter=True,
        )
        return

    # No flights on that day.
    if not availability:
        return

    currency = availability.get('currencyCode')
    fare_list = availability.get('faresAvailable')
    fare_by_key = {}
    for entry in fare_list or ():
        fare_by_key[entry['key']] = entry['value']

    time_format = '%Y-%m-%dT%H:%M:%S'
    for journey in jsonpath(availability, '$..journeysAvailable')[0]:
        # Exclude non-direct flights.
        if journey.get('flightType') == 'Connect':
            continue

        legs = jsonpath(journey, '$..legs')[0]
        # Exclude flights with multiple stops.
        if len(legs) > 2:
            continue
        if len(legs) == 2:
            stop_cities = jsonpath(legs[1], '$..origin')[0]
        else:
            stop_cities = ''

        designator = journey.get('designator')
        departure_text = designator.get('departure')
        arrival_text = designator.get('arrival')
        departure_ts = time.mktime(time.strptime(departure_text, time_format))
        arrival_ts = time.mktime(time.strptime(arrival_text, time_format))
        origin = designator.get('origin')
        destination = designator.get('destination')

        identifier = journey.get('segments')[0].get('identifier')
        carrier = identifier.get('carrierCode')
        flight_number = carrier + identifier.get('identifier')

        international = jsonpath(journey, '$..international')[0]
        fail_count = 0 if international else 99
        # International routes add 350, domestic routes add 225.
        surcharge = 350 if international else 225

        journey_fares = journey.get('fares')
        if fare_list and journey_fares:
            cheapest = journey_fares[0]
            cheapest_prices = fare_by_key.get(cheapest.get('key'))
            net_fare = jsonpath(cheapest_prices, '$..discountedFare')[0]
            adult_price = jsonpath(cheapest_prices, '$..fareAmount')[0]
            seats = jsonpath(cheapest, '$..availableCount')[0]
            cabin = jsonpath(cheapest, '$..fareCode')[0]
            adult_price += surcharge

            # Package prices (original note: problematic, not yet resolved).
            bundle_codes = ['0', 'U']
            segments = [[-1, -1] for _ in bundle_codes]
            for option in journey_fares:
                option_key = option.get('key')
                details = option.get('value')
                code = details.get('fareCode')[1]
                if code not in bundle_codes:
                    continue
                slot = bundle_codes.index(code)
                available = details.get('availableCount')
                if not available:
                    continue
                bundle_price = jsonpath(
                    fare_by_key.get(option_key), '$..fareAmount')[0]
                bundle_price += surcharge
                segments[slot] = [bundle_price, available]
        else:
            adult_price = 0
            net_fare = 0
            seats = 0
            cabin = ''
            segments = []

        item = LmdSpidersItem()
        item.update({
            'flightNumber': flight_number,
            'depAirport': origin,
            'arrAirport': destination,
            'carrier': carrier,
            'depTime': departure_ts,
            'arrTime': arrival_ts,
            'currency': currency,
            'segments': json.dumps(segments),
            'isChange': 1,
            'getTime': time.time(),
            'fromCity': self.port_citys.get(origin, origin),
            'toCity': self.port_citys.get(destination, destination),
            'adultPrice': adult_price,
            'netFare': net_fare,
            'adultTax': adult_price - net_fare,
            'maxSeats': seats,
            'cabin': cabin,
            'stopCities': stop_cities,
            'failCount': fail_count,
        })
        yield item
def parse(self, response):
    """Yield one ``LmdSpidersItem`` per journey listed under the search date.

    The body is decoded as JSON and ``self.isOK`` is set to ``True``. A
    ``code`` of ``'33020'`` is logged as "no ticket" and yields nothing.
    Journeys are read from the value keyed by the request's ``beginDate``
    (``response.meta['data']``); if no such value exists this is logged and
    nothing is yielded.

    Each journey is priced with its lowest ``ta1`` fare. Journeys without
    fares are logged and skipped. A seat value of ``u'充足'`` is reported as
    10 seats; any other value is converted with ``int``. Prices are emitted
    in ``'CNY'`` with zero tax.
    """
    data = json.loads(response.body)
    self.isOK = True
    code = data.get('code')
    if '33020' == code:
        self.log('no ticket', 20)
        return

    dt = response.meta.get('data').get('beginDate')
    # jsonpath() returns False rather than an empty list when nothing
    # matches, so check before indexing.
    matches = jsonpath(data, '$..%s' % dt)
    if not matches:
        self.log('no journeys found for %s' % dt, 20)
        return

    time_format = '%Y-%m-%d %H:%M:%S'
    for journey in matches[0]:
        dep_airport = journey.get('beginCity')
        arr_airport = journey.get('endCity')
        from_city = self.port_city.get(dep_airport, dep_airport)
        to_city = self.port_city.get(arr_airport, arr_airport)
        fn = journey.get('flightNum')
        dep_time_str = '%s %s' % (journey.get('beginDate'),
                                  journey.get('beginDateTime'))
        arr_time_str = '%s %s' % (journey.get('endDate'),
                                  journey.get('endDateTime'))
        dep_time = time.mktime(time.strptime(dep_time_str, time_format))
        arr_time = time.mktime(time.strptime(arr_time_str, time_format))

        fares = journey.get('ta1')
        if not fares:
            # Nothing to price; the lookup below would fail on None.
            self.log('no fares for flight %s, skipped' % fn, 20)
            continue

        # Keep the cheapest fare; a stored price of 0 counts as "not set".
        low_price = 0
        low_fare = None
        for fare in fares:
            this_price = int(fare.get('flightPrice'))
            if not low_price or (low_price > this_price):
                low_fare = fare
                low_price = this_price

        adult_price = low_price
        cabin = low_fare.get('bookingClass')
        seats = low_fare.get('seat')
        if u'充足' == seats:
            seats = 10
        else:
            seats = int(seats)

        item = LmdSpidersItem()
        item.update(dict(
            flightNumber=fn,            # flight number
            depTime=dep_time,           # departure time
            arrTime=arr_time,           # arrival time
            fromCity=from_city,         # departure city
            toCity=to_city,             # arrival city
            depAirport=dep_airport,     # departure airport
            arrAirport=arr_airport,     # arrival airport
            currency='CNY',             # currency
            adultPrice=adult_price,     # adult price
            adultTax=0,                 # tax
            netFare=adult_price,        # net fare
            maxSeats=seats,             # bookable seats
            cabin=cabin,                # cabin
            carrier=fn[:2],             # airline
            isChange=1,                 # 1 = direct, 2 = connecting
            segments="[]",              # per-flight info when connecting
            getTime=time.time(),
        ))
        yield item
def parse(self, response):
    """Yield items for single-leg journeys, or a retry request on bad data.

    The body is decoded as JSON and ``Journeys`` is looked up. When there
    are none: a ``Code`` that converts to 0 ends parsing; otherwise the body
    is printed and the POST stored in ``response.meta['data_dict']`` is
    re-issued.

    Journeys with more than one ``TravelInfos`` entry are skipped. The
    cheapest of ``FlyFare``, ``FlyBagFare`` and ``FlyBagEatFare`` gives
    price, seats and tax; a journey without ``FlyFare`` is emitted with
    zeros. If a date cannot be converted, or the price equals the tax,
    ``self.isOK`` is set to ``False``, the request is re-issued and the
    remaining journeys are dropped. Any other exception is printed together
    with the response body.
    """

    def retry_request(data_dict):
        # Re-issue the original POST so the same search is attempted again.
        return scrapy.Request(
            method='POST',
            url=self.start_urls,
            body=json.dumps(data_dict),
            meta={'data_dict': data_dict},
            dont_filter=True,
            callback=self.parse,
            errback=lambda failure: self.download_errback(failure, data_dict))

    try:
        response_dict = json.loads(response.body)
        journeys = jsonpath(response_dict, '$..Journeys')
        code = response_dict.get('Code')
        if not journeys:
            if not int(code):
                return
            logging.info('not journeys... ')
            print(response.body)
            yield retry_request(response.meta.get('data_dict'))
            return

        currency = jsonpath(response.meta, '$..Currency')[0]
        for journey in journeys[0]:
            infos = journey.get('TravelInfos')
            if len(infos) > 1:
                continue
            info = infos[0]
            dep = info.get('DepartureAirportCode')
            arr = info.get('ArrivalAirportCode')
            try:
                depTime = ddUtil.str_to_stamp(info.get('DepartureDate'))
                arrTime = ddUtil.str_to_stamp(info.get('ArrivalDate'))
            except Exception:
                logging.info('the format of date is error!')
                self.isOK = False
                data_dict = response.meta.get('data_dict')
                data_info = data_dict.get('GetAvailabilityDetail')
                print(data_info.get('DepartureAirport'),
                      data_info.get('ArrivalAirport'),
                      data_info.get('BoardDate'))
                yield retry_request(data_dict)
                break

            flightNumber = info.get('FlightNumber')
            aircraft = info.get('EquipmentType')
            cabin = info.get('ClassCode')
            duration = ddUtil.format_duration(info.get('TravelDuration'))
            carrier = info.get('CarrierCode')

            fare = journey.get('FlyFare')
            bagfare = journey.get('FlyBagFare')
            bageatfare = journey.get('FlyBagEatFare')
            lowfare = fare
            if not lowfare:
                seats, price, tax = 0, 0, 0
            else:
                # Switch to a bundle only when it is strictly cheaper.
                if bagfare and lowfare.get('FareAmount') > bagfare.get(
                        'FareAmount', 999999):
                    lowfare = bagfare
                if bageatfare and lowfare.get('FareAmount') > bageatfare.get(
                        'FareAmount', 99999):
                    lowfare = bageatfare
                seats = lowfare.get('AvailableCount')
                price = lowfare.get('FareAmount')
                tax = jsonpath(lowfare, '$..Amount')[0]
                if price == tax:
                    # A fare equal to its own tax is treated as bad data.
                    logging.info('price == tax')
                    self.isOK = False
                    yield retry_request(response.meta.get('data_dict'))
                    break

            self.isOK = True
            segment = dict(
                flightNumber=flightNumber,
                aircraftType=aircraft,
                number=1,
                airline=carrier,
                dep=dep,
                dest=arr,
                duration=duration,
                departureTime=dataUtil.format_seg_time(depTime),
                destinationTime=dataUtil.format_seg_time(arrTime),
                depTerminal='',
                seats=seats)
            item = LmdSpidersItem()
            item.update(dict(
                carrier=carrier,
                maxSeats=seats,
                flightNumber=flightNumber,
                depTime=depTime,
                arrTime=arrTime,
                depAirport=dep,
                arrAirport=arr,
                cabin=cabin,
                currency=currency,
                isChange=1,
                getTime=time.time(),
                adultPrice=price,
                adultTax=tax,
                netFare=price - tax,
                fromCity=self.portCitys.get(dep, dep),
                toCity=self.portCitys.get(arr, arr),
                segments=json.dumps([segment]),
            ))
            yield item
    except Exception:
        # ``Exception`` (not a bare except) so that GeneratorExit raised at
        # a ``yield`` and KeyboardInterrupt are not swallowed here.
        traceback.print_exc()
        print(response.body)