def parseFlightAndTicket(content_temp, time_zone_A, time_zone_B):
    """Parse one easyjet JSON response into flight and ticket records.

    The payload is UTF-8 encoded, every '£' is replaced by 'GBP', and the
    result is decoded as JSON.  The flight details are then extracted from
    the 'Html' member (newlines removed) with the module-level regex
    patterns; only the first match of each pattern is used.

    Args:
        content_temp: raw response text.
        time_zone_A: forwarded to durCal as its third argument.
        time_zone_B: forwarded to durCal as its fourth argument.

    Returns:
        A dict with two keys: 'ticket', a list holding one 17-field ticket
        tuple, and 'flight', a dict mapping
        '<flight_no>_<dept_id>_<dest_id>' to an 8-field segment tuple.
        Any exception raised while parsing is printed and the function
        returns whatever had been collected up to that point.
    """
    payload = content_temp.encode('utf-8').replace('£', 'GBP')
    flights = {}
    result = {'ticket': [], 'flight': flights}
    flight = Flight()
    eachflight = EachFlight()
    try:
        html = json.loads(payload)['Html'].replace('\n', '')

        def first_match(pattern):
            # Every field comes from the first regex hit in the markup.
            return pattern.findall(html)[0]

        flight.flight_no = 'EZY' + first_match(flight_no_pat)
        flight.airline = 'easyjet'
        flight.dept_id = first_match(dept_id_pat)
        flight.dest_id = first_match(dest_id_pat)
        # 'YYYY-MM-DD HH:MM' style values become 'YYYY-MM-DDTHH:MM:00'.
        flight.dept_time = first_match(dept_time_pat).replace(' ', 'T') + ':00'
        flight.dest_time = first_match(dest_time_pat).replace(' ', 'T') + ':00'
        flight.price = first_match(price_pat)
        flight.seat_type = '经济舱'
        flight.source = 'easyjet::easyjet'
        flight.currency = first_match(currency_pat)
        flight.stop = 0
        flight.dept_day = flight.dept_time.split('T')[0]
        flight.dur = durCal(flight.dept_time, flight.dest_time,
                            time_zone_A, time_zone_B)

        # The single segment record mirrors the ticket's own fields.
        eachflight.flight_key = (flight.flight_no + '_' + flight.dept_id +
                                 '_' + flight.dest_id)
        for field in ('flight_no', 'dept_id', 'dest_id',
                      'dept_time', 'dest_time', 'dur'):
            setattr(eachflight, field, getattr(flight, field))
        eachflight.airline = 'easyjet'

        flights[eachflight.flight_key] = (
            eachflight.flight_no, eachflight.airline, eachflight.plane_no,
            eachflight.dept_id, eachflight.dest_id, eachflight.dept_time,
            eachflight.dest_time, eachflight.dur)

        result['ticket'] = [(
            flight.flight_no, flight.plane_no, flight.airline,
            flight.dept_id, flight.dest_id, flight.dept_day,
            flight.dept_time, flight.dest_time, flight.dur, flight.price,
            flight.tax, flight.surcharge, flight.currency, flight.seat_type,
            flight.source, flight.return_rule, flight.stop)]
        result['flight'] = flights
    except Exception as e:
        print(str(e))
    return result
def parseFlightAndTicket(content_temp, time_zone_A, time_zone_B):
    """Parse one easyjet JSON response into flight and ticket records.

    The payload is UTF-8 encoded, every '£' is replaced by 'GBP', and the
    result is decoded as JSON.  The flight details are then extracted from
    the 'Html' member (newlines removed) with the module-level regex
    patterns; only the first match of each pattern is used.

    Args:
        content_temp: raw response text.
        time_zone_A: forwarded to durCal as its third argument.
        time_zone_B: forwarded to durCal as its fourth argument.

    Returns:
        A dict with two keys: 'ticket', a list holding one 17-field ticket
        tuple, and 'flight', a dict mapping
        '<flight_no>_<dept_id>_<dest_id>' to an 8-field segment tuple.
        Any exception raised while parsing is printed and the function
        returns whatever had been collected up to that point.
    """
    payload = content_temp.encode('utf-8').replace('£', 'GBP')
    flights = {}
    result = {'ticket': [], 'flight': flights}
    flight = Flight()
    eachflight = EachFlight()
    try:
        html = json.loads(payload)['Html'].replace('\n', '')

        def first_match(pattern):
            # Every field comes from the first regex hit in the markup.
            return pattern.findall(html)[0]

        flight.flight_no = 'EZY' + first_match(flight_no_pat)
        flight.airline = 'easyjet'
        flight.dept_id = first_match(dept_id_pat)
        flight.dest_id = first_match(dest_id_pat)
        # 'YYYY-MM-DD HH:MM' style values become 'YYYY-MM-DDTHH:MM:00'.
        flight.dept_time = first_match(dept_time_pat).replace(' ', 'T') + ':00'
        flight.dest_time = first_match(dest_time_pat).replace(' ', 'T') + ':00'
        flight.price = first_match(price_pat)
        flight.seat_type = '经济舱'
        flight.source = 'easyjet::easyjet'
        flight.currency = first_match(currency_pat)
        flight.stop = 0
        flight.dept_day = flight.dept_time.split('T')[0]
        flight.dur = durCal(flight.dept_time, flight.dest_time,
                            time_zone_A, time_zone_B)

        # The single segment record mirrors the ticket's own fields.
        eachflight.flight_key = (flight.flight_no + '_' + flight.dept_id +
                                 '_' + flight.dest_id)
        for field in ('flight_no', 'dept_id', 'dest_id',
                      'dept_time', 'dest_time', 'dur'):
            setattr(eachflight, field, getattr(flight, field))
        eachflight.airline = 'easyjet'

        flights[eachflight.flight_key] = (
            eachflight.flight_no, eachflight.airline, eachflight.plane_no,
            eachflight.dept_id, eachflight.dest_id, eachflight.dept_time,
            eachflight.dest_time, eachflight.dur)

        result['ticket'] = [(
            flight.flight_no, flight.plane_no, flight.airline,
            flight.dept_id, flight.dest_id, flight.dept_day,
            flight.dept_time, flight.dest_time, flight.dur, flight.price,
            flight.tax, flight.surcharge, flight.currency, flight.seat_type,
            flight.source, flight.return_rule, flight.stop)]
        result['flight'] = flights
    except Exception as e:
        print(str(e))
    return result
def eachflightParser(each_flight_content):
    """Build the segment record for one flight from an indexed sequence.

    Fields read from ``each_flight_content`` (by position):
        0  departure airport id        1  arrival airport id
        2  airline code (key into airline_dict, prefix of the flight no)
        4  departure day               5  departure time in minutes
        6  arrival day                 7  arrival time in minutes
        10 flight number suffix        11 minute offset added to the duration
        12 plane number

    Args:
        each_flight_content: indexable sequence with at least 13 items.

    Returns:
        A dict mapping '<flight_no>_<dept_id>_<dest_id>' to the 8-field
        tuple (flight_no, airline, plane_no, dept_id, dest_id, dept_time,
        dest_time, dur-in-seconds).  The dict is empty when the airline code
        is unknown or when any field cannot be parsed.
    """
    minutes_per_day = 24 * 60

    eachflight = EachFlight()
    each_flight_dict = {}
    try:
        airline_code = each_flight_content[2]
        eachflight.flight_no = airline_code + each_flight_content[10]
        eachflight.dept_id = each_flight_content[0]
        eachflight.dest_id = each_flight_content[1]
        eachflight.flight_key = (eachflight.flight_no + '_' +
                                 eachflight.dept_id + '_' +
                                 eachflight.dest_id)
        eachflight.plane_no = each_flight_content[12]

        try:
            eachflight.airline = airline_dict[airline_code]
        except KeyError:
            # Unknown airline codes are logged and the segment is skipped.
            logger.info('[AIRLINEERRORCODE]' + airline_code)
            return each_flight_dict

        dept_time_day = day_calculator(each_flight_content[4])
        dest_time_day = day_calculator(each_flight_content[6])
        eachflight.dept_time = time_calculator(dept_time_day,
                                               each_flight_content[5])
        eachflight.dest_time = time_calculator(dest_time_day,
                                               each_flight_content[7])

        minutes = (int(each_flight_content[7]) - int(each_flight_content[5]) +
                   int(each_flight_content[11]))
        if each_flight_content[4] != each_flight_content[6]:
            # Arrival on a different day: add one full day.  The original
            # added 1400 here, which is not a day's worth of minutes (1440).
            # NOTE(review): like the original, this assumes the arrival is
            # at most one day after the departure - confirm against the feed.
            minutes += minutes_per_day
        eachflight.dur = minutes * 60

        each_flight_dict[eachflight.flight_key] = (
            eachflight.flight_no, eachflight.airline, eachflight.plane_no,
            eachflight.dept_id, eachflight.dest_id, eachflight.dept_time,
            eachflight.dest_time, eachflight.dur)
    except Exception:
        # Best effort: a malformed record yields an empty result.
        return each_flight_dict
    # The original only returned from the except branch, so a successfully
    # parsed record was lost (implicit None).
    return each_flight_dict
def parse_page(content, price_dict):
    """Parse a jijitong round-trip search response.

    Each entry of ``datalist`` carries a 'Key' of the form
    '<outbound flight nos>^<return flight nos>' (flight numbers of one
    direction joined by '_') and an 'ODO' list with the outbound segments
    first, followed by the return segments.

    Args:
        content: JSON text of the response.
        price_dict: maps the entry 'Key' to the ticket price.

    Returns:
        A dict with 'ticket' (list of 26-field round-trip tuples) and
        'flight' (dict mapping '<flight_no>_<dept_id>_<dest_id>' to an
        8-field segment tuple).  Both are empty when the content is not
        valid JSON or the response status is not 'SUCCESS'.  Entries that
        fail to parse are logged and skipped.
    """
    tickets = []
    flights = {}
    results = {'ticket': tickets, 'flight': flights}

    def with_seconds(stamp):
        # 'YYYY-MM-DDTHH:MM' is shorter than 17 chars and lacks the seconds.
        if len(stamp) < 17:
            return stamp + ':00'
        return stamp

    def direction_duration(odo, legs, count, first_layover):
        # Seconds for one direction: flying time ('ET' minutes) of every leg
        # plus the layovers between consecutive legs (computed by durCal).
        if count == 1:
            return int(legs[0]['ET']) * 60
        total = 0
        for leg in legs:
            total += int(leg['ET']) * 60
        for x in range(first_layover, first_layover + count - 1):
            total += durCal(odo[x - 1]['AD'], odo[x]['DD'])
        return total

    try:
        json_temp = json.loads(content)
    except (TypeError, ValueError):
        return results

    if json_temp['Status'] != 'SUCCESS':
        return results

    for each_flight_json in json_temp['datalist']:
        roundflight = RoundFlight()
        try:
            key_parts = each_flight_json['Key'].split('^')
            flight_no_a = key_parts[0]
            flight_no_b = key_parts[1]
            a_num = len(flight_no_a.split('_'))
            b_num = len(flight_no_b.split('_'))
            odo = each_flight_json['ODO']
            outbound = odo[:a_num]
            inbound = odo[a_num:]

            roundflight.stop_A = a_num - 1
            roundflight.stop_B = b_num - 1
            roundflight.flight_no_A = flight_no_a
            roundflight.flight_no_B = flight_no_b
            roundflight.dept_id = odo[0]['OL']
            # The destination is where the return direction starts.
            roundflight.dest_id = odo[a_num]['OL']

            roundflight.dept_time_A = with_seconds(odo[0]['DD'])
            roundflight.dest_time_A = with_seconds(odo[a_num - 1]['AD'])
            roundflight.dept_time_B = with_seconds(odo[a_num]['DD'])
            roundflight.dest_time_B = with_seconds(odo[-1]['AD'])
            roundflight.dept_day = roundflight.dept_time_A.split('T')[0]
            roundflight.dest_day = roundflight.dept_time_B.split('T')[0]

            roundflight.price = price_dict[flight_no_a + '^' + flight_no_b]
            roundflight.tax = each_flight_json['AIP'][0]['TX']
            roundflight.source = 'jijitong::jijitong'
            roundflight.seat_type_A = '经济舱'
            roundflight.seat_type_B = '经济舱'
            roundflight.currency = 'CNY'

            roundflight.plane_no_A = '_'.join(
                [leg['EQ'].encode('utf-8') for leg in outbound])
            roundflight.airline_A = '_'.join(
                [leg['COA'].encode('utf-8') for leg in outbound])
            roundflight.plane_no_B = '_'.join(
                [leg['EQ'].encode('utf-8') for leg in inbound])
            roundflight.airline_B = '_'.join(
                [leg['COA'].encode('utf-8') for leg in inbound])

            roundflight.dur_A = direction_duration(odo, outbound, a_num, 1)
            roundflight.dur_B = direction_duration(odo, inbound, b_num,
                                                   a_num + 1)

            for eachflight_content in odo:
                eachflight = EachFlight()
                try:
                    eachflight.flight_no = eachflight_content['MA']
                    eachflight.dept_id = eachflight_content['OL']
                    eachflight.dest_id = eachflight_content['DL']
                    eachflight.airline = eachflight_content['COA']
                    eachflight.plane_no = eachflight_content['EQ']
                    eachflight.dept_time = eachflight_content['DD'] + ':00'
                    eachflight.dest_time = eachflight_content['AD'] + ':00'
                    eachflight.dur = int(eachflight_content['ET']) * 60
                    eachflight.flight_key = (eachflight.flight_no + '_' +
                                             eachflight.dept_id + '_' +
                                             eachflight.dest_id)
                    flights[eachflight.flight_key] = (
                        eachflight.flight_no, eachflight.airline,
                        eachflight.plane_no, eachflight.dept_id,
                        eachflight.dest_id, eachflight.dept_time,
                        eachflight.dest_time, eachflight.dur)
                except Exception:
                    # A broken segment is skipped without failing the ticket.
                    continue

            tickets.append((
                roundflight.dept_id, roundflight.dest_id,
                roundflight.dept_day, roundflight.dest_day,
                roundflight.price, roundflight.tax, roundflight.surcharge,
                roundflight.currency, roundflight.source,
                roundflight.return_rule,
                roundflight.flight_no_A, roundflight.airline_A,
                roundflight.plane_no_A, roundflight.dept_time_A,
                roundflight.dest_time_A, roundflight.dur_A,
                roundflight.seat_type_A, roundflight.stop_A,
                roundflight.flight_no_B, roundflight.airline_B,
                roundflight.plane_no_B, roundflight.dept_time_B,
                roundflight.dest_time_B, roundflight.dur_B,
                roundflight.seat_type_B, roundflight.stop_B))
        except Exception as e:
            logger.error('Can not parse flight info!' + str(e))
            continue

    # The original fell off the end here and returned None, so parsed
    # tickets never reached the caller.
    return results
'DepartureDate'][6:-2] dest_time_tamp = each_flight_json['FlightSegment'][-1][ 'ArrivalDate'][6:-2] flight_time_json = each_flight_json['FlightSegment'] #parse eachflight content for each_flight_content in flight_time_json: try: eachflight = EachFlight() eachflight.airline = each_flight_content[ 'AirCompanyName'] #print eachflight.airline eachflight.dept_id = each_flight_content[ 'DepartureAirport'] #print eachflight.dept_id eachflight.dest_id = each_flight_content[ 'ArrivalAirport'] #print eachflight.dest_id #print each_flight_content['Equipment'] eachflight.plane_no = each_flight_content['Equipment'] eachflight.flight_no = each_flight_content[ 'AirCompanyCode'] + each_flight_content[ 'FlightNumber'] eachflight.flight_key = eachflight.flight_no + '_' + eachflight.dept_id + '_' + eachflight.dest_id #print each_flight_content['ArrivalDate'][6:-2] eachflight.dest_time = time.strftime('%Y-%m-%d %H:%M:%S', \ time.localtime(float(str(each_flight_content['ArrivalDate'][6:-2])[:-3]))).replace(' ','T') eachflight.dept_time = time.strftime('%Y-%m-%d %H:%M:%S', \ time.localtime(float(str(each_flight_content['DepartureDate'][6:-2])[:-3]))).replace(' ','T') fly_time_content = each_flight_content['FlyTime'] #print fly_time_content
def ceair_page_parser(content):
    """Parse a ceair search response into tickets and flight segments.

    Everything before the first '{' in ``content`` is discarded and the
    remainder is decoded as JSON.  A non-empty 'resultMsg' means there is
    nothing to parse.

    Args:
        content: response text containing a JSON object.

    Returns:
        A ``(tickets, flights)`` pair.  ``tickets`` is a list of 17-field
        tuples, one per routing and priced cabin (economy and/or business).
        ``flights`` maps '<flight_no>_<dept_id>_<dest_id>' to an 8-field
        segment tuple.
    """
    tickets = []
    flights = {}
    infos = json.loads(content[content.find('{'):])
    if infos['resultMsg'] != '':
        return tickets, flights

    currency = infos['currency']
    for routing in infos['tripItemList'][0]['airRoutingList']:
        legs = routing['flightList']
        flight = Flight()
        flight.source = 'ceair::ceair'
        flight.stop = len(legs) - 1
        flight.currency = currency
        flight.dept_id = legs[0]['deptCd']
        flight.dest_id = legs[-1]['arrCd']
        flight.dept_time = standard_timeformatter(legs[0]['deptTime'])
        flight.dest_time = standard_timeformatter(legs[-1]['arrTime'])
        flight.dept_day = legs[0]['deptTime'].split(' ')[0]

        segments = []
        stay_seconds = []
        for leg in legs:
            segment = EachFlight()
            segment.flight_no = leg['flightNo']
            segment.airline = '东方航空'
            segment.plane_no = leg['acfamily']
            segment.dept_id = leg['deptCd']
            segment.dest_id = leg['arrCd']
            segment.dept_time = standard_timeformatter(leg['deptTime'])
            segment.dest_time = standard_timeformatter(leg['arrTime'])
            segment.dur = hm_to_sec(leg['duration'])
            segment.flight_key = (segment.flight_no + '_' + segment.dept_id +
                                  '_' + segment.dest_id)
            flights[segment.flight_key] = (
                segment.flight_no, segment.airline, segment.plane_no,
                segment.dept_id, segment.dest_id, segment.dept_time,
                segment.dest_time, segment.dur)
            segments.append(segment)
            stay_seconds.append(hm_to_sec(leg['stayTime']))

        # Ticket-level fields are the per-segment values joined with '_'.
        flight.flight_no = '_'.join([seg.flight_no for seg in segments])
        flight.plane_no = '_'.join([seg.plane_no for seg in segments])
        flight.airline = '_'.join([seg.airline for seg in segments])
        # Total duration: all flying times first, then all stay times.
        flight.dur = sum(stay_seconds, sum([seg.dur for seg in segments]))

        prices = routing['priceDisp']
        for cabin, seat_type in (('economy', '经济舱'), ('business', '商务舱')):
            if prices[cabin] == '':
                continue
            flight.seat_type = seat_type
            flight.price = int(prices[cabin])
            tickets.append((
                flight.flight_no, flight.plane_no, flight.airline,
                flight.dept_id, flight.dest_id, flight.dept_day,
                flight.dept_time, flight.dest_time, flight.dur, flight.price,
                flight.tax, flight.surcharge, flight.currency,
                flight.seat_type, flight.source, flight.return_rule,
                flight.stop))

    return tickets, flights
tickets.append(flight_tuple) for segment in segments: route_flights = segment['flights'] for single_flight in route_flights: each_flight = EachFlight() each_flight.flight_no = single_flight['flightNumber'] try: each_flight.airline = Airline[single_flight['airCo']] except: each_flight.airline = single_flight['airCo'] each_flight.plane_no = single_flight['equipType'] each_flight.dept_id = single_flight['fromAirport'] each_flight.dest_id = single_flight['toAirport'] each_flight.dept_time = CalDateTime(single_flight['fromDate'], single_flight['fromTime']) each_flight.dest_time = CalDateTime(single_flight['toDate'], single_flight['toTime']) each_flight.dur = CalDur(single_flight['duration']) #print '--------' #print each_flight.flight_no #print each_flight.airline #print each_flight.plane_no #print each_flight.dept_id, each_flight.dest_id, each_flight.dept_time, each_flight.dest_time, each_flight.dur each_flight.flight_key = each_flight.flight_no + '_' + each_flight.dept_id + '_' + each_flight.dest_id flights[each_flight.flight_key] = (each_flight.flight_no, each_flight.airline, \ each_flight.plane_no, each_flight.dept_id, each_flight.dest_id, \ each_flight.dept_time, each_flight.dest_time, each_flight.dur)
def elong_page_parser(htmlcontent):
    """Parse an elong result page into tickets and flight segments.

    The flight data is the first match of ``flightsPattern`` in the page,
    decoded as JSON; its 'FlightLegList' holds one entry per ticket with the
    individual segments under 'segs'.

    Args:
        htmlcontent: page source.

    Returns:
        A ``(tickets, flights)`` pair.  ``tickets`` is a list of 17-field
        tuples and ``flights`` maps '<flight_no>_<dept_id>_<dest_id>' to an
        8-field segment tuple.  ``([], {})`` is returned for the site's
        "page does not exist" response and when anything fails to parse (the
        error is logged).
    """
    tickets = []
    flights = {}
    # Marker text of the site's "page does not exist / unavailable" page.
    if htmlcontent.find('您访问的页面不存在或暂时无法访问') != -1:
        return tickets, flights

    try:
        flights_json = flightsPattern.findall(htmlcontent)[0]
        allflights = json.loads(flights_json)['FlightLegList']
        for flightInfo in allflights:
            segs = flightInfo['segs']
            flight = Flight()
            flight.currency = 'CNY'
            flight.seat_type = '经济舱'
            flight.stop = len(segs) - 1
            flight.price = int(flightInfo['cabs'][0]['oprice'])
            flight.tax = int(flightInfo['tax'])
            flight.source = 'elong::elong'
            flight.dur = 0

            flight_nos = []
            plane_nos = []
            airlines = []
            for singleflightInfo in segs:
                eachFlight = EachFlight()
                eachFlight.flight_no = singleflightInfo['fltno']
                eachFlight.plane_no = singleflightInfo['plane']
                eachFlight.airline = singleflightInfo['corpn']
                eachFlight.dept_id = singleflightInfo['dport']
                eachFlight.dest_id = singleflightInfo['aport']
                # time_shifter converts to the 2014-07-11T12:06:00 form.
                eachFlight.dept_time = time_shifter(singleflightInfo['dtime'])
                eachFlight.dest_time = time_shifter(singleflightInfo['atime'])
                eachFlight.dur = int(singleflightInfo['ftime']) * 60
                eachFlight.flight_key = (eachFlight.flight_no + '_' +
                                         eachFlight.dept_id + '_' +
                                         eachFlight.dest_id)
                flights[eachFlight.flight_key] = (
                    eachFlight.flight_no, eachFlight.airline,
                    eachFlight.plane_no, eachFlight.dept_id,
                    eachFlight.dest_id, eachFlight.dept_time,
                    eachFlight.dest_time, eachFlight.dur)

                flight_nos.append(eachFlight.flight_no)
                plane_nos.append(eachFlight.plane_no)
                airlines.append(eachFlight.airline)
                flight.dur += eachFlight.dur

            # Add the waiting time between each pair of consecutive segments.
            for arriving, departing in zip(segs, segs[1:]):
                flight.dur += cal_wait_time(time_shifter(arriving['atime']),
                                            time_shifter(departing['dtime']))

            flight.flight_no = '_'.join(flight_nos)
            flight.plane_no = '_'.join(plane_nos)
            flight.airline = '_'.join(airlines)
            flight.dept_id = segs[0]['dport']
            flight.dest_id = segs[-1]['aport']
            flight.dept_time = time_shifter(segs[0]['dtime'])
            flight.dest_time = time_shifter(segs[-1]['atime'])
            flight.dept_day = flight.dept_time.split('T')[0]

            tickets.append((
                flight.flight_no, flight.plane_no, flight.airline,
                flight.dept_id, flight.dest_id, flight.dept_day,
                flight.dept_time, flight.dest_time, flight.dur, flight.price,
                flight.tax, flight.surcharge, flight.currency,
                flight.seat_type, flight.source, flight.return_rule,
                flight.stop))
    except Exception as e:
        logger.info(str(e))
        return [], {}

    # The original had no return on the success path, so the parsed data was
    # discarded and callers received None.
    return tickets, flights
def parse_page(content, price_dict):
    """Parse a jijitong one-way search response.

    Each entry of ``datalist`` carries a 'Key' (the flight numbers of the
    itinerary joined by '_') and an 'ODO' list with one item per segment.

    Args:
        content: JSON text of the response.
        price_dict: maps the entry 'Key' to the ticket price.

    Returns:
        A dict with 'ticket' (list of 17-field ticket tuples) and 'flight'
        (dict mapping '<flight_no>_<dept_id>_<dest_id>' to an 8-field
        segment tuple).  Both are empty when the content is not valid JSON
        or the response status is not 'SUCCESS'.  Entries that fail to parse
        are logged and skipped.
    """
    flights = {}
    tickets = []
    result = {'ticket': tickets, 'flight': flights}

    try:
        json_temp = json.loads(content)
    except (TypeError, ValueError):
        return result

    if json_temp['Status'] != 'SUCCESS':
        return result

    for each_flight_json in json_temp['datalist']:
        flight = Flight()
        try:
            odo = each_flight_json['ODO']
            flight.flight_no = each_flight_json['Key']
            flight.stop = int(each_flight_json['OW'])
            # NOTE(review): the original author tagged this lookup with
            # "error price" - confirm price_dict holds the intended fare.
            flight.price = price_dict[flight.flight_no]
            flight.tax = each_flight_json['AIP'][0]['TX']
            flight.dept_id = odo[0]['OL']
            flight.dest_id = odo[-1]['DL']
            flight.dept_time = odo[0]['DD'] + ':00'
            flight.dest_time = odo[-1]['AD'] + ':00'
            flight.currency = 'CNY'
            flight.source = 'jijitong::jijitong'
            flight.seat_type = '经济舱'
            flight.dept_day = flight.dept_time.split('T')[0]

            # Duration in seconds: flying time ('ET' minutes) of every leg
            # plus the layovers between consecutive legs (via durCal).
            flight_num = len(flight.flight_no.split('_'))
            if flight_num == 1:
                flight.dur = int(odo[0]['ET']) * 60
            else:
                flying = 0
                for leg in odo[:flight_num]:
                    flying += int(leg['ET']) * 60
                waiting = 0
                for x in range(1, flight_num):
                    waiting += durCal(odo[x - 1]['AD'], odo[x]['DD'])
                flight.dur = flying + waiting

            plane_no = ''
            airline = ''
            for each_json_temp in odo:
                plane_no = plane_no + each_json_temp['EQ'] + '_'
                airline = airline + each_json_temp['COA'] + '_'
                try:
                    eachflight = EachFlight()
                    eachflight.flight_no = each_json_temp['MA']
                    eachflight.dept_id = each_json_temp['OL']
                    eachflight.dest_id = each_json_temp['DL']
                    eachflight.airline = each_json_temp['COA']
                    eachflight.plane_no = each_json_temp['EQ']
                    eachflight.dept_time = each_json_temp['DD'] + ':00'
                    eachflight.dest_time = each_json_temp['AD'] + ':00'
                    eachflight.dur = int(each_json_temp['ET']) * 60
                    eachflight.flight_key = (eachflight.flight_no + '_' +
                                             eachflight.dept_id + '_' +
                                             eachflight.dest_id)
                    flights[eachflight.flight_key] = (
                        eachflight.flight_no, eachflight.airline,
                        eachflight.plane_no, eachflight.dept_id,
                        eachflight.dest_id, eachflight.dept_time,
                        eachflight.dest_time, eachflight.dur)
                except Exception as e:
                    # A broken segment is reported but does not fail the
                    # whole ticket.
                    print(str(e))
                    continue
            # Drop the trailing '_' left by the accumulation above.
            flight.plane_no = plane_no[:-1]
            flight.airline = airline[:-1]

            tickets.append((
                flight.flight_no, flight.plane_no, flight.airline,
                flight.dept_id, flight.dest_id, flight.dept_day,
                flight.dept_time, flight.dest_time, flight.dur, flight.price,
                flight.tax, flight.surcharge, flight.currency,
                flight.seat_type, flight.source, flight.return_rule,
                flight.stop))
        except Exception as e:
            logger.error('Can not parse flight info!' + str(e))
            continue

    # The original fell off the end here and returned None, so parsed
    # tickets never reached the caller.
    return result
try: airline_temp = airline_dict[each_flight_content[2]] except: logger.info('[AIRLINECODE]' + each_flight_content[2]) airline += airline_temp + '_' eachflight = EachFlight() #print each_flight_content try: eachflight.flight_no = each_flight_content[2] + each_flight_content[10] eachflight.dept_id = each_flight_content[0] eachflight.dest_id = each_flight_content[1] eachflight.flight_key = eachflight.flight_no + '_' + eachflight.dept_id + \ '_' + eachflight.dest_id eachflight.plane_no = each_flight_content[12] try: eachflight.airline = airline_dict[each_flight_content[2]] except: logger.info('[AIRLINEERRORCODE]' + each_flight_content[2]) continue dept_time_day = each_flight_content[4] dept_time_mins = each_flight_content[5] dest_time_day = each_flight_content[6] dest_time_mins = each_flight_content[7]
def parse_page(content, price_dict):
    """Parse a jijitong round-trip search response.

    Each entry of ``datalist`` carries a 'Key' of the form
    '<outbound flight nos>^<return flight nos>' (flight numbers of one
    direction joined by '_') and an 'ODO' list with the outbound segments
    first, followed by the return segments.

    Args:
        content: JSON text of the response.
        price_dict: maps the entry 'Key' to the ticket price.

    Returns:
        A dict with 'ticket' (list of 26-field round-trip tuples) and
        'flight' (dict mapping '<flight_no>_<dept_id>_<dest_id>' to an
        8-field segment tuple).  Both are empty when the content is not
        valid JSON or the response status is not 'SUCCESS'.  Entries that
        fail to parse are logged and skipped.
    """
    tickets = []
    flights = {}
    results = {'ticket': tickets, 'flight': flights}

    def with_seconds(stamp):
        # 'YYYY-MM-DDTHH:MM' is shorter than 17 chars and lacks the seconds.
        if len(stamp) < 17:
            return stamp + ':00'
        return stamp

    def direction_duration(odo, legs, count, first_layover):
        # Seconds for one direction: flying time ('ET' minutes) of every leg
        # plus the layovers between consecutive legs (computed by durCal).
        if count == 1:
            return int(legs[0]['ET']) * 60
        total = 0
        for leg in legs:
            total += int(leg['ET']) * 60
        for x in range(first_layover, first_layover + count - 1):
            total += durCal(odo[x - 1]['AD'], odo[x]['DD'])
        return total

    try:
        json_temp = json.loads(content)
    except (TypeError, ValueError):
        return results

    if json_temp['Status'] != 'SUCCESS':
        return results

    for each_flight_json in json_temp['datalist']:
        roundflight = RoundFlight()
        try:
            key_parts = each_flight_json['Key'].split('^')
            flight_no_a = key_parts[0]
            flight_no_b = key_parts[1]
            a_num = len(flight_no_a.split('_'))
            b_num = len(flight_no_b.split('_'))
            odo = each_flight_json['ODO']
            outbound = odo[:a_num]
            inbound = odo[a_num:]

            roundflight.stop_A = a_num - 1
            roundflight.stop_B = b_num - 1
            roundflight.flight_no_A = flight_no_a
            roundflight.flight_no_B = flight_no_b
            roundflight.dept_id = odo[0]['OL']
            # The destination is where the return direction starts.
            roundflight.dest_id = odo[a_num]['OL']

            roundflight.dept_time_A = with_seconds(odo[0]['DD'])
            roundflight.dest_time_A = with_seconds(odo[a_num - 1]['AD'])
            roundflight.dept_time_B = with_seconds(odo[a_num]['DD'])
            roundflight.dest_time_B = with_seconds(odo[-1]['AD'])
            roundflight.dept_day = roundflight.dept_time_A.split('T')[0]
            roundflight.dest_day = roundflight.dept_time_B.split('T')[0]

            roundflight.price = price_dict[flight_no_a + '^' + flight_no_b]
            roundflight.tax = each_flight_json['AIP'][0]['TX']
            roundflight.source = 'jijitong::jijitong'
            roundflight.seat_type_A = '经济舱'
            roundflight.seat_type_B = '经济舱'
            roundflight.currency = 'CNY'

            roundflight.plane_no_A = '_'.join(
                [leg['EQ'].encode('utf-8') for leg in outbound])
            roundflight.airline_A = '_'.join(
                [leg['COA'].encode('utf-8') for leg in outbound])
            roundflight.plane_no_B = '_'.join(
                [leg['EQ'].encode('utf-8') for leg in inbound])
            roundflight.airline_B = '_'.join(
                [leg['COA'].encode('utf-8') for leg in inbound])

            roundflight.dur_A = direction_duration(odo, outbound, a_num, 1)
            roundflight.dur_B = direction_duration(odo, inbound, b_num,
                                                   a_num + 1)

            for eachflight_content in odo:
                eachflight = EachFlight()
                try:
                    eachflight.flight_no = eachflight_content['MA']
                    eachflight.dept_id = eachflight_content['OL']
                    eachflight.dest_id = eachflight_content['DL']
                    eachflight.airline = eachflight_content['COA']
                    eachflight.plane_no = eachflight_content['EQ']
                    eachflight.dept_time = eachflight_content['DD'] + ':00'
                    eachflight.dest_time = eachflight_content['AD'] + ':00'
                    eachflight.dur = int(eachflight_content['ET']) * 60
                    eachflight.flight_key = (eachflight.flight_no + '_' +
                                             eachflight.dept_id + '_' +
                                             eachflight.dest_id)
                    flights[eachflight.flight_key] = (
                        eachflight.flight_no, eachflight.airline,
                        eachflight.plane_no, eachflight.dept_id,
                        eachflight.dest_id, eachflight.dept_time,
                        eachflight.dest_time, eachflight.dur)
                except Exception:
                    # A broken segment is skipped without failing the ticket.
                    continue

            tickets.append((
                roundflight.dept_id, roundflight.dest_id,
                roundflight.dept_day, roundflight.dest_day,
                roundflight.price, roundflight.tax, roundflight.surcharge,
                roundflight.currency, roundflight.source,
                roundflight.return_rule,
                roundflight.flight_no_A, roundflight.airline_A,
                roundflight.plane_no_A, roundflight.dept_time_A,
                roundflight.dest_time_A, roundflight.dur_A,
                roundflight.seat_type_A, roundflight.stop_A,
                roundflight.flight_no_B, roundflight.airline_B,
                roundflight.plane_no_B, roundflight.dept_time_B,
                roundflight.dest_time_B, roundflight.dur_B,
                roundflight.seat_type_B, roundflight.stop_B))
        except Exception as e:
            logger.error('Can not parse flight info!' + str(e))
            continue

    # The original fell off the end here and returned None, so parsed
    # tickets never reached the caller.
    return results
tickets.append(flight_tuple) for segment in segments: route_flights = segment['flights'] for single_flight in route_flights: each_flight = EachFlight() each_flight.flight_no = single_flight['flightNumber'] try: each_flight.airline = Airline[single_flight['airCo']] except: each_flight.airline = single_flight['airCo'] each_flight.plane_no = single_flight['equipType'] each_flight.dept_id = single_flight['fromAirport'] each_flight.dest_id = single_flight['toAirport'] each_flight.dept_time = CalDateTime(single_flight['fromDate'], single_flight['fromTime']) each_flight.dest_time = CalDateTime(single_flight['toDate'], single_flight['toTime']) each_flight.dur = CalDur(single_flight['duration']) #print '--------' #print each_flight.flight_no #print each_flight.airline #print each_flight.plane_no #print each_flight.dept_id, each_flight.dest_id, each_flight.dept_time, each_flight.dest_time, each_flight.dur each_flight.flight_key = each_flight.flight_no + '_' + each_flight.dept_id + '_' + each_flight.dest_id flights[each_flight.flight_key] = (each_flight.flight_no, each_flight.airline, \
flight.seat_type = '经济舱' flight.stop = len(segments) - 1 flight.source = source for segment in segments: flight_aircorp += segment['airlineName'] + '_' flight_plane += segment['aircraftCode'].split(' ')[-1] + '_' #Airbus A330 -> A330 flight_no += segment['airlineCode'] + segment['flightNumber'] + '_' #拼接航空公司代码和航班代码 singleflight = EachFlight() singleflight.flight_no = segment['airlineCode'] + segment['flightNumber'] singleflight.plane_no = segment['aircraftCode'] singleflight.airline = segment['airlineName'] singleflight.dept_id = segment['departureAirportCode'] singleflight.dest_id = segment['arrivalAirportCode'] singleflight.dept_time = timeshifter(segment['departureTime']) singleflight.dest_time = timeshifter(segment['arrivalTime']) singleflight.dur = int(segment['duration'] * 60) singleflight.flight_key = singleflight.flight_no + '_' + singleflight.dept_id + '_' + singleflight.dest_id singleflight_tuple = (singleflight.flight_no, singleflight.airline, singleflight.plane_no, singleflight.dept_id, singleflight.dest_id, \ singleflight.dept_time, singleflight.dest_time, singleflight.dur) singleFlights[singleflight.flight_key] = singleflight_tuple flight.flight_no = flight_no[:-1] flight.plane_no = flight_plane[:-1] flight.airline = flight_aircorp[:-1] flights.append(flight)
def elong_page_parser(htmlcontent):
    """Parse an elong result page into tickets and flight segments.

    The flight data is the first match of ``flightsPattern`` in the page,
    decoded as JSON; its 'FlightLegList' holds one entry per ticket with the
    individual segments under 'segs'.

    Args:
        htmlcontent: page source.

    Returns:
        A ``(tickets, flights)`` pair.  ``tickets`` is a list of 17-field
        tuples and ``flights`` maps '<flight_no>_<dept_id>_<dest_id>' to an
        8-field segment tuple.  ``([], {})`` is returned for the site's
        "page does not exist" response and when anything fails to parse (the
        error is logged).
    """
    tickets = []
    flights = {}
    # Marker text of the site's "page does not exist / unavailable" page.
    if htmlcontent.find('您访问的页面不存在或暂时无法访问') != -1:
        return tickets, flights

    try:
        flights_json = flightsPattern.findall(htmlcontent)[0]
        allflights = json.loads(flights_json)['FlightLegList']
        for flightInfo in allflights:
            segs = flightInfo['segs']
            flight = Flight()
            flight.currency = 'CNY'
            flight.seat_type = '经济舱'
            flight.stop = len(segs) - 1
            flight.price = int(flightInfo['cabs'][0]['oprice'])
            flight.tax = int(flightInfo['tax'])
            flight.source = 'elong::elong'
            flight.dur = 0

            flight_nos = []
            plane_nos = []
            airlines = []
            for singleflightInfo in segs:
                eachFlight = EachFlight()
                eachFlight.flight_no = singleflightInfo['fltno']
                eachFlight.plane_no = singleflightInfo['plane']
                eachFlight.airline = singleflightInfo['corpn']
                eachFlight.dept_id = singleflightInfo['dport']
                eachFlight.dest_id = singleflightInfo['aport']
                # time_shifter converts to the 2014-07-11T12:06:00 form.
                eachFlight.dept_time = time_shifter(singleflightInfo['dtime'])
                eachFlight.dest_time = time_shifter(singleflightInfo['atime'])
                eachFlight.dur = int(singleflightInfo['ftime']) * 60
                eachFlight.flight_key = (eachFlight.flight_no + '_' +
                                         eachFlight.dept_id + '_' +
                                         eachFlight.dest_id)
                flights[eachFlight.flight_key] = (
                    eachFlight.flight_no, eachFlight.airline,
                    eachFlight.plane_no, eachFlight.dept_id,
                    eachFlight.dest_id, eachFlight.dept_time,
                    eachFlight.dest_time, eachFlight.dur)

                flight_nos.append(eachFlight.flight_no)
                plane_nos.append(eachFlight.plane_no)
                airlines.append(eachFlight.airline)
                flight.dur += eachFlight.dur

            # Add the waiting time between each pair of consecutive segments.
            for arriving, departing in zip(segs, segs[1:]):
                flight.dur += cal_wait_time(time_shifter(arriving['atime']),
                                            time_shifter(departing['dtime']))

            flight.flight_no = '_'.join(flight_nos)
            flight.plane_no = '_'.join(plane_nos)
            flight.airline = '_'.join(airlines)
            flight.dept_id = segs[0]['dport']
            flight.dest_id = segs[-1]['aport']
            flight.dept_time = time_shifter(segs[0]['dtime'])
            flight.dest_time = time_shifter(segs[-1]['atime'])
            flight.dept_day = flight.dept_time.split('T')[0]

            tickets.append((
                flight.flight_no, flight.plane_no, flight.airline,
                flight.dept_id, flight.dest_id, flight.dept_day,
                flight.dept_time, flight.dest_time, flight.dur, flight.price,
                flight.tax, flight.surcharge, flight.currency,
                flight.seat_type, flight.source, flight.return_rule,
                flight.stop))
    except Exception as e:
        logger.info(str(e))
        return [], {}

    # The original had no return on the success path, so the parsed data was
    # discarded and callers received None.
    return tickets, flights