def ParseInterPage(page):
    '''
    Parse a GBK-encoded JSON flight-search response into ticket tuples.

    page is decoded as GBK (undecodable bytes are dropped) and read with
    jsonlib.  Every entry of data["FlightList"] yields one tuple:

        (flight_no, plane_no, airline, dept_id, dest_id, dept_day,
         dept_time, dest_time, dur, price, tax, surcharge, currency,
         seat_type, source, return_rule, stop)

    Per-segment flight numbers, airline names and craft types are joined
    with '_'.  return_rule is not assigned here, so the tuple carries
    whatever value a fresh Flight() has for it.
    '''
    stamp_format = '%Y-%m-%d %H:%M:%S'
    payload = jsonlib.read(page.decode("GBK", "ignore"))

    rows = []
    for entry in payload["FlightList"]:
        # A naive, second-precision datetime renders through isoformat()
        # as 'YYYY-MM-DDTHH:MM:SS'.
        departure = datetime.datetime.strptime(entry["DepartTime"],
                                               stamp_format).isoformat()
        arrival = datetime.datetime.strptime(entry["ArrivalTime"],
                                             stamp_format).isoformat()

        # Flight information, collected segment by segment
        flight = Flight()
        flight_numbers = []
        airline_names = []
        craft_types = []
        departure_ports = []
        for segment in entry["FlightDetail"]:
            flight_numbers.append(segment["FlightNo"])
            airline_names.append(segment["AirlineName"])
            craft_types.append(segment["CraftType"])
            departure_ports.append(segment["DPort"])
            # Reassigned on every pass, so the last segment's arrival port
            # is the one that remains.
            flight.dest_id = segment["APort"]

        # NOTE(review): this is the number of segments (a direct flight
        # gives 1), not segments minus one - confirm that consumers of
        # `stop` expect that.
        flight.stop = len(departure_ports)
        flight.dept_id = departure_ports[0]
        flight.flight_no = '_'.join(flight_numbers)
        flight.airline = '_'.join(airline_names)
        flight.plane_no = '_'.join(craft_types)

        flight.dept_time = departure
        flight.dest_time = arrival
        flight.dept_day = departure.split('T')[0]

        flight.price = int(entry["Price"])
        flight.surcharge = int(
            GetPriceByClass(entry["OilFeeImage"], TaxPriceClasses))
        flight.tax = int(GetPriceByClass(entry["TaxImage"], TaxPriceClasses))

        # "FlightTime" is multiplied by 60 to get the duration in seconds
        flight.dur = int(entry["FlightTime"]) * 60
        flight.currency = "CNY"
        flight.source = "ctrip::ctrip"
        flight.seat_type = entry["ClassName"]

        rows.append((flight.flight_no, flight.plane_no, flight.airline,
                     flight.dept_id, flight.dest_id, flight.dept_day,
                     flight.dept_time, flight.dest_time, flight.dur,
                     flight.price, flight.tax, flight.surcharge,
                     flight.currency, flight.seat_type, flight.source,
                     flight.return_rule, flight.stop))

    return rows
def ParseInterPage(page):
    '''
    Parse a GBK-encoded JSON flight-search response into ticket tuples.

    page is decoded as GBK (undecodable bytes are dropped) and read with
    jsonlib.  Every entry of data["FlightList"] yields one tuple:

        (flight_no, plane_no, airline, dept_id, dest_id, dept_day,
         dept_time, dest_time, dur, price, tax, surcharge, currency,
         seat_type, source, return_rule, stop)

    Per-segment flight numbers, airline names and craft types are joined
    with '_'.  return_rule is not assigned here, so the tuple carries
    whatever value a fresh Flight() has for it.
    '''
    stamp_format = '%Y-%m-%d %H:%M:%S'
    payload = jsonlib.read(page.decode("GBK", "ignore"))

    rows = []
    for entry in payload["FlightList"]:
        # A naive, second-precision datetime renders through isoformat()
        # as 'YYYY-MM-DDTHH:MM:SS'.
        departure = datetime.datetime.strptime(entry["DepartTime"],
                                               stamp_format).isoformat()
        arrival = datetime.datetime.strptime(entry["ArrivalTime"],
                                             stamp_format).isoformat()

        # Flight information, collected segment by segment
        flight = Flight()
        flight_numbers = []
        airline_names = []
        craft_types = []
        departure_ports = []
        for segment in entry["FlightDetail"]:
            flight_numbers.append(segment["FlightNo"])
            airline_names.append(segment["AirlineName"])
            craft_types.append(segment["CraftType"])
            departure_ports.append(segment["DPort"])
            # Reassigned on every pass, so the last segment's arrival port
            # is the one that remains.
            flight.dest_id = segment["APort"]

        # NOTE(review): this is the number of segments (a direct flight
        # gives 1), not segments minus one - confirm that consumers of
        # `stop` expect that.
        flight.stop = len(departure_ports)
        flight.dept_id = departure_ports[0]
        flight.flight_no = '_'.join(flight_numbers)
        flight.airline = '_'.join(airline_names)
        flight.plane_no = '_'.join(craft_types)

        flight.dept_time = departure
        flight.dest_time = arrival
        flight.dept_day = departure.split('T')[0]

        flight.price = int(entry["Price"])
        flight.surcharge = int(
            GetPriceByClass(entry["OilFeeImage"], TaxPriceClasses))
        flight.tax = int(GetPriceByClass(entry["TaxImage"], TaxPriceClasses))

        # "FlightTime" is multiplied by 60 to get the duration in seconds
        flight.dur = int(entry["FlightTime"]) * 60
        flight.currency = "CNY"
        flight.source = "ctrip::ctrip"
        flight.seat_type = entry["ClassName"]

        rows.append((flight.flight_no, flight.plane_no, flight.airline,
                     flight.dept_id, flight.dest_id, flight.dept_day,
                     flight.dept_time, flight.dest_time, flight.dur,
                     flight.price, flight.tax, flight.surcharge,
                     flight.currency, flight.seat_type, flight.source,
                     flight.return_rule, flight.stop))

    return rows
Exemple #3
0
def ParsePage(tree, params):
    '''
    Build one ticket tuple per fare row of a parsed search-result page.

    tree   -- parsed document exposing xpath(); every
              //div[@class='search_box'] element is read as one flight.
    params -- not used by this function; kept so existing callers still
              work.

    Returns a list of tuples laid out as

        (flight_no, plane_no, airline, dept_id, dest_id, dept_day,
         dept_time, dest_time, dur, price, tax, surcharge, currency,
         seat_type, source, return_rule, stop)

    dur, return_rule and stop are never assigned here, so they carry the
    values a fresh Flight() has for them.
    '''
    stamp_format = '%Y-%m-%d %H:%M:%S'
    allinfo = []

    for node in tree.xpath("//div[@class='search_box']"):
        # Flight information
        flight = Flight()
        flight.flight_no = GetFlightNo(node.get("id"))
        # '|'-separated attribute; fields 0..3 are used below as departure
        # time, arrival time, departure id and destination id.
        strs = node.get("data").split("|")
        flight.dept_id = strs[2]
        flight.dest_id = strs[3]
        flight.airline = GetTextByXpath(
            node, "table[1]/tr/td[1]/div[1]/span/text()")
        flight.plane_no = GetAlphanumeric(
            GetAllText(node.xpath("table[1]/tr/td[1]/div[2]/span")[0]))

        airport_tax, fuel_surcharge = GetTax(
            GetTextByXpath(node, "table[1]/tr/td[5]/div[1]/text()"))

        priceNodes = node.xpath("table[@class='search_table']/tr")
        if not priceNodes:
            # No fare rows: nothing to emit, and (as before) the times and
            # taxes of this flight are not converted at all.
            continue

        # These values belong to the flight, not to a fare row, so they are
        # converted once instead of once per row.
        flight.dept_time = datetime.datetime.strptime(
            strs[0], stamp_format).isoformat()
        flight.dest_time = datetime.datetime.strptime(
            strs[1], stamp_format).isoformat()
        # dept_time is a string at this point; the date is the part before
        # 'T'.  (Calling strftime() on it raised AttributeError.)
        flight.dept_day = flight.dept_time.split('T')[0]
        flight.tax = int(airport_tax)
        flight.surcharge = int(fuel_surcharge)
        flight.currency = "CNY"
        flight.source = "ctrip::ctrip"

        for priceNode in priceNodes:
            # Ticket information
            flight.price = int(GetTextByXpath(priceNode, "td[7]/span/text()"))
            flight.seat_type = GetAllText(priceNode.xpath("td[2]")[0])

            allinfo.append((flight.flight_no, flight.plane_no, flight.airline,
                            flight.dept_id, flight.dest_id, flight.dept_day,
                            flight.dept_time, flight.dest_time, flight.dur,
                            flight.price, flight.tax, flight.surcharge,
                            flight.currency, flight.seat_type, flight.source,
                            flight.return_rule, flight.stop))

    return allinfo
def ParsePage(tree):
    '''
    Build one ticket tuple per fare row of a parsed search-result page.

    tree -- parsed document exposing xpath(); every
            //div[@class='search_box'] element is read as one flight.

    Returns a list of tuples laid out as

        (flight_no, plane_no, airline, dept_id, dest_id, dept_day,
         dept_time, dest_time, dur, price, tax, surcharge, currency,
         seat_type, source, return_rule, stop)

    dur, return_rule and stop are never assigned here, so they carry the
    values a fresh Flight() has for them.
    '''
    stamp_format = '%Y-%m-%d %H:%M:%S'
    allinfo = []

    for node in tree.xpath("//div[@class='search_box']"):
        # Flight information
        flight = Flight()
        flight.flight_no = GetFlightNo(node.get("id"))
        # '|'-separated attribute; fields 0..3 are used below as departure
        # time, arrival time, departure id and destination id.
        strs = node.get("data").split("|")
        flight.dept_id = strs[2]
        flight.dest_id = strs[3]
        flight.airline = GetTextByXpath(node, "table[1]/tr/td[1]/div[1]/span/text()")
        flight.plane_no = GetAlphanumeric(GetAllText(node.xpath("table[1]/tr/td[1]/div[2]/span")[0]))

        airport_tax, fuel_surcharge = GetTax(GetTextByXpath(node, "table[1]/tr/td[5]/div[1]/text()"))

        priceNodes = node.xpath("table[@class='search_table']/tr")
        if not priceNodes:
            # No fare rows: nothing to emit, and (as before) the times and
            # taxes of this flight are not converted at all.
            continue

        # These values belong to the flight, not to a fare row, so they are
        # converted once instead of once per row.
        flight.dept_time = datetime.datetime.strptime(strs[0], stamp_format).isoformat()
        flight.dest_time = datetime.datetime.strptime(strs[1], stamp_format).isoformat()
        # dept_time is a string at this point; the date is the part before
        # 'T'.  (Calling strftime() on it raised AttributeError.)
        flight.dept_day = flight.dept_time.split('T')[0]
        flight.tax = int(airport_tax)
        flight.surcharge = int(fuel_surcharge)
        flight.currency = "CNY"
        flight.source = "ctrip::ctrip"

        for priceNode in priceNodes:
            # Ticket information
            flight.price = int(GetTextByXpath(priceNode, "td[7]/span/text()"))
            flight.seat_type = GetAllText(priceNode.xpath("td[2]")[0])

            allinfo.append((flight.flight_no, flight.plane_no, flight.airline,
                            flight.dept_id, flight.dest_id, flight.dept_day,
                            flight.dept_time, flight.dest_time, flight.dur,
                            flight.price, flight.tax, flight.surcharge,
                            flight.currency, flight.seat_type, flight.source,
                            flight.return_rule, flight.stop))

    return allinfo
Exemple #5
0
def parser(content):
    '''
    Turn a result page into a list of ticket tuples, one per booking offer.

    content is cut into flight sections with section_pat; the module-level
    *_pat regular expressions then pull the individual fields out of each
    section.  Every offer found by tickets_info_pat becomes one tuple:

        (flight_no, plane_no, airline, dept_id, dest_id, dept_day,
         dept_time, dest_time, dur, price, tax, surcharge, currency,
         seat_type, source, return_rule, stop)

    Values fixed by this function: plane_no and return_rule are 'NULL',
    tax and surcharge are -1.0, currency is "CNY", seat_type is a constant
    label, source is 'wego::' plus a name cut out of the offer's site URL,
    and price is the offer's price text with commas removed (a string).
    '''
    stamp_format = '%Y %d %b %I:%M %p'
    all_info = []

    for block in section_pat.findall(content):
        # Flight number: for every ':'-separated leg keep the text before
        # the first '-'.
        legs = flight_no_pat.findall(block)[0].split(':')
        if len(legs) == 1:
            flight_num = legs[0][:legs[0].find('-')]
        else:
            flight_num = ''.join(['_' + leg[:leg.find('-')] for leg in legs])
        # NOTE(review): [1:] removes the leading '_' of the multi-leg form,
        # but it also removes the first character of the single-leg form -
        # confirm that this is intended.
        flight_no = flight_num[1:]

        airline_name = airline_name_pat.findall(block)[0]

        # First departure code and last arrival code of the section
        departure_code = departure_code_pat.findall(block)[0]
        arrival_code = arrival_code_pat.findall(block)[-1]

        # Times: the first four characters of the matched text are dropped,
        # commas are removed and the year 2014 is prepended before parsing.
        # NOTE(review): the year is hard-coded - confirm.
        departure_raw = departure_time_pat.findall(block)[0]
        departure_time = datetime.strptime(
            '2014 ' + departure_raw[4:].replace(',', ''),
            stamp_format).isoformat()

        arrival_raw = arrival_time_pat.findall(block)[-1]
        arrival_time = datetime.strptime(
            '2014 ' + arrival_raw[4:].replace(',', ''),
            stamp_format).isoformat()

        # Duration in seconds.  Each match replaces the previous value, so
        # the last match wins; with no match the value stays an empty list.
        flight_dur = []
        for span in flight_duration_pat.findall(block):
            days = day_pat.findall(span)
            hours = hour_pat.findall(span)
            minutes = min_pat.findall(span)
            day_count = int(days[0]) if days else 0
            hour_count = int(hours[0]) if hours else 0
            minute_count = int(minutes[0]) if minutes else 0
            flight_dur = day_count * 86400 + hour_count * 3600 + minute_count * 60

        # Booking offers: [price text, source name, link]
        tickets = []
        for offer in tickets_info_pat.findall(block):
            price_html = tickets_price_pat.findall(offer)[0]
            ticket_price = price_html[price_html.find('>') + 1:].replace(',', '')

            # Source name: the piece of the URL between its last '/' and
            # its last '.', with '-' turned into '_' and anything from a
            # remaining '.' onwards cut off.
            site_url = tickets_web_pat.findall(offer)[0]
            site_name = site_url[site_url.rfind('/') + 1:site_url.rfind('.')].replace('-', '_')
            dot = site_name.find('.')
            if dot > 0:
                site_name = site_name[:dot]

            ticket_link = tickets_links_pat.findall(offer)[0]
            tickets.append([ticket_price, 'wego::' + site_name, ticket_link])

        # Stops are counted as the number of stops_pat matches in the
        # flight number string.
        stops = len(stops_pat.findall(flight_no))

        update_time = time.strftime('%Y-%m-%dT%H:%M:%S', time.localtime(time.time()))

        all_info.append([flight_no, '', airline_name, departure_code,
                         arrival_code, departure_time, arrival_time,
                         flight_dur, -1.0, -1.0, "CNY", '经济舱', '',
                         tickets, stops, update_time])

    flights = []
    for record in all_info:
        (flight_no, _plane_no, airline, dept_id, dest_id, dept_time,
         dest_time, dur, tax, surcharge, currency, seat_type, _return_rule,
         offers, stop, _update_time) = record

        for offer in offers:
            flight = Flight()
            flight.flight_no = flight_no
            flight.plane_no = 'NULL'
            flight.airline = airline
            flight.dept_id = dept_id
            flight.dest_id = dest_id
            flight.dept_time = dept_time
            flight.dest_time = dest_time
            flight.dur = dur
            flight.price = offer[0]
            flight.tax = tax
            flight.surcharge = surcharge
            flight.currency = currency
            flight.seat_type = seat_type
            flight.source = offer[1]
            flight.return_rule = 'NULL'
            flight.stop = stop

            if 'T' in flight.dept_time:
                flight.dept_day = flight.dept_time.split('T')[0]

            flights.append((flight.flight_no, flight.plane_no, flight.airline,
                            flight.dept_id, flight.dest_id, flight.dept_day,
                            flight.dept_time, flight.dest_time, flight.dur,
                            flight.price, flight.tax, flight.surcharge,
                            flight.currency, flight.seat_type, flight.source,
                            flight.return_rule, flight.stop))
    return flights
Exemple #6
0
def ceair_page_parser(content):
    '''
    Parse a ceair search response into ticket tuples and segment records.

    Everything in content before the first '{' is skipped and the rest is
    loaded as JSON.  A non-empty 'resultMsg' means there is nothing to
    parse, and two empty containers are returned.

    Returns (tickets, flights):
      tickets -- list of tuples
                 (flight_no, plane_no, airline, dept_id, dest_id, dept_day,
                  dept_time, dest_time, dur, price, tax, surcharge,
                  currency, seat_type, source, return_rule, stop),
                 one per priced cabin (economy, then business) of every
                 routing in the first trip item.
      flights -- dict keyed by 'flightNo_deptCd_arrCd' whose values are
                 (flight_no, airline, plane_no, dept_id, dest_id,
                  dept_time, dest_time, dur) for each single segment.

    tax, surcharge and return_rule are not assigned here and keep the
    values a fresh Flight() has for them.
    '''
    flights = {}
    tickets = []

    payload = json.loads(content[content.find('{'):])
    if payload['resultMsg'] != '':
        return tickets, flights

    currency = payload['currency']

    # (key in 'priceDisp', seat_type label), in the order tickets are emitted
    cabins = (('economy', '经济舱'), ('business', '商务舱'))

    for routing in payload['tripItemList'][0]['airRoutingList']:
        legs = routing['flightList']

        flight = Flight()
        flight.source = 'ceair::ceair'
        flight.stop = len(legs) - 1
        flight.currency = currency

        # The itinerary starts with the first leg and ends with the last one
        first_leg, last_leg = legs[0], legs[-1]
        flight.dept_id = first_leg['deptCd']
        flight.dest_id = last_leg['arrCd']
        flight.dept_time = standard_timeformatter(first_leg['deptTime'])
        flight.dest_time = standard_timeformatter(last_leg['arrTime'])
        flight.dept_day = first_leg['deptTime'].split(' ')[0]

        flight_nos = []
        plane_types = []
        airlines = []
        leg_seconds = []
        stay_seconds = []

        for leg in legs:
            segment = EachFlight()
            segment.flight_no = leg['flightNo']
            segment.airline = '东方航空'
            segment.plane_no = leg['acfamily']
            segment.dept_id = leg['deptCd']
            segment.dest_id = leg['arrCd']
            segment.dept_time = standard_timeformatter(leg['deptTime'])
            segment.dest_time = standard_timeformatter(leg['arrTime'])
            segment.dur = hm_to_sec(leg['duration'])
            segment.flight_key = '_'.join(
                [segment.flight_no, segment.dept_id, segment.dest_id])

            flights[segment.flight_key] = (
                segment.flight_no, segment.airline, segment.plane_no,
                segment.dept_id, segment.dest_id, segment.dept_time,
                segment.dest_time, segment.dur)

            flight_nos.append(segment.flight_no)
            plane_types.append(segment.plane_no)
            airlines.append(segment.airline)
            leg_seconds.append(segment.dur)
            stay_seconds.append(hm_to_sec(leg['stayTime']))

        flight.flight_no = '_'.join(flight_nos)
        flight.plane_no = '_'.join(plane_types)
        flight.airline = '_'.join(airlines)

        # Total duration: every leg's duration, then every stay time on top
        flight.dur = sum(stay_seconds, sum(leg_seconds))

        for cabin_key, seat_label in cabins:
            fare = routing['priceDisp'][cabin_key]
            if fare != '':
                flight.seat_type = seat_label
                flight.price = int(fare)
                tickets.append((
                    flight.flight_no, flight.plane_no, flight.airline,
                    flight.dept_id, flight.dest_id, flight.dept_day,
                    flight.dept_time, flight.dest_time, flight.dur,
                    flight.price, flight.tax, flight.surcharge,
                    flight.currency, flight.seat_type, flight.source,
                    flight.return_rule, flight.stop))

    return tickets, flights
def parsePage(content,dept_year, flight_no, orig_dept_time):
    '''
    Look up the price of one specific flight in a result page.

    content        -- page text; each_flight_content_pat splits it into
                      flight blocks.
    dept_year      -- year as a string (the page text supplies only month
                      and day).
    flight_no      -- flight number to look for; legs are joined with '_'.
    orig_dept_time -- departure time to look for, in the form
                      'YYYY-MM-DDTHH:MM:SS'.

    Returns the price (int) of the first block whose flight number and
    departure time both match, or -1 when no block matches.  A block that
    cannot be parsed is logged and skipped.
    '''
    result = -1

    for each_flight_text in each_flight_content_pat.findall(content):
        flight = Flight()
        try:
            t_price = all_price_pat.findall(each_flight_text)[0]
            each_flight_text_temp = each_flight_content_temp_pat.findall(each_flight_text)[0]
            each_part_flight = each_part_flight_pat.findall(each_flight_text_temp)
            if not each_part_flight:
                continue

            # Departure comes from the first leg, arrival from the last one
            flight.dept_id = airport_pat.findall(each_part_flight[0])[0][1:-1]
            flight.dest_id = airport_pat.findall(each_part_flight[-1])[-1][1:-1]

            dept_time_temp = dept_time_temp_pat.findall(each_part_flight[0])[0]
            dest_time_temp = dest_time_temp_pat.findall(each_part_flight[-1])[-1]
            flight.dept_day = dept_year + '-' + dept_time_temp[0].strip() + '-' + \
                    dept_time_temp[1].strip()
            flight.dept_time = flight.dept_day + 'T' + dept_time_temp[2].strip() + ':00'
            # NOTE(review): the arrival date reuses the departure year and
            # month; an arrival in the following month would give a wrong
            # or invalid date - confirm.
            flight.dest_time = dept_year + '-' + dept_time_temp[0].strip() + '-' + \
                    dest_time_temp[0].strip() + 'T' + dest_time_temp[1].strip()[-5:] + ':00'

            dept_epoch = int(time.mktime(datetime.datetime.strptime(
                flight.dept_time, '%Y-%m-%dT%H:%M:%S').timetuple()))
            dest_epoch = int(time.mktime(datetime.datetime.strptime(
                flight.dest_time, '%Y-%m-%dT%H:%M:%S').timetuple()))
            # NOTE(review): the extra 3600 s is unexplained in this code
            # (presumably a time-zone offset) - confirm.
            flight.dur = dest_epoch - dept_epoch + 3600

            flight.stop = len(each_part_flight) - 1

            # NOTE(review): element 0 is used when the match has more than
            # one element, which presumably means price_pat captures several
            # groups - confirm.
            flight.price = price_pat.findall(each_flight_text)[0]
            if len(flight.price) > 1:
                flight.price = int(flight.price[0])
            else:
                flight.price = int(t_price)

            try:
                flight.tax = int(t_price) - flight.price
            except (TypeError, ValueError):
                flight.tax = -1.0
                logger.info('Can not parse tax info!')

            flight_numbers = []
            plane_numbers = []
            airline_names = []
            for each_flight_text_t in each_part_flight:
                flight_numbers.append(
                    flight_no_pat.findall(each_flight_text_t)[0][:8].replace(' ', ''))
                plane_numbers.append(
                    plane_no_pat.findall(each_flight_text_t)[0].replace(' ', ''))
                airline_names.append(
                    airline_pat.findall(each_flight_text_t)[0].replace(' ', ''))

            flight.flight_no = '_'.join(flight_numbers)
            flight.plane_no = '_'.join(plane_numbers)
            flight.airline = '_'.join(airline_names)

            flight.return_rule = return_rule_pat.findall(each_flight_text)[0].replace('<p>','').replace('\n','') \
                    .replace('。','').replace('</p>','。').strip().replace(' ','')
            flight.currency = 'CNY'
            flight.source = 'feifan::feifan'
            flight.seat_type = '经济舱'

            if flight.flight_no == flight_no and flight.dept_time == orig_dept_time:
                result = flight.price
                break
        except Exception as e:
            # Best effort: a block that does not have the expected layout
            # must not stop the search through the remaining blocks.
            logger.info('Can not parse flight info: %s', e)
            continue

    # The original computed this value but never handed it back.
    return result
def vuelingparser(content,flight_no,req_dept_time):
    '''
    Look up the price of one specific flight in a Vueling result page.

    content       -- page text searched with the module-level regular
                     expressions.
    flight_no     -- flight number to look for; a connecting flight is
                     written as two numbers joined by '_'.
    req_dept_time -- departure time to look for, compared verbatim with the
                     text captured by dept_time_pat.

    Returns the price text of the first matching fare (',' replaced by
    '.'), or -1 when the page lists no flights or none of them matches.
    '''
    flight_num_info_temp = flight_no_pat.findall(content)
    if not flight_num_info_temp:
        return -1

    # Flight numbers: up to seven characters after the first '|', '~'
    # removed.
    flight_num_list = []
    for flight_num_info in flight_num_info_temp:
        first = flight_num_info.find('|')
        second = flight_num_info.rfind('~^')
        flight_num = flight_num_info[first + 1:first + 8].replace('~', '')
        if second > 0:
            # A '~^' marker introduces a second flight number; both are
            # joined with '_'.
            flight_num = flight_num + '_' + \
                    flight_num_info[second + 2:second + 9].replace('~', '')
        flight_num_list.append(flight_num)

    # Station codes: the three characters before '):' (departure) and
    # before the last ')' (destination).
    dept_id_list = []
    dest_id_list = []
    for station_temp in station_temp_pat.findall(content):
        station_info = station_temp.replace('\n', '').replace(' ', '')
        dept_id_num = station_info.find('):')
        dest_id_num = station_info.rfind(')')
        dept_id_list.append(station_info[dept_id_num - 3:dept_id_num])
        dest_id_list.append(station_info[dest_id_num - 3:dest_id_num])

    # Departure time, arrival time and the value stored as `stop`
    dept_time_list = []
    dest_time_list = []
    stops_list = []
    for time_temp in flight_time_pat.findall(content):
        dept_time_list.append(dept_time_pat.findall(time_temp)[0])
        dest_time_list.append(dest_time_pat.findall(time_temp)[0])
        stops_list.append(flight_num_pat.findall(time_temp)[0])

    # Prices: text after the last '>', minus the final three characters
    price_list = [
        price_temp[price_temp.rfind('>') + 1:-3].replace(',', '.')
        for price_temp in price_pat.findall(content)
    ]

    # NOTE(review): prices are assumed to come in groups of three per
    # flight, in this fixed cabin order - confirm against the page layout.
    seat_type_list = ['经济舱','超经济舱','公务舱']
    seat_type = [seat_type_list[i % 3] for i in range(len(price_list))]

    # Repeat every flight's data three times so that it lines up with
    # price_list index by index.
    flight_no_l,dept_id_l,dest_id_l,dept_time_l,dest_time_l,stops_l = [],[],[],[],[],[]
    for j in range(len(stops_list)):
        for _ in range(3):
            flight_no_l.append(flight_num_list[j])
            dept_id_l.append(dept_id_list[j])
            dest_id_l.append(dest_id_list[j])
            dept_time_l.append(dept_time_list[j])
            dest_time_l.append(dest_time_list[j])
            stops_l.append(stops_list[j])

    for i, price in enumerate(price_list):
        # The whole record is still assembled, as before, although only the
        # price is handed back to the caller.
        flight = Flight()
        flight.flight_no = flight_no_l[i]
        flight.plane_no = 'NULL'
        flight.airline = 'vueling'
        flight.dept_id = dept_id_l[i]
        flight.dest_id = dest_id_l[i]
        flight.dept_time = dept_time_l[i]
        flight.dest_time = dest_time_l[i]

        # Split 'Y-M-DTh:m...' into its fields and pad with three zeros
        # before handing the list to date_handle.
        dept_time_c = str(dept_time_l[i]).replace('T',',').replace('-',',').replace(':',',').split(',') + [0,0,0]
        dept_time_t = date_handle(dept_time_c)
        dest_time_c = str(dest_time_l[i]).replace('T',',').replace('-',',').replace(':',',').split(',') + [0,0,0]
        dest_time_t = date_handle(dest_time_c)
        flight.dur = int(time.mktime(dest_time_t)) - int(time.mktime(dept_time_t))
        flight.price = price
        flight.dept_day = flight.dept_time[:10]
        flight.currency = 'EUR'
        flight.seat_type = seat_type[i]
        # NOTE(review): a single ':' here, while the other parsers use
        # 'name::name' - confirm before this record is ever emitted.
        flight.source = 'vueling:vueling'
        flight.stop = stops_l[i]
        if flight.flight_no == flight_no and flight.dept_time == req_dept_time:
            return flight.price

    # Flights were listed but none matched: report "not found" the same way
    # as for an empty page instead of falling off the end with None.
    return -1
Exemple #9
0
def ceair_page_parser(content):
    '''
    Parse a ceair search response into ticket tuples and segment records.

    Everything in content before the first '{' is skipped and the rest is
    loaded as JSON.  A non-empty 'resultMsg' means there is nothing to
    parse, and two empty containers are returned.

    Returns (tickets, flights):
      tickets -- list of tuples
                 (flight_no, plane_no, airline, dept_id, dest_id, dept_day,
                  dept_time, dest_time, dur, price, tax, surcharge,
                  currency, seat_type, source, return_rule, stop),
                 one per priced cabin (economy, then business) of every
                 routing in the first trip item.
      flights -- dict keyed by 'flightNo_deptCd_arrCd' whose values are
                 (flight_no, airline, plane_no, dept_id, dest_id,
                  dept_time, dest_time, dur) for each single segment.

    tax, surcharge and return_rule are not assigned here and keep the
    values a fresh Flight() has for them.
    '''
    flights = {}
    tickets = []

    payload = json.loads(content[content.find('{'):])
    if payload['resultMsg'] != '':
        return tickets, flights

    currency = payload['currency']

    # (key in 'priceDisp', seat_type label), in the order tickets are emitted
    cabins = (('economy', '经济舱'), ('business', '商务舱'))

    for routing in payload['tripItemList'][0]['airRoutingList']:
        legs = routing['flightList']

        flight = Flight()
        flight.source = 'ceair::ceair'
        flight.stop = len(legs) - 1
        flight.currency = currency

        # The itinerary starts with the first leg and ends with the last one
        first_leg, last_leg = legs[0], legs[-1]
        flight.dept_id = first_leg['deptCd']
        flight.dest_id = last_leg['arrCd']
        flight.dept_time = standard_timeformatter(first_leg['deptTime'])
        flight.dest_time = standard_timeformatter(last_leg['arrTime'])
        flight.dept_day = first_leg['deptTime'].split(' ')[0]

        flight_nos = []
        plane_types = []
        airlines = []
        leg_seconds = []
        stay_seconds = []

        for leg in legs:
            segment = EachFlight()
            segment.flight_no = leg['flightNo']
            segment.airline = '东方航空'
            segment.plane_no = leg['acfamily']
            segment.dept_id = leg['deptCd']
            segment.dest_id = leg['arrCd']
            segment.dept_time = standard_timeformatter(leg['deptTime'])
            segment.dest_time = standard_timeformatter(leg['arrTime'])
            segment.dur = hm_to_sec(leg['duration'])
            segment.flight_key = '_'.join(
                [segment.flight_no, segment.dept_id, segment.dest_id])

            flights[segment.flight_key] = (
                segment.flight_no, segment.airline, segment.plane_no,
                segment.dept_id, segment.dest_id, segment.dept_time,
                segment.dest_time, segment.dur)

            flight_nos.append(segment.flight_no)
            plane_types.append(segment.plane_no)
            airlines.append(segment.airline)
            leg_seconds.append(segment.dur)
            stay_seconds.append(hm_to_sec(leg['stayTime']))

        flight.flight_no = '_'.join(flight_nos)
        flight.plane_no = '_'.join(plane_types)
        flight.airline = '_'.join(airlines)

        # Total duration: every leg's duration, then every stay time on top
        flight.dur = sum(stay_seconds, sum(leg_seconds))

        for cabin_key, seat_label in cabins:
            fare = routing['priceDisp'][cabin_key]
            if fare != '':
                flight.seat_type = seat_label
                flight.price = int(fare)
                tickets.append((
                    flight.flight_no, flight.plane_no, flight.airline,
                    flight.dept_id, flight.dest_id, flight.dept_day,
                    flight.dept_time, flight.dest_time, flight.dur,
                    flight.price, flight.tax, flight.surcharge,
                    flight.currency, flight.seat_type, flight.source,
                    flight.return_rule, flight.stop))

    return tickets, flights
Exemple #10
0
def directFlight_parser(flightstring, date, airports_dict):
    """Parse one direct-flight HTML row into a 17-field flight tuple.

    Args:
        flightstring: HTML fragment of a single flight row; it is searched
            for the cells ``<td class="cols01">`` .. ``<td class="cols06">``.
        date: Departure date string. It is sliced as ``date[0:4]``,
            ``date[5:7]`` and ``date[8:]`` for year, month and day (for
            example ``YYYY-MM-DD``) and is also used verbatim as the date
            part of the departure timestamp.
        airports_dict: Mapping from the airport name shown on the page to the
            id stored on the flight. Names missing from the mapping are
            stored unchanged.

    Returns:
        Tuple ``(flight_no, plane_no, airline, dept_id, dest_id, dept_day,
        dept_time, dest_time, dur, price, tax, surcharge, currency,
        seat_type, source, return_rule, stop)``.

    Raises:
        IndexError: if an expected cell or field is absent from the markup.
        ValueError: if the date, price or tax text is not numeric.
    """

    def first(pattern, text):
        # First capture of `pattern` in `text`; IndexError when absent.
        return re.compile(pattern, re.S).findall(text)[0]

    # For a direct flight every cell pattern is expected to match once.
    # cols05/cols06 are not used below; they are still extracted so that a
    # row lacking them raises IndexError exactly as it did before.
    cells = [
        first(r'<td class="cols0%d">(.*?)</td>' % number, flightstring)
        for number in range(1, 7)
    ]
    cols01, cols02, cols03, cols04 = cells[:4]

    aircorp = first(r'</span>(.*?)<br />', cols01).strip()
    flight_no = first(r'<br />(.*?)&nbsp', cols01).strip()
    plane_type = first(r'method="PlaneType" >(.*?)</a>', cols01).strip()

    dept_airport = first(r'</span>(.*?)<br />', cols02).strip()
    dept_time = first(r'<span class=" t14 bold black">(.*?)</span>',
                      cols02).strip()
    arr_time_airport = first(r'<br />(.*?)$', cols02).strip()

    # The arrival text is "<time> ... <airport>"; a "+1天" marker means the
    # flight lands on the following day and the time carries a "(...)" suffix.
    arrival_parts = arr_time_airport.split(' ')
    arr_time = arrival_parts[0].strip()
    arr_airport = arrival_parts[-1].strip()
    days = 0
    if arr_time_airport.find('+1天') != -1:
        days = 1
        arr_time = arr_time.split('(')[0].strip()

    during_time = first(r'(.*?)<br />', cols03).strip()
    during = timeshifter([dept_time, arr_time, during_time])

    dept_date = datetime.datetime(int(date[0:4]), int(date[5:7]),
                                  int(date[8:]))
    dest_date = dept_date + datetime.timedelta(days)
    dept_daytime = date + 'T' + dept_time + ':00'
    dest_daytime = dest_date.date().isoformat() + 'T' + arr_time + ':00'

    price = first(r'</span>(.*?)</span>', cols04).strip()
    tax = first(r'参考税 &yen;(.*?)<div class', cols04).strip()

    flight = Flight()
    flight.flight_no = flight_no
    flight.plane_no = plane_type
    flight.airline = aircorp
    # Fall back to the raw airport name when no id is known for it.
    flight.dept_id = airports_dict.get(dept_airport, dept_airport)
    flight.dest_id = airports_dict.get(arr_airport, arr_airport)
    flight.dept_day = date
    flight.dept_time = dept_daytime
    flight.dest_time = dest_daytime
    flight.dur = during
    flight.price = float(price)
    flight.tax = float(tax)
    flight.surcharge = -1.0
    flight.currency = 'CNY'
    flight.seat_type = '经济舱'
    flight.source = 'elong::elong'
    flight.return_rule = 'NULL'
    flight.stop = 0

    flight_tuple = (
        flight.flight_no, flight.plane_no, flight.airline, flight.dept_id,
        flight.dest_id, flight.dept_day, flight.dept_time, flight.dest_time,
        flight.dur, flight.price, flight.tax, flight.surcharge,
        flight.currency, flight.seat_type, flight.source, flight.return_rule,
        flight.stop,
    )

    return flight_tuple
Exemple #11
0
def parse_page(content, price_dict):
    """Parse a jijitong search response into tickets and per-leg flights.

    Args:
        content: JSON text of the response.
        price_dict: Mapping from itinerary key (the ``Key`` field, which is
            also stored as ``flight_no``) to the ticket price.

    Returns:
        Dict ``{'ticket': tickets, 'flight': flights}``. ``tickets`` is a
        list of 17-field tuples, one per itinerary that parsed cleanly.
        ``flights`` maps ``"<flight_no>_<dept_id>_<dest_id>"`` to an 8-field
        tuple describing one leg. Both are empty when the content is not
        valid JSON or its ``Status`` is not ``'SUCCESS'``.
    """
    flights = {}
    tickets = []
    result = {'ticket': tickets, 'flight': flights}

    try:
        json_temp = json.loads(content)
    except Exception:
        return result

    if json_temp['Status'] != 'SUCCESS':
        return result

    for each_flight_json in json_temp['datalist']:
        flight = Flight()
        try:
            legs = each_flight_json['ODO']

            flight.flight_no = each_flight_json['Key']
            flight.stop = int(each_flight_json['OW'])
            flight.price = price_dict[flight.flight_no]
            flight.tax = each_flight_json['AIP'][0]['TX']
            flight.dept_id = legs[0]['OL']
            flight.dest_id = legs[-1]['DL']
            flight.dept_time = legs[0]['DD'] + ':00'
            flight.dest_time = legs[-1]['AD'] + ':00'
            flight.currency = 'CNY'
            flight.source = 'jijitong::jijitong'
            flight.seat_type = '经济舱'
            flight.dept_day = flight.dept_time.split('T')[0]

            # Total duration = air time of every leg (ET is multiplied by 60,
            # so presumably minutes) plus the layover between consecutive
            # legs. With a single leg this reduces to that leg's air time.
            flight_num = len(flight.flight_no.split('_'))
            total_dur = sum(int(leg['ET']) * 60 for leg in legs[:flight_num])
            for x in range(1, flight_num):
                total_dur += durCal(legs[x - 1]['AD'], legs[x]['DD'])
            flight.dur = total_dur

            plane_nos = []
            airlines = []
            for leg in legs:
                plane_nos.append(leg['EQ'])
                airlines.append(leg['COA'])

                # A leg that cannot be recorded is reported and skipped; it
                # does not invalidate the ticket itself.
                try:
                    eachflight = EachFlight()
                    eachflight.flight_no = leg['MA']
                    eachflight.dept_id = leg['OL']
                    eachflight.dest_id = leg['DL']
                    eachflight.airline = leg['COA']
                    eachflight.plane_no = leg['EQ']
                    eachflight.dept_time = leg['DD'] + ':00'
                    eachflight.dest_time = leg['AD'] + ':00'
                    eachflight.dur = int(leg['ET']) * 60

                    eachflight.flight_key = (eachflight.flight_no + '_' +
                                             eachflight.dept_id + '_' +
                                             eachflight.dest_id)

                    flights[eachflight.flight_key] = (
                        eachflight.flight_no, eachflight.airline,
                        eachflight.plane_no, eachflight.dept_id,
                        eachflight.dest_id, eachflight.dept_time,
                        eachflight.dest_time, eachflight.dur,
                    )
                except Exception as e:
                    print(str(e))
                    continue

            flight.plane_no = '_'.join(plane_nos)
            flight.airline = '_'.join(airlines)

            # surcharge and return_rule are never assigned here; whatever
            # Flight() initialises them to is what ends up in the tuple.
            flight_tuple = (
                flight.flight_no, flight.plane_no, flight.airline,
                flight.dept_id, flight.dest_id, flight.dept_day,
                flight.dept_time, flight.dest_time, flight.dur, flight.price,
                flight.tax, flight.surcharge, flight.currency,
                flight.seat_type, flight.source, flight.return_rule,
                flight.stop,
            )

            tickets.append(flight_tuple)
        except Exception as e:
            logger.error('Can not parse flight info!' + str(e))
            continue

    return result
Exemple #12
0
        dept_time = datetime.datetime.strptime(node["DepartTime"],
                                               '%Y-%m-%d %H:%M:%S')
        dept_time = str(dept_time).replace(
            ' ',
            'T',
        )
        dest_time = datetime.datetime.strptime(node["ArrivalTime"],
                                               '%Y-%m-%d %H:%M:%S')
        dest_time = str(dest_time).replace(
            ' ',
            'T',
        )
        # 航班信息
        flight = Flight()
        flight.flight_no = ''
        flight.plane_no = ''
        flight.airline = ''
        dept_id_list = []

        for flightNode in node["FlightDetail"]:
            flight.flight_no = flight.flight_no + flightNode["FlightNo"] + '_'
            flight.airline = flight.airline + flightNode["AirlineName"] + '_'
            flight.plane_no = flight.plane_no + flightNode["CraftType"] + '_'
            dept_id_list.append(flightNode["DPort"])
            flight.dest_id = flightNode["APort"]

        #flight.stop = len(dept_id_list)
        flight.dept_id = dept_id_list[0]
        flight.flight_no = flight.flight_no[:-1]
        #flight.airline = flight.airline[:-1]
        #flight.plane_no = flight.plane_no[:-1]
Exemple #13
0
def _first_int_or_zero(pattern, text):
    """Return the first match of ``pattern`` in ``text`` as an int, else 0."""
    found = pattern.findall(text)
    return int(found[0]) if found else 0


def _wego_timestamp(raw, year):
    """Turn a page time label into ``YYYY-MM-DDTHH:MM:SS`` for ``year``.

    The first four characters of ``raw`` and any commas are discarded; the
    rest must match ``%d %b %I:%M %p``.
    """
    text = '%s %s' % (year, raw[4:].replace(',', ''))
    return str(datetime.strptime(text, '%Y %d %b %I:%M %p')).replace(' ', 'T')


def parser(content, year='2014'):
    """Parse a wego result page into one flight tuple per offered ticket.

    Args:
        content: HTML of the result page.
        year: Year prepended to the page's departure/arrival labels before
            they are parsed. Defaults to ``'2014'``, the value this function
            used to hard-code.

    Returns:
        List of 17-field tuples ``(flight_no, plane_no, airline, dept_id,
        dest_id, dept_day, dept_time, dest_time, dur, price, tax, surcharge,
        currency, seat_type, source, return_rule, stop)``. Each section of
        the page yields one tuple per ticket offer found in it; the price is
        kept as the text scraped from the page with commas removed.

    Raises:
        IndexError: if a section lacks one of the expected fields.
        ValueError: if a time label does not match the expected format.
    """
    flights = []

    for temp in section_pat.findall(content):
        # Flight numbers: legs are ':'-separated, each "<number>-<rest>".
        legs = [leg[:leg.find('-')]
                for leg in flight_no_pat.findall(temp)[0].split(':')]
        if len(legs) == 1:
            # NOTE(review): a single-leg number loses its first character
            # here while multi-leg numbers are kept whole. This is preserved
            # from the previous implementation; confirm it is intended.
            flight_no = legs[0][1:]
        else:
            flight_no = '_'.join(legs)

        airline_name = airline_name_pat.findall(temp)[0]
        departure_code = departure_code_pat.findall(temp)[0]
        arrival_code = arrival_code_pat.findall(temp)[-1]

        departure_time = _wego_timestamp(departure_time_pat.findall(temp)[0],
                                         year)
        arrival_time = _wego_timestamp(arrival_time_pat.findall(temp)[-1],
                                       year)

        # Only the last duration label of the section is kept; when there is
        # none the duration stays an empty list, as before.
        flight_dur = []
        for each_time in flight_duration_pat.findall(temp):
            flight_dur = (_first_int_or_zero(day_pat, each_time) * 86400 +
                          _first_int_or_zero(hour_pat, each_time) * 3600 +
                          _first_int_or_zero(min_pat, each_time) * 60)

        stops = len(stops_pat.findall(flight_no))

        for each_ticket in tickets_info_pat.findall(temp):
            # Price: text after the first '>' with thousands commas removed.
            price_html = tickets_price_pat.findall(each_ticket)[0]
            ticket_price = price_html[price_html.find('>') + 1:].replace(
                ',', '')

            # Source: file name of the provider URL without its extension,
            # '-' replaced by '_', cut at the first remaining '.'.
            ticket_web = tickets_web_pat.findall(each_ticket)[0]
            web_name = ticket_web[ticket_web.rfind('/') + 1:
                                  ticket_web.rfind('.')].replace('-', '_')
            dot = web_name.find('.')
            if dot > 0:
                web_name = web_name[:dot]

            flight = Flight()
            flight.flight_no = flight_no
            flight.plane_no = 'NULL'
            flight.airline = airline_name
            flight.dept_id = departure_code
            flight.dest_id = arrival_code
            flight.dept_time = departure_time
            flight.dest_time = arrival_time
            flight.dur = flight_dur
            flight.price = ticket_price
            flight.tax = -1.0
            flight.surcharge = -1.0
            flight.currency = "CNY"
            flight.seat_type = '经济舱'
            flight.source = 'wego::' + web_name
            flight.return_rule = 'NULL'
            flight.stop = stops
            flight.dept_day = flight.dept_time.split('T')[0]

            flights.append((
                flight.flight_no, flight.plane_no, flight.airline,
                flight.dept_id, flight.dest_id, flight.dept_day,
                flight.dept_time, flight.dest_time, flight.dur, flight.price,
                flight.tax, flight.surcharge, flight.currency,
                flight.seat_type, flight.source, flight.return_rule,
                flight.stop,
            ))

    return flights
Exemple #14
0
                        eachflight.dur = (int(each_flight_content[7]) - int(each_flight_content[5]) + \
                                          int(each_flight_content[11])) * 60
                    else:
                        eachflight.dur = (int(each_flight_content[7]) - int(each_flight_content[5]) + \
                                          int(each_flight_content[11]) + 1400 ) * 60

                    flights[eachflight.flight_key] = (eachflight.flight_no, eachflight.airline, \
                        eachflight.plane_no, eachflight.dept_id, eachflight.dest_id, \
                        eachflight.dept_time, eachflight.dest_time, eachflight.dur)
                except Exception, e:
                    #print str(e)
                    continue

            flight.airline = airline[:-1]
            flight.flight_no = flight_no[:-1]
            flight.plane_no = plane_no[:-1]

            if flight.airline[0] == '_' or flight.plane_no[0] == '_':
                continue

            flight.stop = len(each_flight_list) - 1

            flight.source = 'smartfares::smartfares'
            flight.seat_type = '经济舱'
            flight.currency = 'USD'

            each_ticket_tuple = (flight.flight_no, flight.plane_no, flight.airline, \
                            flight.dept_id, flight.dest_id, flight.dept_day, flight.dept_time, \
                            flight.dest_time, flight.dur, flight.price, flight.tax, \
                            flight.surcharge, flight.currency, flight.seat_type, \
                            flight.source, flight.return_rule, flight.stop)
Exemple #15
0
        flight.stop = int(flight_info['transfer'])
        if flight.stop > 1:
            print 'found a flight whose transfer_times > 1'
            continue

        flight.source = 'lcair::lcair'

        flight.dept_id, flight.dest_id = flight_info['routeStr'].split(
            '-')[0], flight_info['routeStr'].split('-')[-1]

        flight.dept_day = flight_info['fromDate']

        flight.flight_no = ''
        flight.airline = ''
        flight.plane_no = ''

        flight_dur = 0

        #direct
        if flight.stop == 0:
            for single_flight in segments[0]['flights']:
                flight.flight_no = single_flight['flightNumber']
                try:
                    flight.airline = Airline[single_flight['airCo']]
                except:
                    flight.airline = single_flight['airCo']

                flight.plane_no = single_flight['equipType']
                flight.dept_time = CalDateTime(single_flight['fromDate'],
                                               single_flight['fromTime'])
def _fly_time_to_seconds(fly_time):
    """Convert a ``FlyTime`` label (hours marker, then minutes marker) to seconds.

    The label is handled as UTF-8 bytes. The hour marker is '小时' when the
    label contains '小', otherwise '时'; the minutes are the text between
    that marker and '分'. An empty hour or minute part counts as zero.

    Raises:
        ValueError: if the hour or minute part is not an integer.
    """
    text = fly_time.encode('utf8')
    hour_mark = '小时' if text.find('小') >= 0 else '时'
    hour_end = text.find(hour_mark)
    hours = text[:hour_end].strip()
    # Skip the marker by its encoded length rather than a magic byte count.
    minutes = text[hour_end + len(hour_mark):text.find('分')].strip()

    seconds = 0
    if hours != '':
        seconds += int(hours) * 3600
    if minutes != '':
        seconds += int(minutes) * 60
    return seconds


def ParsePage(content):
    """Parse a tongcheng flight-search JSON response into flight tuples.

    Args:
        content: JSON text of the response. Content of 100 characters or
            fewer is treated as empty and yields no flights.

    Returns:
        List of 17-field tuples ``(flight_no, plane_no, airline, dept_id,
        dest_id, dept_day, dept_time, dest_time, dur, price, tax, surcharge,
        currency, seat_type, source, return_rule, stop)``. Itineraries that
        fail to parse are logged and skipped. The list is empty when the
        response has no ``OriginDestinationOption`` entry.
    """
    flights = []

    if len(content) <= 100:
        return flights

    content_json = json.loads(content)
    if 'OriginDestinationOption' not in content_json:
        logger.error('tongchengFlight: Crawl this page failed!')
        return flights

    for each_flight_json in content_json['OriginDestinationOption']:
        try:
            flight = Flight()

            segments = each_flight_json['FlightSegment']
            flight_nums = len(segments)

            flight.flight_no = each_flight_json['FlightNos'].replace('-', '_')
            flight.dept_id = each_flight_json['AirPorts'][:3]
            flight.dest_id = each_flight_json['AirPorts'][-3:]

            # Dates are wrapped strings; [6:-2] keeps the inner digits, which
            # the code below treats as a millisecond epoch timestamp.
            dept_time_tamp = segments[0]['DepartureDate'][6:-2]
            dest_time_tamp = segments[-1]['ArrivalDate'][6:-2]

            # Duration = air time of every segment + layover between
            # consecutive segments (millisecond difference floored to
            # seconds).
            flight.dur = sum(_fly_time_to_seconds(segment['FlyTime'])
                             for segment in segments)
            for i in range(1, flight_nums):
                next_departure = segments[i]['DepartureDate'][6:-2]
                prev_arrival = segments[i - 1]['ArrivalDate'][6:-2]
                flight.dur += (int(next_departure) - int(prev_arrival)) // 1000

            # Dropping the last three digits converts milliseconds to
            # seconds.
            flight.dept_time = time.strftime(
                '%Y-%m-%d %H:%M:%S',
                time.localtime(float(str(dept_time_tamp)[:-3]))).replace(
                    ' ', 'T')
            flight.dest_time = time.strftime(
                '%Y-%m-%d %H:%M:%S',
                time.localtime(float(str(dest_time_tamp)[:-3]))).replace(
                    ' ', 'T')
            flight.dept_day = flight.dept_time.split('T')[0]
            flight.source = 'tongcheng::tongcheng'
            flight.stop = int(flight_nums) - 1
            flight.currency = 'CNY'
            flight.price = each_flight_json['FareInfo'][0]['TCPrice_Audlt']
            flight.tax = each_flight_json['FareInfo'][0]['TaxPrice_Audlt']

            flight.plane_no = '_'.join(
                [segment['Equipment'] for segment in segments])
            flight.airline = '_'.join(
                [segment['AirCompanyName'] for segment in segments])
            flight.seat_type = '经济舱'

            # surcharge and return_rule keep whatever Flight() set them to.
            flight_tuple = (
                flight.flight_no, flight.plane_no, flight.airline,
                flight.dept_id, flight.dest_id, flight.dept_day,
                flight.dept_time, flight.dest_time, flight.dur, flight.price,
                flight.tax, flight.surcharge, flight.currency,
                flight.seat_type, flight.source, flight.return_rule,
                flight.stop,
            )
            flights.append(flight_tuple)
        except Exception as e:
            logger.info('tongchengFlight: Parse this flight failed!' + str(e))
            continue

    return flights
Exemple #17
0
def parsePage(content, dept_year):
    """Parse a feifan result page into a list of flight tuples.

    Args:
        content: HTML of the result page.
        dept_year: Year (string) prepended to the month/day found on the
            page to build full dates.

    Returns:
        List of 17-field tuples ``(flight_no, plane_no, airline, dept_id,
        dest_id, dept_day, dept_time, dest_time, dur, price, tax, surcharge,
        currency, seat_type, source, return_rule, stop)``. Itineraries with
        no legs, or for which any field fails to parse, are skipped
        silently.
    """
    flights = []

    for each_flight_text in each_flight_content_pat.findall(content):
        flight = Flight()
        try:
            t_price = all_price_pat.findall(each_flight_text)[0]
            each_flight_text_temp = each_flight_content_temp_pat.findall(
                each_flight_text)[0]
            each_part_flight = each_part_flight_pat.findall(
                each_flight_text_temp)
            if len(each_part_flight) < 1:
                continue

            # NOTE(review): one-second pause per itinerary, kept from the
            # previous implementation; its purpose is not visible here.
            time.sleep(1)

            first_leg = each_part_flight[0]
            last_leg = each_part_flight[-1]
            flight.dept_id = airport_pat.findall(first_leg)[0][1:-1]
            flight.dest_id = airport_pat.findall(last_leg)[-1][1:-1]

            dept_time_temp = dept_time_temp_pat.findall(first_leg)[0]
            dest_time_temp = dest_time_temp_pat.findall(last_leg)[-1]
            flight.dept_day = (dept_year + '-' + dept_time_temp[0].strip() +
                               '-' + dept_time_temp[1].strip())
            flight.dept_time = (flight.dept_day + 'T' +
                                dept_time_temp[2].strip() + ':00')
            # NOTE(review): the arrival date reuses the departure year and
            # month (dept_time_temp[0]); only the day comes from the arrival
            # match. Confirm this is right across a month boundary.
            flight.dest_time = (dept_year + '-' + dept_time_temp[0].strip() +
                                '-' + dest_time_temp[0].strip() + 'T' +
                                dest_time_temp[1].strip()[-5:] + ':00')

            dept_time = int(time.mktime(datetime.datetime.strptime(
                flight.dept_time, '%Y-%m-%dT%H:%M:%S').timetuple()))
            dest_time = int(time.mktime(datetime.datetime.strptime(
                flight.dest_time, '%Y-%m-%dT%H:%M:%S').timetuple()))
            # NOTE(review): fixed +3600 s correction kept as-is; presumably a
            # time-zone offset between the two airports. Confirm.
            flight.dur = dest_time - dept_time + 3600

            flight.stop = len(each_part_flight) - 1

            # Use the itemised price when the match has more than one
            # element, otherwise fall back to the all-in price.
            flight.price = price_pat.findall(each_flight_text)[0]
            if len(flight.price) > 1:
                flight.price = int(flight.price[0])
            else:
                flight.price = int(t_price)

            # Tax is whatever the all-in price adds on top of the fare.
            try:
                flight.tax = int(t_price) - flight.price
            except Exception:
                flight.tax = -1.0
                logger.info('feifanFlight: Can not parse tax info!')

            flight.flight_no = '_'.join(
                [flight_no_pat.findall(leg)[0][:8].replace(' ', '')
                 for leg in each_part_flight])
            flight.plane_no = '_'.join(
                [plane_no_pat.findall(leg)[0].replace(' ', '')
                 for leg in each_part_flight])
            flight.airline = '_'.join(
                [airline_pat.findall(leg)[0].replace(' ', '')
                 for leg in each_part_flight])

            flight.return_rule = return_rule_pat.findall(each_flight_text)[0].replace('<p>','').replace('\n','') \
                    .replace('。','').replace('</p>','。').strip().replace(' ','')
            flight.currency = 'CNY'
            flight.source = 'feifan::feifan'
            flight.seat_type = '经济舱'

            # surcharge keeps whatever Flight() set it to.
            flight_tuple = (
                flight.flight_no, flight.plane_no, flight.airline,
                flight.dept_id, flight.dest_id, flight.dept_day,
                flight.dept_time, flight.dest_time, flight.dur, flight.price,
                flight.tax, flight.surcharge, flight.currency,
                flight.seat_type, flight.source, flight.return_rule,
                flight.stop,
            )

            flights.append(flight_tuple)
        except Exception:
            # Best effort: an itinerary that cannot be parsed is dropped.
            continue

    return flights
Exemple #18
0
def vuelingvalidate(content, flight_no, req_dept_time):
    """Look up the price of one specific fare on a Vueling result page.

    Every flight on the page is expanded into three fares (one per seat
    class, in page order) and the fares are scanned in order.

    Args:
        content: HTML of the result page.
        flight_no: Flight number to look for; two-leg flights are written as
            ``"<first>_<second>"``.
        req_dept_time: Departure time to look for, in the same textual form
            as the page provides it.

    Returns:
        The price text (decimal comma replaced by a dot) of the first fare
        whose flight number and departure time both match. ``None`` when no
        fare matches. The module-level ``result`` when the page contains no
        flight-number information at all.
    """
    raw_flight_infos = flight_no_pat.findall(content)
    if not raw_flight_infos:
        return result

    def as_fields(stamp):
        # "YYYY-MM-DDTHH:MM..." -> list of its parts, padded with three zeros.
        return str(stamp).replace('T', ',').replace('-', ',').replace(
            ':', ',').split(',') + [0, 0, 0]

    # Flight numbers: up to 7 characters after '|', plus a second leg after
    # the last '~^' when there is one.
    flight_num_list = []
    for info in raw_flight_infos:
        bar = info.find('|')
        second_leg = info.rfind('~^')
        number = info[bar + 1:bar + 8].replace('~', '')
        if second_leg > 0:
            number = number + '_' + \
                info[second_leg + 2:second_leg + 9].replace('~', '')
        flight_num_list.append(number)

    # Airport codes: the three characters before '):' and before the last ')'.
    dept_id_list, dest_id_list = [], []
    for raw_station in station_temp_pat.findall(content):
        station = raw_station.replace('\n', '').replace(' ', '')
        dept_end = station.find('):')
        dest_end = station.rfind(')')
        dept_id_list.append(station[dept_end - 3:dept_end])
        dest_id_list.append(station[dest_end - 3:dest_end])

    dept_time_list, dest_time_list, stops_list = [], [], []
    for time_block in flight_time_pat.findall(content):
        departure = dept_time_pat.findall(time_block)[0]
        arrival = dest_time_pat.findall(time_block)[0]
        stop_count = flight_num_pat.findall(time_block)[0]
        dept_time_list.append(departure)
        dest_time_list.append(arrival)
        stops_list.append(stop_count)

    # Price: text after the last '>' without its final three characters.
    price_list = [
        raw_price[raw_price.rfind('>') + 1:-3].replace(',', '.')
        for raw_price in price_pat.findall(content)
    ]

    # Fares cycle through the three seat classes in this order.
    seat_type_list = ['经济舱', '超经济舱', '公务舱']
    seat_type = [seat_type_list[i % 3] for i in range(len(price_list))]

    # Repeat every per-flight value three times so it lines up with the
    # per-fare price list.
    flight_no_l, dept_id_l, dest_id_l = [], [], []
    dept_time_l, dest_time_l, stops_l = [], [], []
    for j in range(len(stops_list)):
        flight_no_l += [flight_num_list[j]] * 3
        dept_id_l += [dept_id_list[j]] * 3
        dest_id_l += [dest_id_list[j]] * 3
        dept_time_l += [dept_time_list[j]] * 3
        dest_time_l += [dest_time_list[j]] * 3
        stops_l += [stops_list[j]] * 3

    for i, fare_price in enumerate(price_list):
        flight = Flight()
        flight.flight_no = flight_no_l[i]
        flight.plane_no = 'NULL'
        flight.airline = 'vueling'
        flight.dept_id = dept_id_l[i]
        flight.dest_id = dest_id_l[i]
        flight.dept_time = dept_time_l[i]
        flight.dest_time = dest_time_l[i]

        dept_time_t = date_handle(as_fields(dept_time_l[i]))
        dest_time_t = date_handle(as_fields(dest_time_l[i]))
        arrival_epoch = int(time.mktime(dest_time_t))
        departure_epoch = int(time.mktime(dept_time_t))
        flight.dur = arrival_epoch - departure_epoch

        flight.price = fare_price
        flight.dept_day = flight.dept_time[:10]
        flight.currency = 'EUR'
        flight.seat_type = seat_type[i]
        flight.source = 'vueling:vueling'
        flight.stop = stops_l[i]

        same_number = flight.flight_no == flight_no
        if same_number and flight.dept_time == req_dept_time:
            return flight.price
Exemple #19
0
def parsePage(content,dept_year):
    """Collect one flight tuple per flight block found in ``content``.

    ``content`` is searched with the module-level ``each_flight_content_pat``;
    every match is parsed into a ``Flight`` and appended to a local list as a
    17-field tuple.  ``dept_year`` is a string that is prepended to the
    month/day values extracted from the page to build full dates.

    NOTE(review): as visible here the function never returns the collected
    list (it falls off the end and yields ``None``); the snippet looks
    truncated -- confirm against the full source.
    """
    flights  = []
    each_flight_content = each_flight_content_pat.findall(content)
    if len(each_flight_content) > 0: 
        for each_flight_text in each_flight_content:
            flight = Flight()
            try:
                # Total price for the itinerary; used below to derive the tax.
                t_price = all_price_pat.findall(each_flight_text)[0]
                each_flight_text_temp = each_flight_content_temp_pat.findall(each_flight_text)[0]
                # One entry per leg of the itinerary.
                each_part_flight = each_part_flight_pat.findall(each_flight_text_temp)
                if len(each_part_flight) >= 1:
                    # NOTE(review): pauses one second per parsed flight; the reason
                    # is not visible in this block.
                    time.sleep(1)
                    # Origin comes from the first leg, destination from the last leg;
                    # [1:-1] drops the first and last character of the matched text.
                    flight.dept_id = airport_pat.findall(each_part_flight[0])[0][1:-1]                    
                    flight.dest_id = airport_pat.findall(each_part_flight[-1])[-1][1:-1]
                     
                    dept_time_temp = dept_time_temp_pat.findall(each_part_flight[0])[0]
                    dest_time_temp = dest_time_temp_pat.findall(each_part_flight[-1])[-1]
                    flight.dept_day = dept_year + '-' + dept_time_temp[0].strip() + '-' + \
                            dept_time_temp[1].strip()
                    flight.dept_time = flight.dept_day + 'T' + dept_time_temp[2].strip() + ':00'
                    # NOTE(review): the arrival timestamp reuses the departure year and
                    # the departure month field (dept_time_temp[0]); an arrival in the
                    # next month/year would be built incorrectly -- confirm.
                    flight.dest_time = dept_year + '-' +  dept_time_temp[0].strip() + '-' + \
                    dest_time_temp[0].strip() + 'T' + dest_time_temp[1].strip()[-5:] + ':00'
                    
                    dept_time = int(time.mktime(datetime.datetime.strptime(flight.dept_time, \
                            '%Y-%m-%dT%H:%M:%S').timetuple()))
                    dest_time = int(time.mktime(datetime.datetime.strptime(flight.dest_time, \
                            '%Y-%m-%dT%H:%M:%S').timetuple()))
                    # Duration in seconds plus a fixed 3600 s.
                    # NOTE(review): presumably a one-hour time-zone correction -- confirm.
                    flight.dur = dest_time - dept_time + 3600
                    
                    flight.stop = len(each_part_flight) - 1
                else:
                    # No legs matched: nothing to record for this block.
                    continue
                # Use the first element of the price match when the match has more
                # than one element; otherwise fall back to the total price.
                flight.price = price_pat.findall(each_flight_text)[0]
                if len(flight.price) > 1:
                    flight.price = int(flight.price[0])
                else:
                    flight.price = int(t_price)

                # Tax is the difference between the total and the base price;
                # -1.0 marks a tax that could not be computed.
                try:
                    flight.tax = int(t_price) - flight.price
                except:
                    flight.tax = -1.0
                    logger.info('feifanFlight: Can not parse tax info!')

                # Per-leg values are joined with '_' (trailing '_' removed afterwards).
                flight.flight_no = ''
                flight.airline = ''
                flight.plane_no = ''
                for each_flight_text_t in each_part_flight:
                    flight.flight_no = flight.flight_no + flight_no_pat.findall(each_flight_text_t)[0][:8].replace(' ','') + '_'
                    flight.plane_no = flight.plane_no + plane_no_pat.findall(each_flight_text_t)[0].replace(' ','') + '_'
                    flight.airline = flight.airline + airline_pat.findall(each_flight_text_t)[0].replace(' ','') + '_'
                
                flight.flight_no = flight.flight_no[:-1]
                flight.plane_no = flight.plane_no[:-1]
                flight.airline = flight.airline[:-1]
                
                # Strip paragraph tags, newlines and spaces from the refund rule;
                # each closing </p> becomes a single full stop.
                flight.return_rule = return_rule_pat.findall(each_flight_text)[0].replace('<p>','').replace('\n','') \
                        .replace('。','').replace('</p>','。').strip().replace(' ','')
                flight.currency = 'CNY'
                flight.source = 'feifan::feifan'
                flight.seat_type = '经济舱'
                # flight.surcharge is read below but never assigned in this function.
                # NOTE(review): presumably Flight() provides a default -- confirm.
                flight_tuple = (flight.flight_no, flight.plane_no, flight.airline, flight.dept_id, flight.dest_id, \
                        flight.dept_day, flight.dept_time, flight.dest_time, flight.dur, flight.price, \
                        flight.tax, flight.surcharge, flight.currency, flight.seat_type, flight.source, \
                        flight.return_rule, flight.stop)

                flights.append(flight_tuple)
            except Exception, e:
                # Any failure while parsing a block skips that flight without logging.
                #logger.info('Parse this flight failed!' + str(e))
                continue
    except Exception,e:
        return -1

    if len(params) != 2:
        logger.info('params error')
        return -1

    for node in data["FlightList"]:
        dept_time = datetime.datetime.strptime(node["DepartTime"], '%Y-%m-%d %H:%M:%S')
        dept_time = str(dept_time).replace(' ','T',)
        dest_time = datetime.datetime.strptime(node["ArrivalTime"], '%Y-%m-%d %H:%M:%S') 
        dest_time = str(dest_time).replace(' ','T',)
        # 航班信息
        flight = Flight()
        flight.flight_no = ''
        flight.plane_no = ''
        flight.airline = ''
        dept_id_list = []

        for flightNode in node["FlightDetail"]:
            flight.flight_no = flight.flight_no + flightNode["FlightNo"] + '_'
            flight.airline = flight.airline + flightNode["AirlineName"] + '_'
            flight.plane_no = flight.plane_no + flightNode["CraftType"] + '_'
            dept_id_list.append(flightNode["DPort"])
            flight.dest_id = flightNode["APort"] 

        #flight.stop = len(dept_id_list)
        flight.dept_id = dept_id_list[0]
        flight.flight_no = flight.flight_no[:-1]
        #flight.airline = flight.airline[:-1]
        #flight.plane_no = flight.plane_no[:-1]
Exemple #21
0
def transferFlight_parser(flightstring,date,airports_dict):
    """Parse one connecting-itinerary table row into a ``Flight`` object.

    The row must contain one ``cols01``/``cols02``/``cols03`` cell per leg and
    at least one ``cols04``, ``cols05`` and ``cols06`` cell.

    Args:
        flightstring: HTML text of one result row.
        date: departure date string laid out as ``YYYY-MM-DD`` (year in
            characters 0-3, month in 5-6, day from 8 onwards).
        airports_dict: mapping from the airport text shown in the row to an
            airport id; text missing from the mapping is used unchanged.

    Returns:
        A populated ``Flight`` (source ``'elong::elong'``) when the row has
        one or two stops, otherwise ``[]``.

    Raises:
        IndexError: when an expected cell or field is missing from the row.
        ValueError: when the date, price or tax is not numeric.
    """
    flight = Flight()

    def cells(css_class):
        # All <td> bodies in the row carrying the given class.
        pattern = r'<td class="%s">(.*?)</td>' % css_class
        return re.compile(pattern, re.S).findall(flightstring)

    # Transfer rows: cols01-cols03 appear once per leg, cols04-cols06 once.
    cols01 = cells('cols01')
    cols02 = cells('cols02')
    cols03 = cells('cols03')
    cols04 = cells('cols04')[0]
    # cols05/cols06 are not read below; indexing them keeps rejecting
    # (IndexError) rows that lack those cells.
    cells('cols05')[0]
    cells('cols06')[0]

    flight.stop = len(cols01) - 1

    if flight.stop > 2:
        return []  # itineraries with more than two transfers are dropped for now

    # Compile the per-leg patterns once instead of once per leg.
    re_after_span = re.compile(r'</span>(.*?)<br />', re.S)
    re_flight_no = re.compile(r'<br />(.*?)&nbsp', re.S)
    re_plane = re.compile(r'method="PlaneType" >(.*?)</a>', re.S)
    re_after_br = re.compile(r'<br />(.*?)$', re.S)
    re_dept_time = re.compile(r'<span class=" t14 bold black">(.*?)</span>', re.S)
    re_duration = re.compile(r'(.*?)<br />', re.S)

    aircorps = []
    flight_nos = []
    plane_types = []
    airports = []
    timeinfo = []  # [departure time, arrival time, duration] repeated per leg
    days = 0       # day offset of the final arrival relative to ``date``
    last_leg = len(cols01) - 1
    for i in range(len(cols01)):
        aircorp = re_after_span.findall(cols01[i])[0].strip()
        flight_no = re_flight_no.findall(cols01[i])[0].strip()
        plane_type = re_plane.findall(cols01[i])[0].strip()

        dept_airport = re_after_span.findall(cols02[i])[0].strip()
        # A "+N天" (+N days) marker in the departure text shifts the arrival date.
        if '+2天' in dept_airport:
            days += 2
        elif '+1天' in dept_airport:
            days += 1

        arr_time_airport = re_after_br.findall(cols02[i])[0].strip()
        dept_time = re_dept_time.findall(cols02[i])[0].strip()
        arr_parts = arr_time_airport.split(' ')
        arr_time = arr_parts[0].strip()
        arr_airport = arr_parts[-1].strip()
        if '+1天' in arr_time_airport:
            # Drop the "(+1天)" suffix glued to the arrival time.
            arr_time = arr_time.split('(')[0].strip()
            # Only the last leg's own next-day arrival adds to the offset here.
            if i == last_leg:
                days += 1

        during_time = re_duration.findall(cols03[i])[0].strip()

        aircorps.append(aircorp)
        flight_nos.append(flight_no)
        plane_types.append(plane_type)
        airports.append(dept_airport)
        airports.append(arr_airport)
        timeinfo.append(dept_time)
        timeinfo.append(arr_time)
        timeinfo.append(during_time)

    during = 0  # duration computation is disabled in this variant
    dept_date = datetime.datetime(int(date[0:4]), int(date[5:7]), int(date[8:]))
    dest_date = dept_date + datetime.timedelta(days)
    dept_daytime = date + 'T' + timeinfo[0] + ':00'
    # timeinfo[-2] is the arrival time of the last leg.
    dest_daytime = str(dest_date).split(' ')[0] + 'T' + timeinfo[-2] + ':00'

    price = re.compile(r'</span>(.*?)</span>', re.S).findall(cols04)[0].strip()
    tax = re.compile(r'参考税 &yen;(.*?)<div class', re.S).findall(cols04)[0].strip()

    # Only one- and two-stop itineraries are supported.
    if flight.stop not in (1, 2):
        return []

    # Per-leg values are joined with '_'. The previous two-stop branch
    # appended flight_nos[2] to the plane types by mistake.
    flight.flight_no = '_'.join(flight_nos)
    flight.plane_no = '_'.join(plane_types)
    flight.airline = '_'.join(aircorps)
    # Translate the displayed airport text to an id when one is known.
    flight.dept_id = airports_dict.get(airports[0], airports[0])
    flight.dest_id = airports_dict.get(airports[-1], airports[-1])
    flight.dept_day = date
    flight.dept_time = dept_daytime
    flight.dest_time = dest_daytime
    flight.dur = during
    flight.price = int(price)
    flight.tax = int(tax)
    flight.surcharge = -1.0
    flight.currency = 'CNY'
    flight.seat_type = '经济舱'
    flight.source = 'elong::elong'
    flight.return_rule = 'NULL'

    return flight
Exemple #22
0
                flight.stop = int(flight_nums) - 1
                flight.currency = 'CNY'
                flight.price = each_flight_json['FareInfo'][0]['TCPrice_Audlt']
                flight.tax = each_flight_json['FareInfo'][0]['TaxPrice_Audlt']

                airline_temp = ''
                plane_no_temp = ''

                for i in range(flight_nums):
                    plane_no_temp = plane_no_temp + \
                            each_flight_json['FlightSegment'][i]['Equipment'] + '_'

                    airline_temp = airline_temp + \
                            each_flight_json['FlightSegment'][i]['AirCompanyName'] + '_'

                flight.plane_no = plane_no_temp[:-1]
                flight.airline = airline_temp[:-1]
                flight.seat_type = '经济舱'

                flight_tuple = (flight.flight_no, flight.plane_no, flight.airline, \
                        flight.dept_id, flight.dest_id, flight.dept_day, flight.dept_time, \
                        flight.dest_time, flight.dur, flight.price, flight.tax, \
                        flight.surcharge, flight.currency, flight.seat_type, \
                        flight.source, flight.return_rule, flight.stop)
                ticket_list.append(flight_tuple)
            except Exception, e:
                #logger.info('tongchengFlight: Parse this flight failed!' + str(e))
                continue
    else:
        logger.info('tongchengFlight: Parse Page Failed')
        return flights
Exemple #23
0
def elong_page_parser(htmlcontent):
    """Parse a result page into itinerary tuples and a per-segment lookup.

    The embedded JSON is located with the module-level ``flightsPattern`` and
    its ``'FlightLegList'`` entries are converted one by one.

    Args:
        htmlcontent: text of the result page.

    Returns:
        ``(tickets, flights)`` where ``tickets`` is a list of 17-field
        itinerary tuples and ``flights`` maps
        ``'<flight_no>_<dept_id>_<dest_id>'`` to an 8-field segment tuple.
        ``([], {})`` is returned for the "page not available" notice and when
        any error occurs while parsing (the error is logged).
    """
    tickets = []
    flights = {}

    # The site's "page does not exist or is temporarily unavailable" notice.
    if htmlcontent.find('您访问的页面不存在或暂时无法访问') != -1:
        return tickets, flights

    try:
        flights_json = flightsPattern.findall(htmlcontent)[0]
        allflights = json.loads(flights_json)['FlightLegList']

        for flightInfo in allflights:
            segs = flightInfo['segs']
            flight = Flight()

            flight.currency = 'CNY'
            flight.seat_type = '经济舱'
            flight.stop = len(segs) - 1
            flight.price = int(flightInfo['cabs'][0]['oprice'])
            flight.tax = int(flightInfo['tax'])
            flight.source = 'elong::elong'
            flight.dur = 0

            airlines = []
            plane_nos = []
            flight_nos = []
            for singleflightInfo in segs:
                eachFlight = EachFlight()
                eachFlight.flight_no = singleflightInfo['fltno']
                eachFlight.plane_no = singleflightInfo['plane']
                eachFlight.airline = singleflightInfo['corpn']
                eachFlight.dept_id = singleflightInfo['dport']
                eachFlight.dest_id = singleflightInfo['aport']
                # time_shifter output looks like 2014-07-11T12:06:00.
                eachFlight.dept_time = time_shifter(singleflightInfo['dtime'])
                eachFlight.dest_time = time_shifter(singleflightInfo['atime'])
                # NOTE(review): 'ftime' is presumably minutes; stored as seconds.
                eachFlight.dur = int(singleflightInfo['ftime']) * 60

                eachFlight.flight_key = eachFlight.flight_no + '_' + \
                    eachFlight.dept_id + '_' + eachFlight.dest_id

                flights[eachFlight.flight_key] = (
                    eachFlight.flight_no, eachFlight.airline,
                    eachFlight.plane_no, eachFlight.dept_id,
                    eachFlight.dest_id, eachFlight.dept_time,
                    eachFlight.dest_time, eachFlight.dur)

                airlines.append(eachFlight.airline)
                plane_nos.append(eachFlight.plane_no)
                flight_nos.append(eachFlight.flight_no)

                flight.dur += eachFlight.dur

            # Add the waiting time between each pair of consecutive segments.
            for i in range(len(segs) - 1):
                flight.dur += cal_wait_time(
                    time_shifter(segs[i]['atime']),
                    time_shifter(segs[i + 1]['dtime']))

            flight.flight_no = '_'.join(flight_nos)
            flight.plane_no = '_'.join(plane_nos)
            flight.airline = '_'.join(airlines)

            flight.dept_id = segs[0]['dport']
            flight.dest_id = segs[-1]['aport']
            flight.dept_time = time_shifter(segs[0]['dtime'])
            flight.dest_time = time_shifter(segs[-1]['atime'])
            flight.dept_day = flight.dept_time.split('T')[0]

            # flight.surcharge and flight.return_rule are not assigned here.
            # NOTE(review): presumably Flight() supplies defaults -- confirm.
            flight_tuple = (
                flight.flight_no, flight.plane_no, flight.airline,
                flight.dept_id, flight.dest_id, flight.dept_day,
                flight.dept_time, flight.dest_time, flight.dur, flight.price,
                flight.tax, flight.surcharge, flight.currency,
                flight.seat_type, flight.source, flight.return_rule,
                flight.stop)

            tickets.append(flight_tuple)

    except Exception as e:
        # Best effort: one malformed itinerary discards the whole page.
        logger.info(str(e))
        return [], {}

    # Previously the success path fell off the end and returned None.
    return tickets, flights
Exemple #24
0
def transferFlight_parser(flightstring, date, airports_dict):
    """Parse one connecting-itinerary table row into a 17-field flight tuple.

    The row must contain one ``cols01``/``cols02``/``cols03`` cell per leg and
    at least one ``cols04``, ``cols05`` and ``cols06`` cell.

    Args:
        flightstring: HTML text of one result row.
        date: departure date string laid out as ``YYYY-MM-DD`` (year in
            characters 0-3, month in 5-6, day from 8 onwards).
        airports_dict: mapping from the airport text shown in the row to an
            airport id; text missing from the mapping is used unchanged.

    Returns:
        ``(flight_no, plane_no, airline, dept_id, dest_id, dept_day,
        dept_time, dest_time, dur, price, tax, surcharge, currency,
        seat_type, source, return_rule, stop)`` when the row has one or two
        stops, otherwise ``[]``.

    Raises:
        IndexError: when an expected cell or field is missing from the row.
        ValueError: when the date, price or tax is not numeric.
    """
    flight = Flight()

    def cells(css_class):
        # All <td> bodies in the row carrying the given class.
        pattern = r'<td class="%s">(.*?)</td>' % css_class
        return re.compile(pattern, re.S).findall(flightstring)

    # Transfer rows: cols01-cols03 appear once per leg, cols04-cols06 once.
    cols01 = cells('cols01')
    cols02 = cells('cols02')
    cols03 = cells('cols03')
    cols04 = cells('cols04')[0]
    # cols05/cols06 are not read below; indexing them keeps rejecting
    # (IndexError) rows that lack those cells.
    cells('cols05')[0]
    cells('cols06')[0]

    flight.stop = len(cols01) - 1

    if flight.stop > 2:
        return []  # itineraries with more than two transfers are dropped for now

    # Compile the per-leg patterns once instead of once per leg.
    re_after_span = re.compile(r'</span>(.*?)<br />', re.S)
    re_flight_no = re.compile(r'<br />(.*?)&nbsp', re.S)
    re_plane = re.compile(r'method="PlaneType" >(.*?)</a>', re.S)
    re_after_br = re.compile(r'<br />(.*?)$', re.S)
    re_dept_time = re.compile(r'<span class=" t14 bold black">(.*?)</span>',
                              re.S)
    re_duration = re.compile(r'(.*?)<br />', re.S)

    aircorps = []
    flight_nos = []
    plane_types = []
    airports = []
    timeinfo = []  # [departure time, arrival time, duration] repeated per leg
    days = 0       # day offset of the final arrival relative to ``date``
    last_leg = len(cols01) - 1
    for i in range(len(cols01)):
        aircorp = re_after_span.findall(cols01[i])[0].strip()
        flight_no = re_flight_no.findall(cols01[i])[0].strip()
        plane_type = re_plane.findall(cols01[i])[0].strip()

        dept_airport = re_after_span.findall(cols02[i])[0].strip()
        # A "+N天" (+N days) marker in the departure text shifts the arrival date.
        if '+2天' in dept_airport:
            days += 2
        elif '+1天' in dept_airport:
            days += 1

        arr_time_airport = re_after_br.findall(cols02[i])[0].strip()
        dept_time = re_dept_time.findall(cols02[i])[0].strip()
        arr_parts = arr_time_airport.split(' ')
        arr_time = arr_parts[0].strip()
        arr_airport = arr_parts[-1].strip()
        if '+1天' in arr_time_airport:
            # Drop the "(+1天)" suffix glued to the arrival time.
            arr_time = arr_time.split('(')[0].strip()
            # Only the last leg's own next-day arrival adds to the offset here.
            if i == last_leg:
                days += 1

        during_time = re_duration.findall(cols03[i])[0].strip()

        aircorps.append(aircorp)
        flight_nos.append(flight_no)
        plane_types.append(plane_type)
        airports.append(dept_airport)
        airports.append(arr_airport)
        timeinfo.append(dept_time)
        timeinfo.append(arr_time)
        timeinfo.append(during_time)

    # Total duration is derived from the collected per-leg time strings.
    during = timeshifter(timeinfo)
    dept_date = datetime.datetime(int(date[0:4]), int(date[5:7]),
                                  int(date[8:]))
    dest_date = dept_date + datetime.timedelta(days)
    dept_daytime = date + 'T' + timeinfo[0] + ':00'
    # timeinfo[-2] is the arrival time of the last leg.
    dest_daytime = str(dest_date).split(' ')[0] + 'T' + timeinfo[-2] + ':00'

    price = re.compile(r'</span>(.*?)</span>', re.S).findall(cols04)[0].strip()
    tax = re.compile(r'参考税 &yen;(.*?)<div class',
                     re.S).findall(cols04)[0].strip()

    # Only one- and two-stop itineraries are supported.
    if flight.stop not in (1, 2):
        return []

    # Per-leg values are joined with '_'. The previous two-stop branch
    # appended flight_nos[2] to the plane types by mistake.
    flight.flight_no = '_'.join(flight_nos)
    flight.plane_no = '_'.join(plane_types)
    flight.airline = '_'.join(aircorps)
    # Translate the displayed airport text to an id when one is known.
    flight.dept_id = airports_dict.get(airports[0], airports[0])
    flight.dest_id = airports_dict.get(airports[-1], airports[-1])
    flight.dept_day = date
    flight.dept_time = dept_daytime
    flight.dest_time = dest_daytime
    flight.dur = during
    flight.price = int(price)
    flight.tax = int(tax)
    flight.surcharge = -1.0
    flight.currency = 'CNY'
    flight.seat_type = '经济舱'
    flight.source = 'elong::elong'
    flight.return_rule = 'NULL'

    flight_tuple = (
        flight.flight_no, flight.plane_no, flight.airline, flight.dept_id,
        flight.dest_id, flight.dept_day, flight.dept_time, flight.dest_time,
        flight.dur, flight.price, flight.tax, flight.surcharge,
        flight.currency, flight.seat_type, flight.source, flight.return_rule,
        flight.stop)

    return flight_tuple
Exemple #25
0
                singleflight.plane_no = segment['aircraftCode']
                singleflight.airline = segment['airlineName']
                singleflight.dept_id = segment['departureAirportCode']
                singleflight.dest_id = segment['arrivalAirportCode']
                singleflight.dept_time = timeshifter(segment['departureTime'])
                singleflight.dest_time = timeshifter(segment['arrivalTime'])
                singleflight.dur = int(segment['duration'] * 60)

                singleflight.flight_key = singleflight.flight_no + '_' + singleflight.dept_id + '_' + singleflight.dest_id
                singleflight_tuple = (singleflight.flight_no, singleflight.airline, singleflight.plane_no, singleflight.dept_id, singleflight.dest_id, \
                        singleflight.dept_time, singleflight.dest_time, singleflight.dur)

                singleFlights[singleflight.flight_key] = singleflight_tuple

            flight.flight_no = flight_no[:-1]
            flight.plane_no = flight_plane[:-1]
            flight.airline = flight_aircorp[:-1]

            flights.append(flight)
        
        return flights, singleFlights

    return None, None
    

def feiquanqiu_task_parser(content):
    """Create the result container used when parsing ``content``.

    NOTE(review): only the initialisation is visible in this block; the rest
    of the function appears to be cut off -- confirm against the full source.
    """
    
    # Initialise the parameters.
    # Used to return 'para'; after the revision it returns 'result'.
    result = {}
    # 'ticket' holds a list and 'flight' holds a dict, both empty to start with.
    result['para'] = {'ticket':[], 'flight':{ } }
Exemple #26
0
def directFlight_parser(flightstring,date,airports_dict):
    """Parse one non-stop table row into a ``Flight`` object.

    The row must contain at least one ``cols01`` ... ``cols06`` cell; the
    first cell of each class is used.

    Args:
        flightstring: HTML text of one result row.
        date: departure date string laid out as ``YYYY-MM-DD`` (year in
            characters 0-3, month in 5-6, day from 8 onwards).
        airports_dict: mapping from the airport text shown in the row to an
            airport id; text missing from the mapping is used unchanged.

    Returns:
        A populated ``Flight`` with ``stop == 0`` and source
        ``'elong::elong'``.  ``dur`` is always 0 in this variant.

    Raises:
        IndexError: when an expected cell or field is missing from the row.
        ValueError: when the date, price or tax is not numeric.
    """
    flight = Flight()

    def first_cell(css_class):
        # Body of the first <td> in the row carrying the given class.
        pattern = r'<td class="%s">(.*?)</td>' % css_class
        return re.compile(pattern, re.S).findall(flightstring)[0]

    # A non-stop row yields exactly one cell per column class.
    cols01 = first_cell('cols01')
    cols02 = first_cell('cols02')
    cols03 = first_cell('cols03')
    cols04 = first_cell('cols04')
    # cols05/cols06 are not read below; looking them up keeps rejecting
    # (IndexError) rows that lack those cells.
    first_cell('cols05')
    first_cell('cols06')

    aircorp = re.compile(r'</span>(.*?)<br />', re.S).findall(cols01)[0].strip()
    flight_no = re.compile(r'<br />(.*?)&nbsp', re.S).findall(cols01)[0].strip()
    plane_type = re.compile(r'method="PlaneType" >(.*?)</a>', re.S).findall(cols01)[0].strip()

    dept_airport = re.compile(r'</span>(.*?)<br />', re.S).findall(cols02)[0].strip()
    dept_time = re.compile(r'<span class=" t14 bold black">(.*?)</span>', re.S).findall(cols02)[0].strip()
    arr_time_airport = re.compile(r'<br />(.*?)$', re.S).findall(cols02)[0].strip()

    arr_parts = arr_time_airport.split(' ')
    arr_time = arr_parts[0].strip()
    arr_airport = arr_parts[-1].strip()
    days = 0  # day offset of the arrival relative to ``date``
    if '+1天' in arr_time_airport:
        # "+1天" (+1 day): arrival is on the next day; drop the "(+1天)"
        # suffix glued to the arrival time.
        days += 1
        arr_time = arr_time.split('(')[0].strip()

    # The duration text is not used (``dur`` stays 0), but the lookup is kept
    # so a row without it is still rejected with IndexError.
    re.compile(r'(.*?)<br />', re.S).findall(cols03)[0].strip()

    during = 0  # duration computation is disabled in this variant
    dept_date = datetime.datetime(int(date[0:4]), int(date[5:7]), int(date[8:]))
    dest_date = dept_date + datetime.timedelta(days)
    dept_daytime = date + 'T' + dept_time + ':00'
    dest_daytime = str(dest_date).split(' ')[0] + 'T' + arr_time + ':00'

    price = re.compile(r'</span>(.*?)</span>', re.S).findall(cols04)[0].strip()
    tax = re.compile(r'参考税 &yen;(.*?)<div class', re.S).findall(cols04)[0].strip()

    flight.flight_no = flight_no
    flight.plane_no = plane_type
    flight.airline = aircorp
    # Translate the displayed airport text to an id when one is known.
    flight.dept_id = airports_dict.get(dept_airport, dept_airport)
    flight.dest_id = airports_dict.get(arr_airport, arr_airport)
    flight.dept_day = date
    flight.dept_time = dept_daytime
    flight.dest_time = dest_daytime
    flight.dur = during
    flight.price = float(price)
    flight.tax = float(tax)
    flight.surcharge = -1.0
    flight.currency = 'CNY'
    flight.seat_type = '经济舱'
    flight.source = 'elong::elong'
    flight.return_rule = 'NULL'
    flight.stop = 0

    return flight
Exemple #27
0
def elong_page_parser(htmlcontent):
    """Parse a result page into itinerary tuples and a per-segment lookup.

    The embedded JSON is located with the module-level ``flightsPattern`` and
    its ``'FlightLegList'`` entries are converted one by one.

    Args:
        htmlcontent: text of the result page.

    Returns:
        ``(tickets, flights)`` where ``tickets`` is a list of 17-field
        itinerary tuples and ``flights`` maps
        ``'<flight_no>_<dept_id>_<dest_id>'`` to an 8-field segment tuple.
        ``([], {})`` is returned for the "page not available" notice and when
        any error occurs while parsing (the error is logged).
    """
    tickets = []
    flights = {}

    # The site's "page does not exist or is temporarily unavailable" notice.
    if htmlcontent.find('您访问的页面不存在或暂时无法访问') != -1:
        return tickets, flights

    try:
        flights_json = flightsPattern.findall(htmlcontent)[0]
        allflights = json.loads(flights_json)['FlightLegList']

        for flightInfo in allflights:
            segs = flightInfo['segs']
            flight = Flight()

            flight.currency = 'CNY'
            flight.seat_type = '经济舱'
            flight.stop = len(segs) - 1
            flight.price = int(flightInfo['cabs'][0]['oprice'])
            flight.tax = int(flightInfo['tax'])
            flight.source = 'elong::elong'
            flight.dur = 0

            airlines = []
            plane_nos = []
            flight_nos = []
            for singleflightInfo in segs:
                eachFlight = EachFlight()
                eachFlight.flight_no = singleflightInfo['fltno']
                eachFlight.plane_no = singleflightInfo['plane']
                eachFlight.airline = singleflightInfo['corpn']
                eachFlight.dept_id = singleflightInfo['dport']
                eachFlight.dest_id = singleflightInfo['aport']
                # time_shifter output looks like 2014-07-11T12:06:00.
                eachFlight.dept_time = time_shifter(singleflightInfo['dtime'])
                eachFlight.dest_time = time_shifter(singleflightInfo['atime'])
                # NOTE(review): 'ftime' is presumably minutes; stored as seconds.
                eachFlight.dur = int(singleflightInfo['ftime']) * 60

                eachFlight.flight_key = eachFlight.flight_no + '_' + \
                    eachFlight.dept_id + '_' + eachFlight.dest_id

                flights[eachFlight.flight_key] = (
                    eachFlight.flight_no, eachFlight.airline,
                    eachFlight.plane_no, eachFlight.dept_id,
                    eachFlight.dest_id, eachFlight.dept_time,
                    eachFlight.dest_time, eachFlight.dur)

                airlines.append(eachFlight.airline)
                plane_nos.append(eachFlight.plane_no)
                flight_nos.append(eachFlight.flight_no)

                flight.dur += eachFlight.dur

            # Add the waiting time between each pair of consecutive segments.
            for i in range(len(segs) - 1):
                flight.dur += cal_wait_time(
                    time_shifter(segs[i]['atime']),
                    time_shifter(segs[i + 1]['dtime']))

            flight.flight_no = '_'.join(flight_nos)
            flight.plane_no = '_'.join(plane_nos)
            flight.airline = '_'.join(airlines)

            flight.dept_id = segs[0]['dport']
            flight.dest_id = segs[-1]['aport']
            flight.dept_time = time_shifter(segs[0]['dtime'])
            flight.dest_time = time_shifter(segs[-1]['atime'])
            flight.dept_day = flight.dept_time.split('T')[0]

            # flight.surcharge and flight.return_rule are not assigned here.
            # NOTE(review): presumably Flight() supplies defaults -- confirm.
            flight_tuple = (
                flight.flight_no, flight.plane_no, flight.airline,
                flight.dept_id, flight.dest_id, flight.dept_day,
                flight.dept_time, flight.dest_time, flight.dur, flight.price,
                flight.tax, flight.surcharge, flight.currency,
                flight.seat_type, flight.source, flight.return_rule,
                flight.stop)

            tickets.append(flight_tuple)

    except Exception as e:
        # Best effort: one malformed itinerary discards the whole page.
        logger.info(str(e))
        return [], {}

    # Previously the success path fell off the end and returned None.
    return tickets, flights
def vuelingparser(content, flight_no, req_dept_time):
    '''
    Look up the price of one fare in a vueling result page.

    The text in ``content`` is scanned with the module-level patterns for
    flight numbers, stations, times and prices.  Every timetable entry is
    repeated three times (once per cabin class) so that it lines up with
    the flat list of prices, and the candidates are then checked in page
    order.

    content       -- page text handed to the module-level patterns.
    flight_no     -- flight number to look for, in the form built here
                     (a second number, when present, is joined with '_').
    req_dept_time -- departure time to look for; compared with ``==``
                     against the value extracted by dept_time_pat.

    Returns the price text (with ',' replaced by '.') of the first
    candidate whose flight number and departure time both match, -1 when
    flight_no_pat finds nothing in ``content``, and None when no candidate
    matches.

    An IndexError propagates when the extracted lists do not line up
    (fewer flight numbers/stations than timetable entries, or more prices
    than three per timetable entry).
    '''

    def _time_fields(stamp):
        # Split the time text on 'T', '-' and ':' and pad with three zeros;
        # the result is what date_handle() is given.
        text = str(stamp)
        for separator in ('T', '-', ':'):
            text = text.replace(separator, ',')
        return text.split(',') + [0, 0, 0]

    raw_flight_nos = flight_no_pat.findall(content)
    if not raw_flight_nos:
        return -1

    # Flight numbers: up to 7 characters after the first '|', optionally
    # followed by a second number located after the last '~^'.
    flight_num_list = []
    for raw in raw_flight_nos:
        bar_at = raw.find('|')
        second_at = raw.rfind('~^')
        number = raw[bar_at + 1:bar_at + 8].replace('~', '')
        if second_at > 0:
            number = number + '_' + \
                raw[second_at + 2:second_at + 9].replace('~', '')
        flight_num_list.append(number)

    # Stations: the three characters before '):' and before the last ')'.
    dept_id_list = []
    dest_id_list = []
    for station_block in station_temp_pat.findall(content):
        compact = station_block.replace('\n', '').replace(' ', '')
        dept_end = compact.find('):')
        dest_end = compact.rfind(')')
        dept_id_list.append(compact[dept_end - 3:dept_end])
        dest_id_list.append(compact[dest_end - 3:dest_end])

    # Timetable: (departure time, arrival time, stops) per entry.
    timetable = []
    for time_block in flight_time_pat.findall(content):
        departs = dept_time_pat.findall(time_block)[0]
        arrives = dest_time_pat.findall(time_block)[0]
        stops = flight_num_pat.findall(time_block)[0]
        timetable.append((departs, arrives, stops))

    # Prices: text after the last '>' minus the 3 trailing characters,
    # decimal comma turned into a dot.
    price_list = [
        price_html[price_html.rfind('>') + 1:-3].replace(',', '.')
        for price_html in price_pat.findall(content)
    ]

    # Cabin classes cycle in this order along the price list
    # (economy, "super economy", business).
    seat_type_list = ['经济舱', '超经济舱', '公务舱']

    # One row per price: each timetable entry appears three times in a row.
    candidates = []
    for idx, (departs, arrives, stops) in enumerate(timetable):
        row = (flight_num_list[idx], dept_id_list[idx], dest_id_list[idx],
               departs, arrives, stops)
        candidates.extend([row] * 3)

    for pos, price in enumerate(price_list):
        flight = Flight()
        number, dept_id, dest_id, departs, arrives, stops = candidates[pos]

        flight.flight_no = number
        flight.plane_no = 'NULL'
        flight.airline = 'vueling'
        flight.dept_id = dept_id
        flight.dest_id = dest_id
        flight.dept_time = departs
        flight.dest_time = arrives

        # Duration in seconds between departure and arrival.
        dept_struct = date_handle(_time_fields(departs))
        dest_struct = date_handle(_time_fields(arrives))
        flight.dur = int(time.mktime(dest_struct)) - int(
            time.mktime(dept_struct))

        flight.price = price
        flight.dept_day = flight.dept_time[:10]
        flight.currency = 'EUR'
        flight.seat_type = seat_type_list[pos % 3]
        flight.source = 'vueling:vueling'
        flight.stop = stops

        wanted = (flight.flight_no == flight_no
                  and flight.dept_time == req_dept_time)
        if wanted:
            return flight.price

    # No candidate matched the requested flight number and departure time.
    return None
Exemple #29
0
        flight.seat_type = '经济舱'

        flight.stop = int(flight_info['transfer'])
        if flight.stop > 1:
            print 'found a flight whose transfer_times > 1'
            continue

        flight.source = 'lcair::lcair'

        flight.dept_id, flight.dest_id = flight_info['routeStr'].split('-')[0], flight_info['routeStr'].split('-')[-1]

        flight.dept_day = flight_info['fromDate']

        flight.flight_no = ''
        flight.airline = ''
        flight.plane_no = ''

        flight_dur = 0
        
        #direct
        if flight.stop == 0:
            for single_flight in segments[0]['flights']:
                flight.flight_no = single_flight['flightNumber']
                try:
                    flight.airline = Airline[single_flight['airCo']]
                except:
                    flight.airline = single_flight['airCo']

                flight.plane_no = single_flight['equipType']
                flight.dept_time = CalDateTime(single_flight['fromDate'], single_flight['fromTime'])
                flight.dest_time = CalDateTime(single_flight['toDate'], single_flight['toTime'])