def _parse(self):  # pylint: disable=R0912,R0915
        result = self._order
        if dt.now().timestamp() / int(self.received_time) < 1:
            self.received_time = int(int(self.received_time) / 1000)
        record_locator = strip_str(
            take_first(self._etree, aa_xp.RECORD_LOCATOR))
        if record_locator:
            result['record_locator'] = record_locator
        else:
            self.logger.error('recoder_locator is empty %s', self._message_id)

        itinerary_info_guest = strip_list(self._etree.xpath(aa_xp.INFO_2))
        if itinerary_info_guest:
            result['guest_name'] = itinerary_info_guest[0]
            # TODO (LensHo): to be fix
            if len(itinerary_info_guest) > 1:
                result['meal'] = itinerary_info_guest[-1]
            else:
                self.logger.warning('afford is empty %s', self._message_id)
        else:
            self.logger.error('guest_name is empty %s', self._message_id)

        itinerary_info_1 = strip_list(self._etree.xpath(aa_xp.INFO_1))
        if len(itinerary_info_1) == 9:
            result['carrier'] = itinerary_info_1[0]
            result['flight_number'] = itinerary_info_1[1]
            result['depart_city'] = itinerary_info_1[2]
            result['depart_date'] = itinerary_info_1[3]
            result['depart_time'] = itinerary_info_1[4]
            result['arrive_city'] = itinerary_info_1[5]
            result['arrive_date'] = itinerary_info_1[6]
            result['arrive_time'] = itinerary_info_1[7]
            result['booking_code'] = itinerary_info_1[8]
            depart_date_time = ' '.join(
                [itinerary_info_1[3], itinerary_info_1[4]])
            arrive_date_time = ' '.join(
                [itinerary_info_1[6], itinerary_info_1[7]])
            tz_depart = to_timezone(result['depart_city'])
            tz_arrive = to_timezone(result['arrive_city'])
            depart_dt = DateTime(depart_date_time, 'DDMMM h:mm A')
            depart_year = depart_dt.received_time_to_year(self.received_time)
            depart_datetime_formatted = depart_dt.year_to_datetime(
                depart_year, tz_depart)
            if depart_datetime_formatted:
                result['depart_datetime_formatted'] = depart_datetime_formatted
            else:
                self.logger.error('depart_datetime_formatted is empty %s',
                                  self._message_id)
            arrive_dt = DateTime(arrive_date_time, 'DDMMM h:mm A')
            arrive_year = arrive_dt.received_time_to_year(self.received_time)
            arrive_datetime_formatted = arrive_dt.year_to_datetime(
                arrive_year, tz_arrive)
            if arrive_datetime_formatted:
                result['arrive_datetime_formatted'] = arrive_datetime_formatted
            else:
                self.logger.error('arrive_datetime_formatted is empty %s',
                                  self._message_id)
        else:
            self.logger.error('itinerary_details is empty %s',
                              self._message_id)

        # TODO: (LensHo) might not exist
        itinerary_info_2 = strip_list(self._etree.xpath(aa_xp.INFO_3))
        itinerary_info_trans = strip_list(self._etree.xpath(aa_xp.INFO_4))
        if itinerary_info_trans:
            result['transfer_information'] = itinerary_info_trans[0]
        else:
            self.logger.info('transfer_information is empty %s',
                             self._message_id)
        itinerary_info_afford = strip_list(self._etree.xpath(aa_xp.INFO_5))
        if itinerary_info_afford:
            result['trans_meal'] = itinerary_info_afford[-1]
        if len(itinerary_info_2) == 9:
            depart_date_time = ' '.join(
                [itinerary_info_2[3], itinerary_info_2[4]])
            arrive_date_time = ' '.join(
                [itinerary_info_2[6], itinerary_info_2[7]])
            result['trans_carrier'] = itinerary_info_2[0]
            result['trans_flight_number'] = itinerary_info_2[1]
            result['trans_depart_city'] = itinerary_info_2[2]
            result['trans_depart_date'] = itinerary_info_2[3]
            result['trans_depart_time'] = itinerary_info_2[4]
            result['trans_arrive_city'] = itinerary_info_2[5]
            result['trans_arrive_date'] = itinerary_info_2[6]
            result['trans_arrive_time'] = itinerary_info_2[7]
            result['trans_booking_code'] = itinerary_info_2[8]
            tz_depart = to_timezone(result['trans_depart_city'])
            tz_arrive = to_timezone(result['trans_arrive_city'])
            depart_dt = DateTime(depart_date_time, 'DDMMM h:mm A')
            depart_year = depart_dt.received_time_to_year(self.received_time)
            depart_datetime_formatted_2 = depart_dt.year_to_datetime(
                depart_year, tz_depart)
            if depart_datetime_formatted_2:
                result['trans_depart_datetime_formatted'] = \
                    depart_datetime_formatted_2
            else:
                self.logger.error(
                    'trans_depart_datetime_formatted is empty %s',
                    self._message_id)
            arrive_dt = DateTime(arrive_date_time, 'DDMMM h:mm A')
            arrive_year = arrive_dt.received_time_to_year(self.received_time)
            arrive_datetime_formatted_2 = arrive_dt.year_to_datetime(
                arrive_year, tz_arrive)
            if arrive_datetime_formatted_2:
                result['trans_arrive_datetime_formatted'] = \
                    arrive_datetime_formatted_2
            else:
                self.logger.error(
                    'trans_arrive_datetime_formatted is empty %s',
                    self._message_id)
        else:
            self.logger.info('transfer information is empty %s',
                             self._message_id)

        receipt_info = self._etree.xpath(aa_xp.INFO_6)
        if receipt_info:
            name_dict = {
                'TICKET NUMBER': 'ticket_number',
                'FARE-SGD': 'fare_sgd',
                'EQUIV FARE-EUR': 'fare_equal_to_eur',
                'TAXES AND CARRIER-IMPOSED FEES': 'taxes_fee'
            }
            for i in receipt_info[0]:
                name = take_first(i, './/strong/text()')
                value = take_first(i, './/td/text()')
                if value and name in name_dict.keys():
                    result[name_dict[name]] = value
                elif name in ['TICKET TOTAL', 'PASSENGER']:
                    pass
                else:
                    self.logger.warning(
                        '%s: passengers is empty',
                        self._message_id,
                    )
        else:
            self.logger.error('receipt_info is empty %s', self._message_id)

        total_price = strip_str(take_first(self._etree, aa_xp.TOTAL))
        if total_price:
            result['total_cost'] = total_price
        else:
            self.logger.error('price_details is empty %s', self._message_id)

        notice_1 = take_first(self._etree, aa_xp.NOTICE_1)
        notice_3 = take_first(self._etree, aa_xp.NOTICE_3)
        notice_1 = notice_1 and strip_str(remove_tags(
            etree.tostring(notice_1)))
        if not notice_1:
            self.logger.warning('part1 of notice is empty %s',
                                self._message_id)
        notice_2 = strip_str(take_first(self._etree, aa_xp.NOTICE_2))
        if not notice_2:
            self.logger.warning('part2 of notice is empty %s',
                                self._message_id)
        notice_3 = strip_str(remove_tags(etree.tostring(notice_3))) \
            if notice_3 else ''
        if not notice_3:
            self.logger.warning('part3 of notice is empty %s',
                                self._message_id)
        if notice_1 or notice_2 or notice_3:
            result['notice'] = strip_list([notice_1, notice_2, notice_3])

        related_links_1 = self._etree.xpath(aa_xp.RELATED_LINK_1)
        if not related_links_1:
            self.logger.warning('part1 of related_links is empty %s',
                                self._message_id)
        related_links_3 = self._etree.xpath(aa_xp.RELATED_LINK_3)
        if not related_links_3:
            self.logger.warning('part2 of related_links is empty %s',
                                self._message_id)
        related_texts_1 = strip_list(self._etree.xpath(aa_xp.RELATED_TEXT_1))
        related_texts_3 = strip_list(self._etree.xpath(aa_xp.RELATED_TEXT_3))
        links = chain(related_links_1 or [], related_links_3 or [])
        texts = chain(related_texts_1 or [], related_texts_3 or [])
        name_dict = {
            'Check-In Options': 'check_in_options_link',
            'Baggage Information.': 'baggage_information_link',
            'U.S. Entry Requirements.': 'us_entry_requirements_link',
            'Contact American.': 'contact_american_link',
            'Worldwide Phone Numbers': 'worldwide_phone_numbers_link',
            'Conditions of Carriage': 'conditions_of_carriage_link'
        }
        for i, j in zip(texts, links):
            if i in name_dict:
                result[name_dict[i]] = j
            else:
                self.logger.warning('%s is %s %s', i, j, self._message_id)
        return result
Beispiel #2
0
    def _parse(self):
        result = self._order
        confirm_code = take_first(self._etree, delta_xp.CONFIRM_CODE)
        if not confirm_code:
            raise MissingColumnError('confirm_code', self._message_id)
        confirm_code = remove_tags(etree.tostring(confirm_code))
        result['confirm_code'] = ''.join(confirm_code)

        result.set('modify_link', take_first(self._etree,
                                             delta_xp.CHANGE_LINK))

        # TODO(LensHo): 起飞降落只有一个日期
        flight_date = strip_list(self._etree.xpath(delta_xp.FLIGHT_DATE))
        if not flight_date:
            raise MissingColumnError('flight_date', self._message_id)

        result['depart_date'] = result['arrive_date'] = flight_date[0]
        if len(flight_date) >= 2:
            result['trans_depart_date'] = flight_date[1]
            result['trans_arrive_date'] = flight_date[1]

        flight_num = strip_list(self._etree.xpath(delta_xp.FLIGHT_NUM))
        if not flight_num:
            raise MissingColumnError('flight_number', self._message_id)

        result['flight_number'] = flight_num[0]
        if len(flight_num) >= 2:
            result['trans_flight_number'] = flight_num[1]

        cabin = strip_list(self._etree.xpath(delta_xp.CABIN))
        if cabin:
            result['class'] = cabin[0]
            if len(cabin) >= 2:
                result['trans_class'] = cabin[1]
        else:
            self.logger.error('class is empty %s', self._message_id)

        depart_station = strip_list(self._etree.xpath(delta_xp.DEPART_STATION))
        if not depart_station:
            raise MissingColumnError('depart_station', self._message_id)
        result['depart_city'] = depart_station[0]
        if len(depart_station) >= 2:
            result['trans_depart_city'] = depart_station[1]

        depart_time = strip_list(self._etree.xpath(delta_xp.DEPART_TIME))
        if not depart_time:
            raise MissingColumnError('depart_time', self._message_id)
        result['depart_time'] = depart_time[0]
        if len(depart_time) >= 2:
            result['trans_depart_time'] = depart_time[1]

        arrive_time = strip_list(self._etree.xpath(delta_xp.ARRIVE_TIME))
        if not arrive_time:
            raise MissingColumnError('arrive_time', self._message_id)
        result['arrive_time'] = arrive_time[0]
        if len(arrive_time) >= 2:
            result['trans_arrive_time'] = arrive_time[1]
        elif len(arrive_time) > 2 and 'm' in arrive_time[2]:
            self.logger.error('换乘超过1次 %s', self._message_id)

        arrive_station = strip_list(self._etree.xpath(delta_xp.ARRIVE_STATION))
        if not arrive_station:
            raise MissingColumnError('arrive_station', self._message_id)
        result['arrive_city'] = arrive_station[0]
        if len(arrive_station) >= 2:
            result['trans_arrive_city'] = arrive_station[1]

        restricted_title = strip_str(
            take_first(self._etree, delta_xp.RESTRICTED_TITLE))
        restricted_text = take_first(self._etree, delta_xp.RESTRICTED_TEXT)
        if len(restricted_text) and restricted_title:
            text = strip_str(remove_tags(etree.tostring(restricted_text)))
            if 'RESTRICTED HAZARDOUS ITEMS' in restricted_title:
                result['restricted_hazardous_items'] = text
            else:
                # 有可能是其他条目
                self.logger.warning('%s is %s %s', restricted_title, text,
                                    self._message_id)
        else:
            self.logger.error('restricted_hazardous_items is empty %s',
                              self._message_id)

        result['guest_name'] = strip_str(
            take_first(self._etree, delta_xp.GUEST_NAME))
        if not result['guest_name']:
            result.pop('guest_name')
            self.logger.error('guest_name is empty %s', self._message_id)

        seat = strip_list(self._etree.xpath(delta_xp.SEAT))
        if seat:
            result['seat'] = seat[0]
            if len(seat) >= 2:
                result['trans_seat'] = seat[1]
        else:
            self.logger.error('seat is empty %s', self._message_id)

        result['ticket_number'] = strip_str(
            take_last(self._etree, delta_xp.TICKET_NUM))
        if not result['ticket_number']:
            raise MissingColumnError('ticket_number', self._message_id)

        try:
            result['issue_date'], result['expire_date'] = strip_list(
                self._etree.xpath(delta_xp.ISSUE_EXPIRE_DATE))
        except ValueError:
            self.logger.error('issue_expire_date is empty %s',
                              self._message_id)

        year = int('20' + result.get('issue_date')[-2:]) \
            if result.get('issue_date') else 0
        tz_dp = to_timezone(result.get('depart_city'))
        tz_ar = to_timezone(result.get('arrive_city'))
        dp_dt = ' '.join([result['depart_date'], result['depart_time']])
        ar_dt = ' '.join([result['arrive_date'], result['arrive_time']])
        dt_format = 'DDMMM h:mmA'
        dt_dp = DateTime(dp_dt, dt_format)
        year = year or dt_dp.received_time_to_year(self.received_time)
        result['depart_datetime_formatted'] = dt_dp.year_to_datetime(
            year, tz_dp)
        if not result['depart_datetime_formatted']:
            result.pop('depart_datetime_formatted')
            self.logger.error('depart datetime formatted is empty %s',
                              self._message_id)

        result['arrive_datetime_formatted'] = DateTime(
            ar_dt, dt_format).year_to_datetime(year, tz_ar)

        if not result['arrive_datetime_formatted']:
            result.pop('arrive_datetime_formatted')
            self.logger.error('arrive datetime formatted is empty %s',
                              self._message_id)

        # TODO(LensHo): 没考虑转机时过年的情况, 只考虑换乘一次
        trans_dp_date = result.get('trans_depart_date')
        trans_dp_time = result.get('trans_depart_time')
        if trans_dp_date and trans_dp_time:
            trans_dp_dt = trans_dp_date + ' ' + trans_dp_time
            result['trans_depart_datetime_formatted'] = DateTime(
                trans_dp_dt, dt_format).year_to_datetime(year, tz_ar)

            if not result['trans_depart_datetime_formatted']:
                result.pop('trans_depart_datetime_formatted')
                self.logger.error(
                    'trans_depart_datetime_formatted is empty %s',
                    self._message_id)

        trans_ar_date = result.get('trans_arrive_date')
        trans_ar_time = result.get('trans_arrive_time')
        tz_trans_ar = to_timezone(result.get('trans_arrive_city'))
        if trans_ar_date and trans_ar_time:
            trans_ar_dt = trans_ar_date + ' ' + trans_ar_time
            result['trans_arrive_datetime_formatted'] = DateTime(
                trans_ar_dt, dt_format).year_to_datetime(year, tz_trans_ar)
            if not result['trans_arrive_datetime_formatted']:
                result.pop('trans_arrive_datetime_formatted')
                self.logger.error(
                    'trans_arrive_datetime_formatted is empty %s',
                    self._message_id)

        result['payment_card_number'] = strip_str(
            take_first(self._etree, delta_xp.PAYMENT_METHOD_CARD))
        if not result['payment_card_number']:
            result.pop('payment_card_number')
            self.logger.error('payment_card is empty %s', self._message_id)

        result['payment'] = strip_str(
            take_first(self._etree, delta_xp.PAYMENT_METHOD_MONEY))
        if not result['payment']:
            result.pop('payment')
            self.logger.error('payment is empty %s', self._message_id)

        result['duration'] = strip_str(
            take_first(self._etree, delta_xp.DURATION))
        if not result['duration']:
            result.pop('duration')
            self.logger.error('duration is empty %s', self._message_id)

        result['transportation_fare'] = strip_str(
            take_first(self._etree, delta_xp.BASE_FARE))
        if not result['transportation_fare']:
            result.pop('transportation_fare')
            self.logger.error('transportation_fare is empty %s',
                              self._message_id)

        result['price'] = strip_str(take_first(self._etree,
                                               delta_xp.TAXES_FEE))
        if not result['price']:
            result.pop('price')
            self.logger.error('price is empty %s', self._message_id)

        result['total_cost'] = strip_str(
            take_first(self._etree, delta_xp.TOTAL))
        if not result['total_cost']:
            result.pop('total_cost')
            self.logger.error('total_cost is empty %s', self._message_id)

        baggage_info = take_first(self._etree, delta_xp.BAGGAGE_INFO)
        baggage_text = take_first(self._etree, delta_xp.BAGGAGE_TEXT)
        baggage_link = take_first(self._etree, delta_xp.BAGGAGE_LINK)
        if baggage_info:
            result['baggage_allowance'] = {
                'text': remove_tags(etree.tostring(baggage_info)).strip()
            }
            if baggage_link and baggage_text:
                result['baggage_allowance'].update(
                    {'links': {
                        'name': baggage_text,
                        'value': baggage_link
                    }})
        else:
            self.logger.error('baggage_allowance is empty %s',
                              self._message_id)

        hazardous_title = strip_str(
            take_first(self._etree, delta_xp.HAZARDOUS_MATERIALS_TITLE))
        hazardous_text_1 = strip_str(
            take_first(self._etree, delta_xp.HAZARDOUS_MATERIALS_TEXT_1))
        hazardous_text_2 = take_first(self._etree,
                                      delta_xp.HAZARDOUS_MATERIALS_TEXT_2)
        link = take_first(self._etree, delta_xp.HAZARDOUS_MATERIALS_LINK)
        text = strip_str(
            take_first(self._etree, delta_xp.HAZARDOUS_MATERIALS_TEXT))
        if hazardous_title == 'Transportation of Hazardous Materials':
            if hazardous_text_2:
                hazardous_text_2 = remove_tags(
                    etree.tostring(hazardous_text_2))
            else:
                hazardous_text_2 = ''
                self.logger.error(
                    'part of hazardous_material text is empty %s',
                    self._message_id)
            if hazardous_text_1 or hazardous_text_2:
                hazardous_text = '\n'.join(
                    [hazardous_text_1, hazardous_text_2]).strip()
                result['transportation_of_hazardous_materials'] = {
                    'text': hazardous_text
                }
            if link and text and \
                    result.get('transportation_of_hazardous_materials'):
                result['transportation_of_hazardous_materials'].update(
                    {'links': {
                        'name': text,
                        'value': link
                    }})
        else:
            self.logger.warning(
                'Transportation of Hazardous Materials not found %s',
                self._message_id)

        return result