def _parse(self):  # pylint: disable=R0912,R0915
    """Fill ``self._order`` from the parsed message tree and return it.

    Reads the record locator, guest/meal info, the first flight leg, an
    optional transfer leg, the receipt table, the total price, the notice
    paragraphs and the related links via the ``aa_xp`` XPath expressions.
    Missing pieces are logged (never raised) together with the message id.

    Returns:
        The ``self._order`` object, updated in place.
    """
    result = self._order

    # A received_time greater than the current epoch seconds is scaled
    # down by 1000 -- presumably it arrived in milliseconds; TODO confirm.
    if dt.now().timestamp() / int(self.received_time) < 1:
        self.received_time = int(int(self.received_time) / 1000)

    record_locator = strip_str(
        take_first(self._etree, aa_xp.RECORD_LOCATOR))
    if record_locator:
        result['record_locator'] = record_locator
    else:
        self.logger.error('record_locator is empty %s', self._message_id)

    guest_info = strip_list(self._etree.xpath(aa_xp.INFO_2))
    if guest_info:
        result['guest_name'] = guest_info[0]
        # TODO (LensHo): to be fix
        if len(guest_info) > 1:
            result['meal'] = guest_info[-1]
        else:
            self.logger.warning('afford is empty %s', self._message_id)
    else:
        self.logger.error('guest_name is empty %s', self._message_id)

    # First leg: only accepted when exactly nine fields were extracted.
    first_leg = strip_list(self._etree.xpath(aa_xp.INFO_1))
    if len(first_leg) == 9:
        self._fill_flight_segment(result, first_leg)
    else:
        self.logger.error('itinerary_details is empty %s',
                          self._message_id)

    # TODO: (LensHo) might not exist
    second_leg = strip_list(self._etree.xpath(aa_xp.INFO_3))
    transfer_info = strip_list(self._etree.xpath(aa_xp.INFO_4))
    if transfer_info:
        result['transfer_information'] = transfer_info[0]
    else:
        self.logger.info('transfer_information is empty %s',
                         self._message_id)
    transfer_meal = strip_list(self._etree.xpath(aa_xp.INFO_5))
    if transfer_meal:
        result['trans_meal'] = transfer_meal[-1]
    if len(second_leg) == 9:
        self._fill_flight_segment(result, second_leg, prefix='trans_')
    else:
        self.logger.info('transfer information is empty %s',
                         self._message_id)

    receipt_info = self._etree.xpath(aa_xp.INFO_6)
    if receipt_info:
        receipt_names = {
            'TICKET NUMBER': 'ticket_number',
            'FARE-SGD': 'fare_sgd',
            'EQUIV FARE-EUR': 'fare_equal_to_eur',
            'TAXES AND CARRIER-IMPOSED FEES': 'taxes_fee'
        }
        for row in receipt_info[0]:
            name = take_first(row, './/strong/text()')
            value = take_first(row, './/td/text()')
            if value and name in receipt_names:
                result[receipt_names[name]] = value
            elif name in ['TICKET TOTAL', 'PASSENGER']:
                # Known rows that are intentionally not stored.
                pass
            else:
                self.logger.warning(
                    '%s: passengers is empty',
                    self._message_id,
                )
    else:
        self.logger.error('receipt_info is empty %s', self._message_id)

    total_price = strip_str(take_first(self._etree, aa_xp.TOTAL))
    if total_price:
        result['total_cost'] = total_price
    else:
        self.logger.error('price_details is empty %s', self._message_id)

    # Notice parts 1 and 3 are serialized nodes with tags removed; part 2
    # is taken as text directly.
    notice_1 = take_first(self._etree, aa_xp.NOTICE_1)
    notice_3 = take_first(self._etree, aa_xp.NOTICE_3)
    notice_1 = notice_1 and strip_str(remove_tags(
        etree.tostring(notice_1)))
    if not notice_1:
        self.logger.warning('part1 of notice is empty %s',
                            self._message_id)
    notice_2 = strip_str(take_first(self._etree, aa_xp.NOTICE_2))
    if not notice_2:
        self.logger.warning('part2 of notice is empty %s',
                            self._message_id)
    notice_3 = strip_str(remove_tags(etree.tostring(notice_3))) \
        if notice_3 else ''
    if not notice_3:
        self.logger.warning('part3 of notice is empty %s',
                            self._message_id)
    if notice_1 or notice_2 or notice_3:
        result['notice'] = strip_list([notice_1, notice_2, notice_3])

    related_links_1 = self._etree.xpath(aa_xp.RELATED_LINK_1)
    if not related_links_1:
        self.logger.warning('part1 of related_links is empty %s',
                            self._message_id)
    related_links_3 = self._etree.xpath(aa_xp.RELATED_LINK_3)
    if not related_links_3:
        self.logger.warning('part2 of related_links is empty %s',
                            self._message_id)
    related_texts_1 = strip_list(self._etree.xpath(aa_xp.RELATED_TEXT_1))
    related_texts_3 = strip_list(self._etree.xpath(aa_xp.RELATED_TEXT_3))
    links = chain(related_links_1 or [], related_links_3 or [])
    texts = chain(related_texts_1 or [], related_texts_3 or [])
    link_names = {
        'Check-In Options': 'check_in_options_link',
        'Baggage Information.': 'baggage_information_link',
        'U.S. Entry Requirements.': 'us_entry_requirements_link',
        'Contact American.': 'contact_american_link',
        'Worldwide Phone Numbers': 'worldwide_phone_numbers_link',
        'Conditions of Carriage': 'conditions_of_carriage_link'
    }
    # Texts and links are paired positionally.
    for text, link in zip(texts, links):
        if text in link_names:
            result[link_names[text]] = link
        else:
            self.logger.warning('%s is %s %s', text, link,
                                self._message_id)
    return result


def _fill_flight_segment(self, result, fields, prefix=''):
    """Store one nine-field flight leg and its formatted datetimes.

    Args:
        result: order object receiving the values.
        fields: the nine stripped strings of the leg, in the order
            carrier, flight number, depart city/date/time,
            arrive city/date/time, booking code.
        prefix: key prefix, ``''`` for the first leg and ``'trans_'``
            for the transfer leg.
    """
    names = ('carrier', 'flight_number', 'depart_city', 'depart_date',
             'depart_time', 'arrive_city', 'arrive_date', 'arrive_time',
             'booking_code')
    for name, value in zip(names, fields):
        result[prefix + name] = value

    timezones = {
        'depart': to_timezone(result[prefix + 'depart_city']),
        'arrive': to_timezone(result[prefix + 'arrive_city']),
    }
    date_times = {
        'depart': ' '.join([fields[3], fields[4]]),
        'arrive': ' '.join([fields[6], fields[7]]),
    }
    for side in ('depart', 'arrive'):
        moment = DateTime(date_times[side], 'DDMMM h:mm A')
        # The e-mail carries no year; it is derived from received_time.
        year = moment.received_time_to_year(self.received_time)
        formatted = moment.year_to_datetime(year, timezones[side])
        key = prefix + side + '_datetime_formatted'
        if formatted:
            result[key] = formatted
        else:
            self.logger.error(key + ' is empty %s', self._message_id)
def _parse(self):
    """Fill ``self._order`` from the parsed message tree and return it.

    Uses the ``delta_xp`` XPath expressions. Values for a second
    (transfer) leg are stored under ``trans_``-prefixed keys when the
    corresponding list has at least two entries.

    Returns:
        The ``self._order`` object, updated in place.

    Raises:
        MissingColumnError: when the confirmation code, flight date,
            flight number, depart/arrive station, depart/arrive time or
            ticket number cannot be extracted.
    """
    result = self._order

    def required(xpath, column):
        """Return the stripped XPath matches or raise if there are none."""
        values = strip_list(self._etree.xpath(xpath))
        if not values:
            raise MissingColumnError(column, self._message_id)
        return values

    def keep_if_present(key, value, message):
        """Store ``value``; drop the key again and log when it is falsy."""
        result[key] = value
        if not result[key]:
            result.pop(key)
            self.logger.error(message, self._message_id)

    def optional_text(key, xpath, message):
        """Store the stripped first match of ``xpath`` if it is non-empty."""
        keep_if_present(key, strip_str(take_first(self._etree, xpath)),
                        message)

    confirm_code = take_first(self._etree, delta_xp.CONFIRM_CODE)
    if not confirm_code:
        raise MissingColumnError('confirm_code', self._message_id)
    confirm_code = remove_tags(etree.tostring(confirm_code))
    result['confirm_code'] = ''.join(confirm_code)
    result.set('modify_link',
               take_first(self._etree, delta_xp.CHANGE_LINK))

    # TODO(LensHo): departure and arrival share a single date
    flight_date = required(delta_xp.FLIGHT_DATE, 'flight_date')
    result['depart_date'] = result['arrive_date'] = flight_date[0]
    if len(flight_date) >= 2:
        result['trans_depart_date'] = flight_date[1]
        result['trans_arrive_date'] = flight_date[1]

    flight_num = required(delta_xp.FLIGHT_NUM, 'flight_number')
    result['flight_number'] = flight_num[0]
    if len(flight_num) >= 2:
        result['trans_flight_number'] = flight_num[1]

    cabin = strip_list(self._etree.xpath(delta_xp.CABIN))
    if cabin:
        result['class'] = cabin[0]
        if len(cabin) >= 2:
            result['trans_class'] = cabin[1]
    else:
        self.logger.error('class is empty %s', self._message_id)

    depart_station = required(delta_xp.DEPART_STATION, 'depart_station')
    result['depart_city'] = depart_station[0]
    if len(depart_station) >= 2:
        result['trans_depart_city'] = depart_station[1]

    depart_time = required(delta_xp.DEPART_TIME, 'depart_time')
    result['depart_time'] = depart_time[0]
    if len(depart_time) >= 2:
        result['trans_depart_time'] = depart_time[1]

    arrive_time = required(delta_xp.ARRIVE_TIME, 'arrive_time')
    result['arrive_time'] = arrive_time[0]
    if len(arrive_time) >= 2:
        result['trans_arrive_time'] = arrive_time[1]
    # Was an ``elif`` of the check above and therefore unreachable: a
    # third time entry means more than one transfer, which is unsupported.
    if len(arrive_time) > 2 and 'm' in arrive_time[2]:
        self.logger.error('换乘超过1次 %s', self._message_id)

    arrive_station = required(delta_xp.ARRIVE_STATION, 'arrive_station')
    result['arrive_city'] = arrive_station[0]
    if len(arrive_station) >= 2:
        result['trans_arrive_city'] = arrive_station[1]

    restricted_title = strip_str(
        take_first(self._etree, delta_xp.RESTRICTED_TITLE))
    restricted_text = take_first(self._etree, delta_xp.RESTRICTED_TEXT)
    # Guard against a missing node before calling len() on it.
    if (restricted_text is not None and len(restricted_text)
            and restricted_title):
        text = strip_str(remove_tags(etree.tostring(restricted_text)))
        if 'RESTRICTED HAZARDOUS ITEMS' in restricted_title:
            result['restricted_hazardous_items'] = text
        else:
            # Might be some other entry.
            self.logger.warning('%s is %s %s', restricted_title, text,
                                self._message_id)
    else:
        self.logger.error('restricted_hazardous_items is empty %s',
                          self._message_id)

    optional_text('guest_name', delta_xp.GUEST_NAME,
                  'guest_name is empty %s')

    seat = strip_list(self._etree.xpath(delta_xp.SEAT))
    if seat:
        result['seat'] = seat[0]
        if len(seat) >= 2:
            result['trans_seat'] = seat[1]
    else:
        self.logger.error('seat is empty %s', self._message_id)

    result['ticket_number'] = strip_str(
        take_last(self._etree, delta_xp.TICKET_NUM))
    if not result['ticket_number']:
        raise MissingColumnError('ticket_number', self._message_id)

    try:
        # Exactly two values are expected; anything else fails to unpack.
        result['issue_date'], result['expire_date'] = strip_list(
            self._etree.xpath(delta_xp.ISSUE_EXPIRE_DATE))
    except ValueError:
        self.logger.error('issue_expire_date is empty %s',
                          self._message_id)

    # Prefer the two-digit year at the end of the issue date; otherwise
    # fall back to the year derived from received_time below.
    year = int('20' + result.get('issue_date')[-2:]) \
        if result.get('issue_date') else 0
    tz_dp = to_timezone(result.get('depart_city'))
    tz_ar = to_timezone(result.get('arrive_city'))
    dp_dt = ' '.join([result['depart_date'], result['depart_time']])
    ar_dt = ' '.join([result['arrive_date'], result['arrive_time']])
    dt_format = 'DDMMM h:mmA'
    dt_dp = DateTime(dp_dt, dt_format)
    year = year or dt_dp.received_time_to_year(self.received_time)

    keep_if_present('depart_datetime_formatted',
                    dt_dp.year_to_datetime(year, tz_dp),
                    'depart datetime formatted is empty %s')
    keep_if_present('arrive_datetime_formatted',
                    DateTime(ar_dt, dt_format).year_to_datetime(
                        year, tz_ar),
                    'arrive datetime formatted is empty %s')

    # TODO(LensHo): a transfer spanning New Year is not handled, and only
    # a single transfer is considered
    trans_dp_date = result.get('trans_depart_date')
    trans_dp_time = result.get('trans_depart_time')
    if trans_dp_date and trans_dp_time:
        trans_dp_dt = trans_dp_date + ' ' + trans_dp_time
        # The transfer departure uses the first leg's arrival timezone.
        keep_if_present('trans_depart_datetime_formatted',
                        DateTime(trans_dp_dt, dt_format).year_to_datetime(
                            year, tz_ar),
                        'trans_depart_datetime_formatted is empty %s')

    trans_ar_date = result.get('trans_arrive_date')
    trans_ar_time = result.get('trans_arrive_time')
    tz_trans_ar = to_timezone(result.get('trans_arrive_city'))
    if trans_ar_date and trans_ar_time:
        trans_ar_dt = trans_ar_date + ' ' + trans_ar_time
        keep_if_present('trans_arrive_datetime_formatted',
                        DateTime(trans_ar_dt, dt_format).year_to_datetime(
                            year, tz_trans_ar),
                        'trans_arrive_datetime_formatted is empty %s')

    optional_text('payment_card_number', delta_xp.PAYMENT_METHOD_CARD,
                  'payment_card is empty %s')
    optional_text('payment', delta_xp.PAYMENT_METHOD_MONEY,
                  'payment is empty %s')
    optional_text('duration', delta_xp.DURATION,
                  'duration is empty %s')
    optional_text('transportation_fare', delta_xp.BASE_FARE,
                  'transportation_fare is empty %s')
    optional_text('price', delta_xp.TAXES_FEE,
                  'price is empty %s')
    optional_text('total_cost', delta_xp.TOTAL,
                  'total_cost is empty %s')

    baggage_info = take_first(self._etree, delta_xp.BAGGAGE_INFO)
    baggage_text = take_first(self._etree, delta_xp.BAGGAGE_TEXT)
    baggage_link = take_first(self._etree, delta_xp.BAGGAGE_LINK)
    if baggage_info:
        result['baggage_allowance'] = {
            'text': remove_tags(etree.tostring(baggage_info)).strip()
        }
        if baggage_link and baggage_text:
            result['baggage_allowance'].update(
                {'links': {
                    'name': baggage_text,
                    'value': baggage_link
                }})
    else:
        self.logger.error('baggage_allowance is empty %s',
                          self._message_id)

    hazardous_title = strip_str(
        take_first(self._etree, delta_xp.HAZARDOUS_MATERIALS_TITLE))
    hazardous_text_1 = strip_str(
        take_first(self._etree, delta_xp.HAZARDOUS_MATERIALS_TEXT_1))
    hazardous_text_2 = take_first(self._etree,
                                  delta_xp.HAZARDOUS_MATERIALS_TEXT_2)
    link = take_first(self._etree, delta_xp.HAZARDOUS_MATERIALS_LINK)
    text = strip_str(
        take_first(self._etree, delta_xp.HAZARDOUS_MATERIALS_TEXT))
    if hazardous_title == 'Transportation of Hazardous Materials':
        if hazardous_text_2:
            hazardous_text_2 = remove_tags(
                etree.tostring(hazardous_text_2))
        else:
            hazardous_text_2 = ''
            self.logger.error(
                'part of hazardous_material text is empty %s',
                self._message_id)
        if hazardous_text_1 or hazardous_text_2:
            hazardous_text = '\n'.join(
                [hazardous_text_1, hazardous_text_2]).strip()
            result['transportation_of_hazardous_materials'] = {
                'text': hazardous_text
            }
        # The link is only attached when the text entry was stored.
        if link and text and \
                result.get('transportation_of_hazardous_materials'):
            result['transportation_of_hazardous_materials'].update(
                {'links': {
                    'name': text,
                    'value': link
                }})
    else:
        self.logger.warning(
            'Transportation of Hazardous Materials not found %s',
            self._message_id)
    return result
def _parse(self):
    """Fill ``self._order`` from the parsed message tree and return it.

    Uses the ``wdm_xp`` XPath expressions to read guest, hotel,
    reservation, price and policy information.

    Returns:
        The ``self._order`` object, updated in place.

    Raises:
        MissingColumnError: when the confirmation code, hotel name,
            address/telephone, reservation rows, check-in/check-out
            values or their formatted dates are missing.
    """
    order = self._order

    def without_prefix(text, prefix):
        """Drop ``prefix`` from the start of ``text`` when present.

        ``str.strip(prefix)`` is not suitable here: it treats its argument
        as a set of characters and would also eat matching characters of
        the actual value on both ends.
        """
        return text[len(prefix):] if text.startswith(prefix) else text

    order.set('guest_name', take_first(self._etree, wdm_xp.GUEST_NAME))

    order['confirm_code'] = take_first(self._etree, wdm_xp.CONFIRM_NUM)
    if not order['confirm_code']:
        raise MissingColumnError('confirm_code', self._message_id)

    order['hotel_name'] = take_first(self._etree, wdm_xp.HOTEL_NAME)
    if not order['hotel_name']:
        raise MissingColumnError('hotel_name', self._message_id)

    address = strip_list(self._etree.xpath(wdm_xp.ADDRESS))
    if not address:
        raise MissingColumnError('address and telephone',
                                 self._message_id)
    # The last entry is the phone line; the rest is the postal address.
    order['telephone'] = without_prefix(address.pop(), 'Phone:').strip()
    order['address'] = remove_space(', '.join(address))
    if not (order['telephone'] and order['address']):
        raise MissingColumnError('address or telephone',
                                 self._message_id)

    order.set('hotel_link', take_first(self._etree, wdm_xp.HOTEL_LINK))
    order.set('email', take_first(self._etree, wdm_xp.EMAIL))
    order.set('modify_link', take_first(self._etree, wdm_xp.MODIFY_LINK))
    order.set('cancellation_link',
              take_first(self._etree, wdm_xp.CANCEL_LINK))

    reservation = strip_list(self._etree.xpath(wdm_xp.RESERVATION))
    if len(reservation) < 5:
        raise MissingColumnError('reservation', self._message_id)
    order.set('room_tips', strip_list(reservation[0].split('\n')))
    # Row 1 holds "<rooms>;<nights>".
    rooms_and_nights = reservation[1].split(';')
    order.set('number_of_rooms', rooms_and_nights[0])
    order.set('number_of_nights', rooms_and_nights[1])
    order.set('number_of_guests', reservation[2])

    # Rows 3 and 4 are "<date> <time ...>": first token is the date.
    check_in = reservation[3].split(' ')
    order['check_in_date'] = check_in.pop(0)
    order['check_in_time'] = ' '.join(check_in)
    if not (order['check_in_date'] and order['check_in_time']):
        raise MissingColumnError('check_in', self._message_id)

    checkout = reservation[4].split(' ')
    order['check_out_date'] = checkout.pop(0)
    order['check_out_time'] = ' '.join(checkout)
    if not (order['check_out_date'] and order['check_out_time']):
        raise MissingColumnError('checkout', self._message_id)

    tz = to_timezone(order['address'])
    order['check_in_date_formatted'] = DateTime(
        order['check_in_date'], 'MM/DD/YYYY').tz_to_datetime(tz)
    order['check_out_date_formatted'] = DateTime(
        order['check_out_date'], 'MM/DD/YYYY').tz_to_datetime(tz)
    if not (order['check_in_date_formatted']
            and order['check_out_date_formatted']):
        raise MissingColumnError('check_in_out_date_formatted',
                                 self._message_id)

    for item in self._etree.xpath(wdm_xp.PRICE):
        name = strip_str(take_first(item, './td[1]/text()'))
        value = strip_str(take_first(item, './td[2]/text()'))
        currency = strip_str(take_first(item, './td[2]/span/text()'))
        if name in self.name_dict:
            order[self.name_dict[name]] = value + currency
        else:
            self.logger.warning('price %s is %s %s', name, value,
                                self._message_id)

    for info in strip_list(self._etree.xpath(wdm_xp.OTHER_INFO)):
        if info.startswith('Cancellation Policy'):
            policy = without_prefix(info, 'Cancellation Policy')
            # Remove the separating ": " but keep the policy text intact.
            policy = policy.lstrip(': ').rstrip()
            order.set('cancellation_policies', policy.split(';'))
        elif info.startswith('Payment Method: '):
            order.set('payment_card',
                      without_prefix(info, 'Payment Method: ').strip())
        else:
            self.logger.warning('other information %s %s', info,
                                self._message_id)
    return order
def _parse(self):  # pylint: disable=R0912,R0915
    """Override: fill ``self._order`` from the message tree and return it.

    Uses the ``booking_xp`` XPath expressions. Every missing value is
    logged with the message id; nothing is raised explicitly here.

    Returns:
        The ``self._order`` object, updated in place.
    """
    result = self._order

    def store(key, value, log, message):
        """Store a truthy ``value`` under ``key``; otherwise log it."""
        if value:
            result[key] = value
        else:
            log(message, self._message_id)

    def first_text(xpath):
        """Return the stripped first match of ``xpath``."""
        return strip_str(take_first(self._etree, xpath))

    error = self.logger.error
    warning = self.logger.warning

    confirm_num_values = strip_list(
        self._etree.xpath(booking_xp.CONFIRMATION_NUMBER_VALUES))
    if len(confirm_num_values) == 2:
        result['confirm_code'] = confirm_num_values[0]
        result['pin_code'] = confirm_num_values[1]
    else:
        error('confirm_number is empty %s', self._message_id)

    store('hotel_name', first_text(booking_xp.HOTEL_NAME), error,
          'hotel_name is empty %s')

    address = slice_list(first_text(booking_xp.ADDRESS), -3)[0]
    store('address', address, error, 'address is empty %s')

    store('telephone', first_text(booking_xp.PHONE), error,
          'telephone is empty %s')

    check_in_date_formatted = take_first(
        self._etree, booking_xp.CHECK_IN_DATE_FORMATTED)
    if check_in_date_formatted:
        result['check_in_date_formatted'] = \
            arrow.get(check_in_date_formatted).datetime
    check_out_date_formatted = take_first(
        self._etree, booking_xp.CHECK_OUT_DATE_FORMATTED)
    if check_out_date_formatted:
        result['check_out_date_formatted'] = \
            arrow.get(check_out_date_formatted).datetime

    store('check_in_date', first_text(booking_xp.CHECK_IN_DATE), error,
          'check_in_date is empty %s')
    # Times are rendered in parentheses; drop them.
    store('check_in_time',
          first_text(booking_xp.CHECK_IN_TIME).strip('(').strip(')'),
          error, 'check_in_time is empty %s')
    store('check_out_date', first_text(booking_xp.CHECK_OUT_DATE), error,
          'check_out_date is empty %s')
    store('check_out_time',
          first_text(booking_xp.CHECK_OUT_TIME).strip('(').strip(')'),
          error, 'check_out_time is empty %s')

    price_details = strip_list(self._etree.xpath(booking_xp.PRICE_DETAILS))
    if price_details:
        result['bed_type'] = price_details[0]
        result['price'] = price_details[1]
        result['total_cost'] = price_details[-1]
        # Remaining name/value pairs in between are only reported.
        extras = zip(price_details[::2][1:-1], price_details[1::2][1:-1])
        for label, amount in extras:
            warning('%s is %s %s', label, amount, self._message_id)
    else:
        error('price_details is empty %s', self._message_id)

    requests = strip_list(
        self._etree.xpath(booking_xp.SPECIAL_REQUEST_1)
        or self._etree.xpath(booking_xp.SPECIAL_REQUEST_2))
    your_special_requirements = [
        i for i in requests
        if i and i != "\u2022" and i != "Special Requests"
    ]
    store('your_special_requirements', your_special_requirements, warning,
          'your_special_requirements is empty %s')

    # Only the first sentence of the cancellation text is kept.
    free_cancellation = first_text(booking_xp.CANCELLATION)
    free_cancellation = free_cancellation.split('.')[0].replace('\n', ' ')
    store('free_cancellation', free_cancellation, error,
          'free_cancellation_policy is empty %s')

    store('payment_forms', first_text(booking_xp.PAYMENT_FORMS), warning,
          'payment_forms is empty %s')

    if free_cancellation:
        # Use the last five characters of the raw check-out value as the
        # timezone when available -- presumably a UTC offset; TODO confirm.
        tz = check_out_date_formatted[-5:] \
            if check_out_date_formatted else to_timezone(address)
        free_cancellation_time = \
            DateTime(free_cancellation,
                     'MMMM DD, YYYY hh:mm A').tz_to_datetime(tz)  # D, H
        # NOTE(review): str() of most objects (including None) is truthy;
        # confirm whether a plain truthiness check was intended.
        if str(free_cancellation_time):
            result['free_cancellation_time'] = free_cancellation_time

    notice = strip_list(self._etree.xpath(
        booking_xp.IMPORTANT_INFORMATION))
    price_extra = strip_list(self._etree.xpath(booking_xp.PRICE_EXTRA))
    if price_extra:
        result['price_tips'] = [
            i for i in price_extra if 'Book.com' not in i
        ]
    else:
        error('price_extra is empty %s', self._message_id)
    store('notice', notice, error, 'notice is empty %s')

    stay = first_text(booking_xp.STAY)
    if stay:
        stay_parts = stay.split(',')
        result['number_of_nights'] = stay_parts[0]
        result['number_of_rooms'] = stay_parts[-1].strip()
    else:
        error('stay is empty %s', self._message_id)

    booking_summary_keys = strip_list(
        self._etree.xpath(booking_xp.BOOKING_SUMMERY_KEYS))
    booking_summary_values = self._etree.xpath(
        booking_xp.BOOKING_SUMMERY_VALUES)
    if booking_summary_values and booking_summary_keys:
        booking_summary_values = strip_list([
            remove_tags(etree.tostring(i)) for i in booking_summary_values
        ])
        for k, v in zip(booking_summary_keys, booking_summary_values):
            if 'Cancellation cost' in k:
                # Prefer the dedicated cost nodes; fall back to the
                # summary value when there are none.
                cost = self._etree.xpath(booking_xp.CANCELLATION_COST)
                if cost:
                    v = [remove_tags(etree.tostring(i)) for i in cost]
                    v = [i.replace('\n', ' ').strip() for i in v]
                result['cancellation_cost'] = v
            elif 'Prepayment' in k:
                result['prepayment'] = v
            elif k in ['Check-in', 'Check-out', 'Your reservation']:
                # Already handled by the dedicated fields above.
                pass
            else:
                warning('%s is %s %s', k, v, self._message_id)
    else:
        error('booking_summary is empty %s', self._message_id)

    room_tip = slice_list(
        strip_list(self._etree.xpath(booking_xp.ROOM_AREA)), 1)[1]
    store('room_tips', room_tip, warning, 'room_tip is empty %s')

    order_condition_keys = self._etree.xpath(
        booking_xp.BOOKING_CONDITIONS_KEYS)
    order_condition_keys = slice_list(order_condition_keys, 3)[0]
    order_condition_values = self._etree.xpath(
        booking_xp.BOOKING_CONDITIONS_VALUES)
    order_condition_values = slice_list(order_condition_values, 3)[0]
    if order_condition_values and order_condition_keys:
        order_condition_values = strip_list([
            remove_tags(etree.tostring(i)) for i in order_condition_values
        ])
        name_dict = {
            'Guest parking': 'guest_parking',
            'Internet': 'internet'
        }
        for k, v in zip(order_condition_keys, order_condition_values):
            # Membership must be tested against name_dict: testing against
            # the key list itself was always true, which raised KeyError
            # for unmapped keys and made the branches below unreachable.
            if k in name_dict:
                result[name_dict[k]] = v
            elif 'Cancellation policy' in k:
                result['cancellation_policies'] = [v]
            else:
                warning('%s is %s %s', k, v, self._message_id)
    else:
        error('room_details is empty %s', self._message_id)

    store('hotel_link', take_first(self._etree, booking_xp.HOTEL_URL),
          error, 'hotel_link is empty %s')
    store('map_link', take_first(self._etree, booking_xp.ADDRESS_URL),
          error, 'map_link is empty %s')
    store('cancellation_link',
          take_first(self._etree, booking_xp.CANCELLATION_URL),
          error, 'cancellation_link is empty %s')

    change_keys = strip_list(
        self._etree.xpath(booking_xp.CHANGE_LINKS_KEYS))
    change_values = self._etree.xpath(booking_xp.CHANGE_LINKS_VALUES)
    related_links = filter_dict_value(to_dict(change_keys, change_values))
    store('related_links', related_links, error,
          'related_links is empty %s')
    return result
def _parse(self):  # pylint: disable=R0912,R0915
    """Fill ``self._order`` from the parsed message tree and return it.

    Uses the ``plf_xp`` XPath expressions to read stations, flight
    date/time, confirmation and ticket numbers, contact data, the price
    table, the notice text and related links. Missing values are logged
    with the message id.

    Returns:
        The ``self._order`` object, updated in place.
    """
    result = self._order

    def store(key, value, log, message):
        """Store a truthy ``value`` under ``key``; otherwise log it."""
        if value:
            result[key] = value
        else:
            log(message, self._message_id)

    error = self.logger.error
    warning = self.logger.warning

    stations = self._etree.xpath(plf_xp.STATION)
    if stations:
        result['stations'] = [remove_space(i) for i in stations]
    else:
        error('stations is empty %s', self._message_id)

    # Date and time both come as "<depart> - <arrive>".
    flight_date = strip_str(take_first(self._etree, plf_xp.DATE))
    if flight_date:
        flight_date = flight_date.split(' - ')
        if len(flight_date) == 2:
            result['depart_date'], result['arrive_date'] = flight_date
    else:
        error('flight_date is empty %s', self._message_id)

    flight_time = strip_str(take_first(self._etree, plf_xp.TIME))
    if flight_time:
        flight_time = flight_time.split(' - ')
        if len(flight_time) == 2:
            result['depart_time'], result['arrive_time'] = flight_time
    else:
        error('flight_time is empty %s', self._message_id)

    de_date, ar_date = result.get('depart_date'), result.get('arrive_date')
    de_time, ar_time = result.get('depart_time'), result.get('arrive_time')
    de_datetime = de_date and de_time and ' '.join([de_date, de_time])
    ar_datetime = ar_date and ar_time and ' '.join([ar_date, ar_time])
    depart, arrive = unpack(stations)
    tz_depart, tz_arrive = to_timezone(depart), to_timezone(arrive)
    if tz_arrive and tz_depart and de_datetime and ar_datetime:
        store('depart_datetime_formatted',
              DateTime(de_datetime,
                       'MMM DD YYYY hh:mm A').tz_to_datetime(tz_depart),
              error, 'depart_datetime_formatted is empty %s')
        store('arrive_datetime_formatted',
              DateTime(ar_datetime,
                       'MMM DD YYYY hh:mm A').tz_to_datetime(tz_arrive),
              error, 'arrive_datetime_formatted is empty %s')
    else:
        error('datetime formatted is empty %s', self._message_id)

    # The plane model is read as the last match of the TIME expression.
    store('plane_model', strip_str(take_last(self._etree, plf_xp.TIME)),
          warning, 'plane_model is empty %s')

    # Previously this warning logged the (empty) mileage value instead of
    # the message id, so the affected message could not be identified.
    store('flight_duration', strip_str(take_first(self._etree,
                                                  plf_xp.MILES)),
          warning, 'mileage is empty %s')

    # Values of the form "<label>: <value>" keep only the part after the
    # last colon.
    confirm_num = take_last(self._etree, plf_xp.CONFIRM_NUM)
    if confirm_num:
        result['confirm_code'] = confirm_num.split(':')[-1].strip()
    else:
        error('confirm_number is empty %s', self._message_id)

    ticket = take_first(self._etree, plf_xp.TICKET)
    if ticket:
        result['ticket_number'] = ticket.split(':')[-1].strip()
    else:
        error('ticket_number is empty %s', self._message_id)

    depart_station = strip_str(take_first(self._etree, plf_xp.DEPART))
    if depart_station:
        result['depart_city'] = depart_station.split(', ')[-1]
        result['depart_station'] = depart_station
    else:
        error('depart_station is empty %s', self._message_id)

    arrive_station = strip_str(take_first(self._etree, plf_xp.ARRIVE))
    if arrive_station:
        result['arrive_city'] = arrive_station.split(', ')[-1]
        result['arrive_station'] = arrive_station
    else:
        error('arrive_station is empty %s', self._message_id)

    tel = take_first(self._etree, plf_xp.PHONE_PRICELINE)
    if tel:
        result['telephone'] = tel.split(':')[-1].strip()
    else:
        error('service telephone is empty %s', self._message_id)

    help_keys = self._etree.xpath(plf_xp.HELP_KEYS)
    help_values = self._etree.xpath(plf_xp.HELP_VALUES)
    store('contact_information',
          filter_dict_value(to_dict(help_keys, help_values)),
          warning, 'contact_information is empty %s')

    user_tel = take_first(self._etree, plf_xp.PHONE_USER)
    if user_tel:
        result['guest_telephone'] = user_tel.split(':')[-1].strip()
    else:
        warning('guest_telephone is empty %s', self._message_id)

    price_keys = strip_list(self._etree.xpath(plf_xp.PRICE_KEYS))
    price_values = strip_list(self._etree.xpath(plf_xp.PRICE_VALUES))
    name_dict = {
        'Billing Name': 'guest_name',
        'Ticket Cost': 'ticket_cost',
        'Taxes & Fees': 'taxes_fee',
        'Tickets': 'number_of_tickets',
        'Total Price': 'total_cost',
        'Bonus': 'bonus'
    }
    if price_keys and price_values:
        for label, amount in zip(price_keys, price_values):
            if label in name_dict:
                result[name_dict[label]] = amount
            else:
                warning('%s is %s %s', label, amount, self._message_id)
    else:
        error('price_details is empty %s', self._message_id)

    notice = self._etree.xpath(plf_xp.NOTICE)
    if notice:
        # Join soft-wrapped lines, then split paragraphs on blank lines.
        notice = remove_tags(etree.tostring(notice[0])) \
            .replace(' \n', ' ')
        result['notice'] = strip_list(notice.split('\n\n'))
    else:
        error('notice is empty %s', self._message_id)

    related_link_text = self._etree.xpath(plf_xp.RELATED_LINK_TEXT)
    related_link = self._etree.xpath(plf_xp.RELATED_LINK)
    store('related_links',
          filter_dict_value(to_dict(related_link_text, related_link)),
          warning, 'related_links is empty %s')
    return result
def _parse(self):  # pylint: disable=R0912,R0915
    """Populate ``self._order`` from an SPG hotel confirmation e-mail.

    Each field is looked up in ``self._etree`` with the XPath constants of
    ``spg_xp``.  A field that cannot be found is reported through
    ``self.logger`` together with ``self._message_id`` and its key is left
    out of the result.

    Returns:
        ``self._order``, updated in place.
    """
    result = self._order

    related_link = self._etree.xpath(spg_xp.RELATED_LINKS)
    related_text = strip_list(self._etree.xpath(spg_xp.RELATED_TEXT))
    if related_link:
        result['related_links'] = to_dict(related_text, related_link)
    else:
        self.logger.error('related_links is empty %s', self._message_id)

    hotel_name = strip_str(take_first(self._etree, spg_xp.HOTEL_NAME))
    if hotel_name:
        result['hotel_name'] = hotel_name
    else:
        self.logger.error('hotel_name is empty %s', self._message_id)

    address = self._etree.xpath(spg_xp.ADDRESS)
    if address and len(address) >= 3:
        address = remove_space(' '.join(address[:2]).replace('\t', ''))
        result['address'] = address
    else:
        # Keep a string so to_timezone() below still receives one.
        address = ''
        self.logger.error('address is empty %s', self._message_id)

    tel = strip_str(take_first(self._etree, spg_xp.PHONE))
    if tel:
        result['telephone'] = tel
    else:
        self.logger.error('telephone is empty %s', self._message_id)

    fax = strip_str(take_first(self._etree, spg_xp.FAX))
    if fax:
        result['fax'] = fax.split(': ')[-1]
    else:
        self.logger.warning('fax is empty %s', self._message_id)

    # The confirmation number is taken from the ADDRESS nodes: it is the
    # fourth non-empty entry of that list.
    confirm_num = strip_list(self._etree.xpath(spg_xp.ADDRESS))
    if confirm_num and len(confirm_num) >= 4:
        result['confirm_code'] = confirm_num[3]
    else:
        self.logger.error('confirm_number is empty %s', self._message_id)

    reservation_explanation = strip_str(
        take_first(self._etree, spg_xp.EXPLANATION)
    )
    if reservation_explanation:
        result['tip'] = reservation_explanation
    else:
        self.logger.warning(
            'reservation_explanation is empty %s', self._message_id
        )

    reservation_keys = strip_list(
        self._etree.xpath(spg_xp.RESERVATION_INFO_KEYS)
    )
    reservation_values = strip_list(
        self._etree.xpath(spg_xp.RESERVATION_INFO_VALUES)
    )
    if reservation_keys and reservation_values:
        tz = to_timezone(address)
        for key, value in zip(reservation_keys, reservation_values):
            if 'Check In' in key:
                # The date is the first line of the value, the time is
                # whatever follows the last ' - '.
                check_in_date = value.split('\n')[0]
                check_in_time = value.split(' - ')[-1].strip(' *')
                if check_in_time:
                    result['check_in_time'] = check_in_time
                else:
                    self.logger.error(
                        'check_in_time is empty %s', self._message_id
                    )
                if check_in_date:
                    result['check_in_date'] = check_in_date
                    formatted = DateTime(
                        check_in_date, 'DD-MMM-YYYY'
                    ).tz_to_datetime(tz)
                    if formatted:
                        result['check_in_date_formatted'] = formatted
                    else:
                        self.logger.error(
                            'check_in_date_formatted is empty %s',
                            self._message_id
                        )
                else:
                    self.logger.error(
                        'check_in_date is empty %s', self._message_id
                    )
            elif 'Check Out' in key:
                check_out_date = value.split('\n')[0]
                check_out_time = value.split(' - ')[-1].strip(' *')
                if check_out_time:
                    result['check_out_time'] = check_out_time
                else:
                    self.logger.error(
                        'check_out_time is empty %s', self._message_id
                    )
                if check_out_date:
                    result['check_out_date'] = check_out_date
                    formatted = DateTime(
                        check_out_date, 'DD-MMM-YYYY'
                    ).tz_to_datetime(tz)
                    if formatted:
                        result['check_out_date_formatted'] = formatted
                    else:
                        self.logger.error(
                            'check_out_date_formatted is empty %s',
                            self._message_id
                        )
                else:
                    # Previously reported as 'check_in_date' (copy-paste).
                    self.logger.error(
                        'check_out_date is empty %s', self._message_id
                    )
            elif 'Number of Guests' in key:
                result['number_of_guests'] = value
            elif 'Number of Rooms' in key:
                result['number_of_rooms'] = value
            else:
                self.logger.warning(
                    ' %s is %s %s', key, value, self._message_id
                )
    else:
        self.logger.error(
            'reservation_information is empty %s', self._message_id
        )

    accommodation_keys = strip_list(
        self._etree.xpath(spg_xp.ACCOMMODATION_INFO_KEYS)
    )
    accommodation_values = strip_list(
        self._etree.xpath(spg_xp.ACCOMMODATION_INFO_VALUES)
    )
    if accommodation_keys and accommodation_values:
        for key, value in zip(accommodation_keys, accommodation_values):
            if 'Guest Name' in key:
                result['guest_name'] = value
            # TODO: (LensHo) multiple names
            elif 'Number of Adults' in key:
                result['number_of_adults'] = value
            elif 'Number of Children' in key:
                result['number_of_children'] = value
            else:
                self.logger.warning(
                    '%s is %s %s', key, value, self._message_id
                )
    else:
        self.logger.error('accommodation is empty %s', self._message_id)

    accommodation_details = strip_list(
        self._etree.xpath(spg_xp.ACCOMMODATION_DETAILS)
    )
    if accommodation_details:
        result['room_tips'] = accommodation_details
    else:
        self.logger.warning(
            'accommodation_details is empty %s', self._message_id
        )

    room_description = strip_str(
        take_last(self._etree, spg_xp.ROOM_DESCRIPTION)
    )
    if room_description:
        result['bed_type'] = room_description.split(': ')[-1]
        # The description is also appended to the room tips.
        if result.get('room_tips'):
            result['room_tips'].append(room_description)
        else:
            result['room_tips'] = [room_description]
    else:
        self.logger.warning('room_tip is empty %s', self._message_id)

    currency = strip_str(take_first(self._etree, spg_xp.CURRENCY))
    if not currency:
        self.logger.error('currency is empty %s', self._message_id)
        # The price strings below are built with str.join, which needs a
        # string even when no currency was found.
        currency = ''

    room_rate_avg = strip_str(
        take_first(self._etree, spg_xp.PER_ROOM_RATE)
    )
    if room_rate_avg:
        result['price'] = ' '.join([room_rate_avg, currency])
    else:
        self.logger.error('room_rate_avg is empty %s', self._message_id)

    estimated_avg = strip_str(
        take_first(self._etree, spg_xp.ESTIMATED_TOTAL_1)
    )
    if estimated_avg:
        result['total_cost'] = ' '.join([estimated_avg, currency])
    else:
        self.logger.error('total is empty %s', self._message_id)

    fee_cells = strip_list(self._etree.xpath(spg_xp.OTHER_COST_KEYS))
    fee_totals = strip_list(
        self._etree.xpath(spg_xp.OTHER_COST_VALUES_1)
    )
    if fee_cells and fee_totals:
        # NOTE(review): the key cells are presumably interleaved as
        # name, rate, name, rate, ...  Both slices must be taken from the
        # untouched list; the rates used to be sliced from the
        # already-halved name list, which paired names with other names.
        fee_names = fee_cells[::2]
        fee_rates = [remove_space(cell) for cell in fee_cells[1::2]]
        fee_amounts = fee_totals[1::2]
        for name, fee, avg in zip(fee_names, fee_rates, fee_amounts):
            if 'Value Added Tax' in name:
                result['taxes_fee'] = ' '.join([fee, avg, currency])
            elif 'Service Charge' in name:
                result['service_charge'] = ' '.join([fee, avg, currency])
            else:
                self.logger.warning(
                    '%s is %s %s', name, fee + avg + currency,
                    self._message_id
                )
    else:
        self.logger.error('other_fee is empty %s', self._message_id)

    price_explanation = strip_str(
        take_first(self._etree, spg_xp.EXPLANATION_PRICE)
    )
    if price_explanation:
        result['price_tips'] = [price_explanation]
    else:
        self.logger.warning(
            'price_explanation is empty %s', self._message_id
        )

    policies = strip_list(self._etree.xpath(spg_xp.CANCELLATION))
    if policies:
        result['policies'] = policies
    else:
        self.logger.error('policies is empty %s', self._message_id)

    privacy_links = self._etree.xpath(spg_xp.PRIVACY_LINK)
    privacy_texts = strip_list(self._etree.xpath(spg_xp.PRIVACY_LINK_TEXT))
    privacy = take_first(self._etree, spg_xp.PRIVACY)
    if privacy is not None:
        result['privacies'] = [{
            'text': strip_str(remove_tags(etree.tostring(privacy))),
            'links': to_dict(privacy_texts, privacy_links)
        }]
    else:
        self.logger.error('privacy is empty %s', self._message_id)

    disclosure = self._etree.xpath(spg_xp.DISCLOSURE)
    if disclosure:
        # Walk the direct text nodes and child elements in document order.
        # Every <strong> opens a new section; following text and <a>
        # elements are attached to the section opened last.
        nodes = disclosure[0].xpath('./text() | ./*')
        sections = []
        for node in nodes:
            is_text = isinstance(node, str)
            if node == '\n' or (not is_text and node.tag == 'br'):
                continue
            if is_text:
                if sections:
                    sections[-1]['text'] = ''.join(
                        [sections[-1]['text'], node]
                    )
                else:
                    self.logger.error(
                        'string before strong %s %s', node,
                        self._message_id
                    )
            elif node.tag == 'strong':
                # Drop the empty 'links' list of the section being closed.
                if sections and not sections[-1]['links']:
                    del sections[-1]['links']
                sections.append(
                    {'name': node.text, 'text': '', 'links': []}
                )
            elif node.tag == 'a':
                if sections:
                    # An <a> without text has .text == None.
                    sections[-1]['text'] = ''.join(
                        [sections[-1]['text'], node.text or '']
                    )
                    sections[-1]['links'].append(
                        {'name': node.text, 'value': node.get('href')}
                    )
                else:
                    self.logger.error(
                        'tag before strong %s %s', node.tag,
                        self._message_id
                    )
            else:
                self.logger.error(
                    'unknown tag %s %s', node.tag, self._message_id
                )
        if sections:
            if not sections[-1]['links']:
                del sections[-1]['links']
            # Store the sections whether or not the last one has links.
            result['disclosures'] = sections
        else:
            self.logger.error('disclosure is empty %s', self._message_id)
    else:
        self.logger.error('disclosure_raw is empty %s', self._message_id)

    return result
def _parse(self):
    """Populate ``self._order`` from a Travelzoo hotel confirmation.

    Values are read from ``self._etree`` with the ``tz_xp`` XPath
    constants.  Hotel name, address, telephone, the guest/confirmation
    block and the stay dates are mandatory; every other field is only
    logged through ``self.logger`` when it is missing.

    Returns:
        ``self._order``, updated in place.

    Raises:
        MissingColumnError: when one of the mandatory fields is empty.
    """
    result = self._order

    result['hotel_name'] = strip_str(
        take_first(self._etree, tz_xp.HOTEL_NAME))
    if not result['hotel_name']:
        raise MissingColumnError('hotel_name', self._message_id)

    result['address'] = ', '.join(
        [i.strip(',') for i in self._etree.xpath(tz_xp.ADDRESS)])
    if not result['address']:
        raise MissingColumnError('address', self._message_id)

    result['telephone'] = take_first(self._etree, tz_xp.PHONE)
    if not result['telephone']:
        raise MissingColumnError('telephone', self._message_id)

    result.set('hotel_link', take_first(self._etree, tz_xp.WEBSITE))

    # The first three entries are, in order: guest name, Travelzoo
    # reference code and confirmation code.
    name_code = strip_list(self._etree.xpath(tz_xp.NAME_CODE))
    if len(name_code) < 3:
        raise MissingColumnError('guest_name or confirm_code',
                                 self._message_id)
    result['guest_name'] = name_code.pop(0)
    result['travelzoo_reference_code'] = name_code.pop(0)
    result['confirm_code'] = name_code.pop(0)
    # There may be additional confirmation information.
    if name_code:
        self.logger.warning('other confirmation: %s %s',
                            name_code.pop(), self._message_id)

    date = strip_str(take_first(self._etree, tz_xp.DATE))
    if not date:
        raise MissingColumnError('date is empty', self._message_id)
    check_in_date, check_out_date = unpack(date.split(' - '))
    if not check_in_date or not check_out_date:
        raise MissingColumnError('check_in_date or checkout_date',
                                 self._message_id)

    tz = to_timezone(result.get('address'))
    date_format = 'MMM DD, YYYY'

    result['check_in_date'] = check_in_date
    result['check_in_date_formatted'] = DateTime(
        check_in_date, date_format).tz_to_datetime(tz)
    if not result['check_in_date_formatted']:
        result.pop('check_in_date_formatted')
        self.logger.error('check_in_date_formatted is empty %s',
                          self._message_id)

    result['check_out_date'] = check_out_date
    result['check_out_date_formatted'] = DateTime(
        check_out_date, date_format).tz_to_datetime(tz)
    if not result['check_out_date_formatted']:
        result.pop('check_out_date_formatted')
        self.logger.error('check_out_date_formatted is empty %s',
                          self._message_id)

    result['number_of_guests'] = strip_str(
        take_first(self._etree, tz_xp.GUEST_NUM))
    if not result['number_of_guests']:
        result.pop('number_of_guests')
        self.logger.error('number of guests is empty %s',
                          self._message_id)

    roomtype_price = strip_list(self._etree.xpath(tz_xp.ROOMTYPE_PRICE))
    if len(roomtype_price) >= 2:
        result['room_type'] = roomtype_price.pop(0)
        price = roomtype_price.pop(0)
        result['price'] = price.split('- ')[-1]
        # There may be other information.
        if roomtype_price:
            self.logger.warning('other price: %s %s',
                                roomtype_price.pop(), self._message_id)
    else:
        self.logger.error('price is empty %s', self._message_id)

    payment = self._etree.xpath(tz_xp.PAYMENT)
    if payment:
        for item in payment:
            name = strip_str(take_first(item, './td[1]/text()'))
            cost = strip_str(take_first(item, './td[2]/text()'))
            if not name or not cost:
                continue
            if 'Night' in name:
                result['number_of_nights'] = name
                result['total_without_taxes'] = cost
            elif 'tax' in name:
                result['taxes_name'] = name
                result['taxes_fee'] = cost
            elif 'Total' in name:
                result['total_cost'] = cost
            # There may be other price information.
            else:
                self.logger.warning('payment: %s is %s %s',
                                    name, cost, self._message_id)
    else:
        self.logger.error('payment is empty %s', self._message_id)

    # Every <b> element is treated as a policy title; the text nodes of
    # its parent are the policy body.
    policies = self._etree.xpath('//b')
    if policies:
        for policy in policies:
            name = strip_str(take_first(policy, './text()'))
            value = strip_list(policy.xpath('../text()'))
            if name in self.name_dict:
                result[self.name_dict[name]] = value
            elif result['hotel_name'] in name:
                # A bold hotel name is not a policy.
                pass
            else:
                self.logger.warning('policy: %s is %s %s',
                                    name, value, self._message_id)
    else:
        self.logger.error('policies is empty %s', self._message_id)

    result['cancellation_link'] = take_first(self._etree,
                                             tz_xp.CANCEL_LINK)
    if not result['cancellation_link']:
        result.pop('cancellation_link')
        self.logger.error('cancellation link is empty %s',
                          self._message_id)

    contact = self._etree.xpath(tz_xp.CONTACT)
    if contact:
        result['contact_information'] = []
        for item in contact:
            link = take_first(item, './a/@href')
            # A contact row without an <a href> yields no link; skip it
            # instead of failing on the substring test.
            if link and 'tel' in link:
                region = take_first(item, './text()')
                tel = take_first(item, './a/text()')
                if region and tel:
                    result['contact_information'].append({
                        'name': region,
                        'value': tel
                    })
        if not result['contact_information']:
            result.pop('contact_information')
            self.logger.warning('contact information is empty %s',
                                self._message_id)
    else:
        self.logger.warning('contact is empty %s', self._message_id)

    return result
def _parse(self):  # pylint: disable=R0912,R0915
    """Populate ``self._order`` from an Accor hotel confirmation e-mail.

    Values are read from ``self._etree`` with the ``accor_xp`` XPath
    constants.  Missing values are reported through ``self.logger``
    together with ``self._message_id`` and their keys are left out.

    Returns:
        ``self._order``, updated in place.
    """
    result = self._order

    guest_name = strip_str(take_first(self._etree, accor_xp.GUEST))
    if guest_name:
        result['guest_name'] = guest_name
    else:
        self.logger.error('guest_name is empty %s', self._message_id)

    confirm_num = strip_str(take_first(self._etree, accor_xp.CONFIRM_NUM))
    if confirm_num:
        result['confirm_code'] = confirm_num
    else:
        self.logger.error('confirm_number is empty %s', self._message_id)

    address = ', '.join(strip_list(self._etree.xpath(accor_xp.ADDRESS)))
    if address:
        result['address'] = address
    else:
        self.logger.error('address is empty %s', self._message_id)

    # Test the raw string before splitting: ''.split(' ') is [''], which
    # is truthy, so checking the split list never reported a missing value.
    stay_dates_raw = strip_str(
        take_first(self._etree, accor_xp.CHECK_IN_OUT_DATE)
    )
    if not stay_dates_raw:
        self.logger.error('check_in_out_date is empty %s',
                          self._message_id)
    check_in_out_date = stay_dates_raw.split(' ') if stay_dates_raw else []
    # Four space-separated tokens are expected; the 2nd is the check-in
    # date and the last the check-out date.
    if len(check_in_out_date) == 4:
        check_in_date = check_in_out_date[1]
        check_out_date = check_in_out_date[-1]
        result['check_in_date'] = check_in_date
        result['check_out_date'] = check_out_date
        tz = to_timezone(address)
        check_in_date_formatted = DateTime(
            check_in_date, 'MM/DD/YYYY'
        ).tz_to_datetime(tz)
        if check_in_date_formatted:
            result['check_in_date_formatted'] = check_in_date_formatted
        check_out_date_formatted = DateTime(
            check_out_date, 'MM/DD/YYYY'
        ).tz_to_datetime(tz)
        if check_out_date_formatted:
            result['check_out_date_formatted'] = check_out_date_formatted

    hotel_link = take_first(self._etree, accor_xp.HOTEL_LINK)
    if hotel_link:
        result['hotel_link'] = hotel_link
    else:
        self.logger.error('hotel_link is empty %s', self._message_id)

    map_link = take_first(self._etree, accor_xp.MAP_LINK)
    if map_link:
        result['map_link'] = map_link
    else:
        self.logger.error('map_link is empty %s', self._message_id)

    related_links = self._etree.xpath(accor_xp.RELATED_LINKS)
    if not related_links:
        self.logger.error('related_links is empty %s', self._message_id)
    related_texts = strip_list(self._etree.xpath(accor_xp.RELATED_TEXTS))
    related_links = to_dict(related_texts, related_links)
    if related_links:
        result['related_links'] = related_links
        for link in related_links:
            link_name = link.get('name')
            # NOTE(review): the test is "name is a substring of 'Cancel'",
            # not the reverse; kept as is, but an empty or missing name no
            # longer matches (or raises).
            if link_name and link_name in 'Cancel':
                # The list is shared with result['related_links'], so the
                # cancel link is moved out of it.  Breaking right after
                # the removal keeps the iteration safe.
                related_links.remove(link)
                result['cancellation_link'] = link.get('value')
                break

    hotel_name = strip_str(take_first(self._etree, accor_xp.HOTEL_NAME))
    if hotel_name:
        result['hotel_name'] = hotel_name
    else:
        self.logger.error('hotel_name is empty %s', self._message_id)

    # First entry is the phone, second the e-mail address.
    phone_email = strip_list(self._etree.xpath(accor_xp.PHONE_EMAIL))
    phone = phone_email.pop(0) if phone_email else ''
    email = phone_email.pop(0) if phone_email else ''
    if phone:
        result['telephone'] = phone.split(': ')[-1]
    else:
        self.logger.error('telephone is empty %s', self._message_id)
    if email:
        result['email'] = email
    else:
        self.logger.warning('email is empty %s', self._message_id)

    bed_type = strip_str(take_first(self._etree, accor_xp.BED_TYPE))
    if bed_type:
        result['bed_type'] = bed_type
    else:
        self.logger.error('bed_type is empty %s', self._message_id)

    room_detail = strip_str(take_first(self._etree, accor_xp.DETAILS))
    if room_detail:
        result['room_tips'] = [room_detail]
    else:
        self.logger.error('room_details is empty %s', self._message_id)

    stay = strip_str(take_first(self._etree, accor_xp.STAY))
    if stay:
        # The part after the last ', ' is the nights text; its first
        # token is used as the night count.
        days = stay.split(', ')[-1].split(' ')[0]
        result['number_of_rooms'] = stay.split(' ')[0]
        result['number_of_nights'] = stay.split(', ')[-1]
    else:
        days = ''
        self.logger.error('stay is empty %s', self._message_id)

    room_price_values = strip_list(
        self._etree.xpath(accor_xp.ROOM_PRICE_VALUE))
    if room_price_values:
        # Average nightly price: sum of the listed prices divided by the
        # number of nights.  Unparsable numbers are logged, not raised.
        try:
            price_sum = 0
            currency = ''
            for item in room_price_values:
                price_sum += float(item.split(' ')[-1])
                currency = item.split(' ')[0] + ' '
            if days:
                result['price'] = currency + \
                    str(round(price_sum / int(days), 2))
        except (ValueError, ZeroDivisionError):
            self.logger.error('price is empty %s', self._message_id)
    else:
        self.logger.error('price is empty %s', self._message_id)

    total_price_names = strip_list(
        self._etree.xpath(accor_xp.TOTAL_PRICE_NAME))
    total_price_values = strip_list(
        self._etree.xpath(accor_xp.TOTAL_PRICE_VALUE))
    if len(total_price_values) > 1:
        # The last value is the total; the first value (total price of
        # the stay) is dropped before pairing the rest with the names.
        result['total_cost'] = total_price_values[-1]
        total_price_values.pop(0)
    else:
        self.logger.error('total_price is empty %s', self._message_id)
    if total_price_names and total_price_values:
        for name, value in zip(total_price_names, total_price_values):
            if name in 'Total amount including VAT':
                result['total_amount_including_vat'] = value
            elif name in 'Other taxes excluded':
                result['other_taxes_excluded'] = value
            else:
                self.logger.warning('%s is %s %s', name, value,
                                    self._message_id)

    info_names = strip_list(self._etree.xpath(accor_xp.INFO_NAME))
    info_values = self._etree.xpath(accor_xp.INFO_VALUE)
    if info_values and info_names:
        # NOTE(review): 'taxes-fee' uses a hyphen; confirm consumers of
        # this key before renaming it.
        name_dict = {
            'Practical information': 'practical_information',
            'Special requirements': 'your_special_requirements',
            'Sales conditions': 'sales_conditions',
            'Taxes': 'taxes-fee'
        }
        info_values = map(remove_tags, map(etree.tostring, info_values))
        for name, value in zip(info_names, strip_list(info_values)):
            if name in name_dict:
                result[name_dict[name]] = value
            else:
                self.logger.warning('%s is %s %s', name, value,
                                    self._message_id)
    else:
        self.logger.error('notice is empty %s', self._message_id)

    # The special requirements are stored as a one-element list.
    require = result.get('your_special_requirements')
    if require:
        result['your_special_requirements'] = [require]

    policy_names = strip_list(self._etree.xpath(accor_xp.POLICIES_NAMES))
    policy_values = strip_list(self._etree.xpath(accor_xp.POLICIES_VALUES))
    if policy_names and policy_values:
        # The first two values are skipped before pairing with the names.
        policy_values = policy_values[2:]
        for name, value in zip(policy_names, policy_values):
            if 'Check in Policy' in name:
                result['check_in_policies'] = [value]
                # Test the parsed time before str(): str() of a failed
                # parse is a non-empty string and would be stored.
                check_in_time = DateTime(value, 'HH:mm').to_time()
                if check_in_time:
                    result['check_in_time'] = str(check_in_time)
            elif 'Check out Policy' in name:
                result['check_out_policies'] = [value]
                check_out_time = DateTime(value, 'HH:mm').to_time()
                if check_out_time:
                    result['check_out_time'] = str(check_out_time)
            elif 'Cancellation policy' in name:
                result['cancellation_policies'] = [value]
            elif 'Guarantee Policy' in name:
                result['guarantee_policies'] = [value]
            else:
                self.logger.warning('%s is %s %s', name, value,
                                    self._message_id)

    return result
def _parse(self):
    """Populate ``self._order`` from an OpenTable reservation e-mail.

    Values are read from ``self._etree`` with the ``opentable_xp`` XPath
    constants.  Missing values are reported through ``self.logger``
    together with ``self._message_id`` and their keys are left out.

    Returns:
        ``self._order``, updated in place.
    """
    result = self._order

    restaurant_name = take_first(self._etree, opentable_xp.NAME)
    if restaurant_name:
        result['restaurant_name'] = restaurant_name
    else:
        self.logger.error('restaurant_name is empty %s', self._message_id)

    address = ', '.join(strip_list(self._etree.xpath(
        opentable_xp.ADDRESS)))
    # Test the address itself; this used to test restaurant_name, which
    # stored empty addresses and dropped real ones.
    if address:
        result['address'] = address
    else:
        self.logger.error('address is empty %s', self._message_id)

    guest_name = take_first(self._etree, opentable_xp.USER_NAME)
    if guest_name:
        result['guest_name'] = guest_name
    else:
        self.logger.error('guest_name is empty %s', self._message_id)

    confirm_number = take_first(self._etree,
                                opentable_xp.CONFIRMATION_NUMBER)
    if confirm_number:
        result['confirm_code'] = confirm_number
    else:
        self.logger.error('confirm_number is empty %s', self._message_id)

    check_in_datetime = take_first(self._etree, opentable_xp.DATETIME)
    if check_in_datetime:
        result['check_in_datetime'] = check_in_datetime
        tz = to_timezone(address)
        dt = DateTime(check_in_datetime, 'MMMM D, YYYY .. h:mm A')
        check_in_datetime_formatted = dt.tz_to_datetime(tz)
        if check_in_datetime_formatted:
            result['check_in_datetime_formatted'] = \
                check_in_datetime_formatted
        else:
            self.logger.error('check_in_datetime_formatted is empty %s',
                              self._message_id)
    else:
        self.logger.error('check_in_datetime is empty %s',
                          self._message_id)

    tel = take_first(self._etree, opentable_xp.PHONE)
    if tel:
        result['telephone'] = tel
    else:
        self.logger.error('telephone is empty %s', self._message_id)

    # The promoted offer is optional: each missing part is only a warning
    # and empty parts are filtered out before the offer is stored.
    title = take_first(self._etree, opentable_xp.PROMOTED_OFFER_TITLE)
    if not title:
        self.logger.warning('promote_offer_title is empty %s',
                            self._message_id)
    details = take_first(self._etree, opentable_xp.PROMOTED_OFFER_DETAILS)
    if not details:
        self.logger.warning('promote_offer_details is empty %s',
                            self._message_id)
    condition = take_first(self._etree,
                           opentable_xp.PROMOTED_OFFER_CONDITIONS)
    if not condition:
        self.logger.warning('promote_offer_condition is empty %s',
                            self._message_id)
    promote_offer_values = [title, details, condition]
    promote_offer_names = ['title', 'details', 'conditions']
    promote_offer = to_dict(promote_offer_names, promote_offer_values)
    promote_offer = filter_dict_value(promote_offer)
    if promote_offer:
        result['promote_offer'] = promote_offer

    menu = take_first(self._etree, opentable_xp.MENU)
    if menu:
        result['menu_link'] = menu
    else:
        self.logger.warning('menu_link is empty %s', self._message_id)

    direction = take_first(self._etree, opentable_xp.DIRECTION)
    if direction:
        result['map_link'] = direction
    else:
        self.logger.warning('direction_link is empty %s',
                            self._message_id)

    restaurant_link = take_first(self._etree, opentable_xp.NAME_LINK)
    if restaurant_link:
        result['restaurant_link'] = restaurant_link
    else:
        self.logger.warning('restaurant_link is empty %s',
                            self._message_id)

    links = self._etree.xpath(opentable_xp.LINK)
    links_name = strip_list(self._etree.xpath(opentable_xp.LINK_NAME))
    name_dict = {
        'Cancel': 'cancellation_link',
        'Modify': 'modify_link',
        'Calendar': 'calendar_link',
        'Share': 'share_link'
    }
    if links and links_name:
        for link_name, link in zip(links_name, links):
            if link_name in name_dict:
                result[name_dict[link_name]] = link
            else:
                self.logger.warning('%s is %s %s', link_name, link,
                                    self._message_id)
    else:
        self.logger.error('related_links is empty %s', self._message_id)

    confirmation = '\n'.join(
        self._etree.xpath(opentable_xp.RESTAURANT_CONFIRMATION))
    if confirmation:
        result['restaurant_confirmation'] = confirmation
    else:
        self.logger.warning('restaurant_confirmation is empty %s',
                            self._message_id)

    restaurant_details = strip_str(
        take_first(self._etree, opentable_xp.RESTAURANT_DETAILS))
    if restaurant_details:
        result['restaurant_details'] = restaurant_details
    else:
        self.logger.warning('restaurant_details is empty %s',
                            self._message_id)

    notice = strip_list(self._etree.xpath(opentable_xp.NOTE))
    if notice:
        # Keep only the text after the last ': ' of each note.
        result['notice'] = [i.split(': ')[-1] for i in notice]
    else:
        self.logger.warning('notice is empty %s', self._message_id)

    your_special_requirement = strip_list(
        self._etree.xpath(opentable_xp.SPECIAL_NOTE))
    if your_special_requirement:
        result['your_special_requirement'] = your_special_requirement
    else:
        self.logger.info('your_special_requirement is empty %s',
                         self._message_id)

    return result
def _parse(self):  # pylint: disable=R0912, R0915
    """Populate ``self._order`` from a United Airlines e-ticket e-mail.

    Values are read from ``self._etree`` with the ``united_xp`` XPath
    constants.  Missing values are reported through ``self.logger``
    together with ``self._message_id`` and their keys are left out.

    Returns:
        ``self._order``, updated in place.
    """
    result = self._order

    confirm_number = strip_str(
        take_first(self._etree, united_xp.CONFIRM_NUM))
    if confirm_number:
        result['confirm_code'] = confirm_number
    else:
        self.logger.error('confirm_number is empty %s', self._message_id)

    traveler_info_values = self._etree.xpath(
        united_xp.TRAVELER_INFO_VALUES)
    if traveler_info_values:
        count = len(traveler_info_values)
        result['guest_name'] = traveler_info_values[0]
        # A single value used to raise IndexError here.
        if count > 1:
            result['ticket_number'] = traveler_info_values[1]
        if count == 3:
            result['seats'] = traveler_info_values[2]
        elif count == 4:
            result['frequent_flyer'] = traveler_info_values[2]
            result['seats'] = traveler_info_values[3]
        else:
            self.logger.error('traveler info increase %s',
                              self._message_id)
    else:
        self.logger.error('traveler info is empty %s', self._message_id)

    # Result keys matched positionally against the flight info nodes.
    names = [
        'flight_date', 'flight_number', 'class', 'depart_city',
        'arrive_city', 'plane_model'
    ]
    flight_info_divs = self._etree.xpath(united_xp.FLIGHT_INFO_VALUES)
    if flight_info_divs:
        for key, div in zip(names, flight_info_divs):
            val = div.xpath('./p/span/text() | ./span/text()')
            if val:
                if key == 'flight_date' and len(val) == 2:
                    result['depart_date'] = val[0]
                    result['arrive_date'] = val[1]
                elif key == 'flight_date' and len(val) == 1:
                    # A single date is both departure and arrival date.
                    result['arrive_date'] = result['depart_date'] = val[0]
                else:
                    result[key] = ' '.join(val)
    else:
        self.logger.error('flight info is empty %s', self._message_id)

    flight_time = self._etree.xpath(united_xp.FLIGHT_TIME)
    if len(flight_time) == 2:
        result['depart_time'] = flight_time[0]
        result['arrive_time'] = flight_time[1]
    else:
        self.logger.error('flight_time is empty %s', self._message_id)

    if (result.get('depart_date') and result.get('depart_time') and
            result.get('depart_city')):
        dt_depart = ' '.join(
            [result['depart_date'], result['depart_time']])
        tz = to_timezone(result['depart_city'])
        depart_formatted = DateTime(
            dt_depart, 'DDMMMYY h:mm A').tz_to_datetime(tz)
        if depart_formatted:
            result['depart_datetime_formatted'] = depart_formatted
        else:
            self.logger.error('depart_datetime_formatted is empty %s',
                              self._message_id)
    else:
        self.logger.error('depart_datetime_formatted is empty %s',
                          self._message_id)

    if (result.get('arrive_date') and result.get('arrive_time') and
            result.get('arrive_city')):
        dt_arrive = ' '.join(
            [result['arrive_date'], result['arrive_time']])
        tz = to_timezone(result['arrive_city'])
        arrive_formatted = DateTime(
            dt_arrive, 'DDMMMYY h:mm A').tz_to_datetime(tz)
        if arrive_formatted:
            result['arrive_datetime_formatted'] = arrive_formatted
        else:
            self.logger.error('arrive_datetime_formatted is empty %s',
                              self._message_id)
    else:
        self.logger.error('arrive_datetime_formatted is empty %s',
                          self._message_id)

    currency = strip_str(take_first(self._etree, united_xp.CURRENCY))

    airfare = strip_str(take_first(self._etree, united_xp.AIRFARE_PRICE))
    if airfare:
        result['price'] = ' '.join([airfare, currency])
    else:
        self.logger.error('airfare is empty %s', self._message_id)

    tax_fee_price = strip_str(
        take_first(self._etree, united_xp.TAX_FEE_PRICE))
    if tax_fee_price:
        result['taxes_fee'] = tax_fee_price
    else:
        self.logger.error('taxes fee is empty %s', self._message_id)

    per_person = strip_str(
        take_first(self._etree, united_xp.TOTAL_PER_PERSON))
    if per_person:
        result['subtotal'] = ' '.join([per_person, currency])
    else:
        self.logger.error('per person is empty %s', self._message_id)

    fare_total = strip_str(take_first(self._etree, united_xp.TOTAL_PRICE))
    if fare_total:
        result['total_cost'] = ' '.join([fare_total, currency])
    else:
        self.logger.error('total fare is empty %s', self._message_id)

    operator = strip_str(take_first(self._etree, united_xp.OPERATED))
    if operator:
        result['operator'] = operator
    else:
        self.logger.error('operator is empty %s', self._message_id)

    payment_form = strip_str(
        take_first(self._etree, united_xp.PAYMENT_FORM))
    if payment_form:
        result['payment_form'] = payment_form
    else:
        self.logger.error('payment_form is empty %s', self._message_id)

    tax_summary = strip_str(take_first(self._etree,
                                       united_xp.TAX_SUMMARY))
    if tax_summary:
        result['tax_summary'] = tax_summary
    else:
        self.logger.error('taxes_summary is empty %s', self._message_id)

    fare_summary = strip_str(
        take_first(self._etree, united_xp.FARE_SUMMARY))
    if fare_summary:
        result['fare_summary'] = fare_summary
    else:
        self.logger.error('fare_summary is empty %s', self._message_id)

    fare_rule = strip_str(take_first(self._etree, united_xp.FARE_RULE))
    if fare_rule:
        result['fare_rule'] = fare_rule
    else:
        self.logger.error('fare_rule is empty %s', self._message_id)

    bag_fee_1 = take_first(self._etree, united_xp.BAG_FEE_1)
    bag_fee_2 = take_first(self._etree, united_xp.BAG_FEE_2)
    bag_fee_3 = take_first(self._etree, united_xp.BAG_FEE_3)
    bag_fee = strip_list([bag_fee_1, bag_fee_2, bag_fee_3])
    if bag_fee:
        result['bag_fee_summary'] = bag_fee
    else:
        self.logger.error('bag_fee_summary is empty %s', self._message_id)

    bag_names = self._etree.xpath(united_xp.BAG_FEE_TABLE_KEYS)
    bag_table_names = []
    if bag_names:
        bag_table_values = self._etree.xpath(
            united_xp.BAG_FEE_TABLE_VALUES)
        for index, header in enumerate(bag_names):
            label = remove_tags(etree.tostring(header))
            # The fourth header is replaced by the MAX_WT text.
            if index == 3:
                label = strip_str(
                    take_first(self._etree, united_xp.MAX_WT))
            bag_table_names.append(label)
        table = filter_dict_value(
            to_dict(bag_table_names, bag_table_values))
    else:
        table = ''
        self.logger.error('bag_fee_table is empty %s', self._message_id)
    if table:
        result['bag_fee_table'] = table

    marketing_info_trs = self._etree.xpath(united_xp.MARKETING_INFO)
    if marketing_info_trs:
        # Rows come in groups of three; the 2nd row of a group holds the
        # title and the 3rd the content.
        titles = marketing_info_trs[1::3]
        contents = marketing_info_trs[2::3]
        titles_list = [
            strip_str(take_first(i, united_xp.MARKETING_INFO_TITLES))
            for i in titles
        ]
        name_dict = {
            'Important Information about MileagePlus Earning':
                'mileage_plus_earning',
            'International eTicket Reminders': 'eticket_reminders',
            'Customer Care Contact Information': 'customer_care',
            'Refunds Within 24 Hours': 'refund',
            'Hazardous materials': 'hazardous_materials',
            'Proud Member of Star Alliance': 'star_alliance'
        }
        for title, row in zip(titles_list, contents):
            paragraphs = row.xpath(united_xp.MARKETING_INFO_CONTENTS)
            if not paragraphs:
                continue
            # An unknown title used to raise KeyError and abort the
            # whole parse; it is now logged and skipped.
            if title not in name_dict:
                self.logger.warning('marketing_info %s is unknown %s',
                                    title, self._message_id)
                continue
            entries = []
            for paragraph in paragraphs:
                links = paragraph.xpath('.//a/@href')
                link_texts = paragraph.xpath('.//a/text()')
                texts = strip_str(remove_tags(etree.tostring(paragraph)))
                if links:
                    entries.append({
                        'text': texts,
                        'links': {
                            'name': link_texts,
                            'value': links
                        }
                    })
                else:
                    entries.append({'text': texts})
            result[name_dict[title]] = entries
    else:
        self.logger.error('marketing_info is empty %s', self._message_id)

    notice_titles = strip_list(self._etree.xpath(united_xp.NOTICES_TITLE))
    notice_contents = strip_list(
        self._etree.xpath(united_xp.NOTICE_CONTENTS))
    last_advice = strip_str(take_first(self._etree, united_xp.LAST_ADVICE))
    name_dict = {
        'Notice of Baggage Liability Limitations':
            'baggage_liability_limit',
        'Notice of Incorporated Terms': 'incorporated_terms',
        'Notice of Certain Terms': 'certain_terms',
        'Notice of Boarding Times': 'boarding_times',
        'Advice to International Passengers on Carrier Liability':
            'carrier_liability',
        'Notice - Overbooking of Flights': 'overbooking'
    }
    if notice_contents and notice_titles:
        # The trailing advice text belongs to the last notice.
        if last_advice:
            notice_contents[-1] += last_advice
        for title, content in zip(notice_titles, notice_contents):
            if title in name_dict:
                result[name_dict[title]] = content.strip('- ')
            else:
                self.logger.warning('notice %s is %s %s', title, content,
                                    self._message_id)
    else:
        self.logger.error('notice is empty %s', self._message_id)

    return result
def _parse(self):  # pylint: disable=R0912,R0915
    """Extract Marriott reservation fields from ``self._etree``.

    Each value is located with an XPath expression from ``marriott_xp`` and
    written into ``self._order``. A field that cannot be found is logged
    together with ``self._message_id`` and skipped.

    Returns:
        The ``self._order`` mapping, updated in place.
    """
    result = self._order

    hotel_name = strip_str(take_first(self._etree, marriott_xp.HOTEL_NAME))
    if hotel_name:
        result['hotel_name'] = hotel_name
    else:
        self.logger.error('hotel_name is empty %s', self._message_id)

    address = strip_str(take_first(self._etree, marriott_xp.ADDRESS))
    if address:
        result['address'] = address
    else:
        self.logger.error('address is empty %s', self._message_id)

    tel = take_first(self._etree, marriott_xp.PHONE)
    if tel:
        result['telephone'] = tel
    else:
        self.logger.error('telephone is empty %s', self._message_id)

    confirm_num = take_first(self._etree, marriott_xp.CONFIRM_NUM)
    if confirm_num:
        # Keep only what follows the last ': ' separator.
        result['confirm_code'] = confirm_num.split(': ')[-1]
    else:
        self.logger.error('confirm_number is empty %s', self._message_id)

    guest = take_first(self._etree, marriott_xp.GUEST)
    if guest:
        # Keep only what follows the last 'For ' prefix.
        result['guest_name'] = guest.split('For ')[-1]
    else:
        self.logger.error('guest_name is empty %s', self._message_id)

    check_in_out_time = strip_list(
        self._etree.xpath(marriott_xp.CHECK_IN_OUT_TIME))
    if len(check_in_out_time) == 2:
        result['check_in_time'] = check_in_out_time[0]
        result['check_out_time'] = check_in_out_time[1]
    else:
        self.logger.error('check_in_date and check_out_time is empty %s',
                          self._message_id)

    check_in_out_date = strip_list(
        self._etree.xpath(marriott_xp.CHECK_IN_OUT_DATE))
    check_in_date, check_out_date = unpack(check_in_out_date)
    if check_in_date and check_out_date:
        result['check_in_date'] = check_in_date
        result['check_out_date'] = check_out_date
        tz = to_timezone(address)

        check_in_date_formatted = \
            DateTime(check_in_date, 'MMMM DD, YYYY').tz_to_datetime(tz)
        if check_in_date_formatted:
            result['check_in_date_formatted'] = check_in_date_formatted
        else:
            self.logger.error('check_in_date_formatted is empty %s',
                              self._message_id)

        check_out_date_formatted = \
            DateTime(check_out_date, 'MMMM DD, YYYY').tz_to_datetime(tz)
        if check_out_date_formatted:
            result['check_out_date_formatted'] = check_out_date_formatted
        else:
            self.logger.error('check_out_date_formatted is empty %s',
                              self._message_id)
    else:
        self.logger.error('check_in_date and check_out_date is empty %s',
                          self._message_id)

    related_links = self._etree.xpath(marriott_xp.RELATED_LINK)
    related_text = self._etree.xpath(marriott_xp.RELATED_TEXT)
    related_links = to_dict(related_text, related_links)
    if related_links:
        related = []
        # Walk a snapshot: entries are removed from ``related_links`` inside
        # the loop, and removing from the list being iterated would skip the
        # entry that follows each removal.
        for link in list(related_links):
            link_name = link.get('name') or ''
            if 'Hotel Website' in link_name:
                result['hotel_link'] = link.get('value')
                related_links.remove(link)
            elif 'Map & Directions' in link_name:
                result['map_link'] = link.get('value')
                related_links.remove(link)
            elif 'Cancel' in link_name:
                result['cancellation_link'] = link.get('value')
            else:
                related.append(link)
        if related:
            # NOTE(review): the stored list is ``related_links`` (which still
            # contains the cancel entry), not ``related`` - kept as written.
            result['related_links'] = related_links
    else:
        self.logger.error('related_links is empty %s', self._message_id)

    room_type = take_first(self._etree, marriott_xp.ROOM_TYPE)
    room_type_value = take_first(self._etree, marriott_xp.ROOM_TYPE_VALUE)
    if room_type and room_type_value:
        result['room_type'] = room_type_value
    else:
        self.logger.error('room_type is empty %s', self._message_id)

    room_num_guest = strip_list(
        self._etree.xpath(marriott_xp.ROOM_NUM_GUEST))
    room_num_guest_name, room_num_guest_value = group(room_num_guest)
    if room_num_guest_name and room_num_guest_value:
        for label, value in zip(room_num_guest_name, room_num_guest_value):
            if 'NUMBER OF ROOMS' in label:
                result['number_of_rooms'] = value
            elif 'GUESTS PER ROOM' in label:
                # Recognised but intentionally not stored.
                continue
            else:
                self.logger.warning('%s is %s %s', label, value,
                                    self._message_id)
    else:
        self.logger.error('room number is empty %s', self._message_id)

    guarantee = strip_list(self._etree.xpath(
        marriott_xp.GUARANTEED_METHOD))
    if guarantee:
        result['guarantee_policies'] = [guarantee[-1]]
    else:
        self.logger.error('guarantee is empty %s', self._message_id)

    price_des = strip_str(
        take_first(self._etree, marriott_xp.CHARGE_DESCRIPTION))
    if price_des:
        result['price_tips'] = [price_des]
    else:
        self.logger.warning('price_description is empty %s',
                            self._message_id)

    notice = strip_list(self._etree.xpath(marriott_xp.HOTEL_ALERT))
    if notice:
        result['notice'] = notice
    else:
        self.logger.error('notice is empty %s', self._message_id)

    rates = strip_list(self._etree.xpath(marriott_xp.RATES))
    if rates:
        # The last entry names the rate type; the rest are the rate rows.
        rates_type = rates.pop()
        if 'Best Available rate' in rates_type:
            # Price is the sum of rates[2::3] divided by the sum of
            # rates[1::3], suffixed with the last token of rates[2].
            nights = sum(int(i.split(' ')[0]) for i in rates[1::3])
            price = sum(float(i.split(' ')[0]) for i in rates[2::3])
            currency = ' ' + rates[2].split(' ')[-1]
            result['price'] = str(round(price / nights, 2)) + currency
        else:
            # ``rates`` can be empty once the type entry has been popped.
            if rates:
                result['price'] = rates[-1]
            self.logger.error('price is empty %s', self._message_id)
    else:
        self.logger.error('rates is empty %s', self._message_id)

    taxes = strip_list(self._etree.xpath(marriott_xp.TAXES))
    name, value = unpack(taxes)
    if name and value:
        if 'TAXES & FEES' in name:
            result['taxes_fee'] = taxes[-1]
        else:
            self.logger.error('%s is %s %s', name, value, self._message_id)
    else:
        self.logger.error('taxes is empty %s', self._message_id)

    total = strip_list(self._etree.xpath(marriott_xp.TOTAL))
    name, value = unpack(total)
    # ``name`` is checked as well so that a missing label cannot reach the
    # substring test below.
    if total and name:
        if 'Total' in name:
            result['total_cost'] = total[-1]
        else:
            self.logger.error('%s is %s %s', name, value, self._message_id)
    else:
        self.logger.error('total_price is empty %s', self._message_id)

    other_charge = strip_list(self._etree.xpath(marriott_xp.OTHER_CHARGE))
    # Drop bullet characters and the section heading itself.
    other_charge = [
        i for i in other_charge if i != '\u2022' and i != 'Other Charges'
    ]
    if other_charge:
        result['other_charges'] = other_charge
    else:
        self.logger.warning('other_charge is empty %s', self._message_id)

    cancellation = take_first(self._etree,
                              marriott_xp.RATE_CANCELLATION_DETAILS)
    if cancellation is not None:
        cancellation = replace_tags(etree.tostring(cancellation))
        cancellation = cancellation.split('• \n')
        result['cancellation_policies'] = strip_list(cancellation)
    else:
        self.logger.error('cancellation_policy is empty %s',
                          self._message_id)

    rate_guarantee_title = take_first(self._etree,
                                      marriott_xp.RATE_GUARANTEE_TITLE)
    rate_guarantee = strip_list(
        self._etree.xpath(marriott_xp.RATE_GUARANTEE))
    rate_guarantee = [i for i in rate_guarantee if i != '\u2022']
    # The title may be missing; test it before the substring check.
    if (rate_guarantee and rate_guarantee_title
            and 'GUARANTEE' in rate_guarantee_title):
        if result.get('guarantee_policies'):
            result['guarantee_policies'].extend(rate_guarantee)
        else:
            result['guarantee_policies'] = rate_guarantee
    else:
        self.logger.error('rate guarantee is empty %s', self._message_id)

    addition_title = take_first(self._etree,
                                marriott_xp.ADDITION_INFO_TITLE)
    addition_link = strip_list(
        self._etree.xpath(marriott_xp.ADDITION_INFO_LINK))
    addition_text = strip_list(
        self._etree.xpath(marriott_xp.ADDITION_INFO_TEXT))
    if addition_text and addition_title and 'ADDITIONAL' in addition_title:
        result['additional_information'] = to_dict(addition_text,
                                                   addition_link)
    else:
        self.logger.error('additional information is empty %s',
                          self._message_id)

    contact_links = self._etree.xpath(marriott_xp.CONTACT_LINK)
    contact_texts = strip_list(self._etree.xpath(marriott_xp.CONTACT_TEXT))
    contact = strip_list(self._etree.xpath(marriott_xp.CONTACT_1))
    # Plain-text contacts use the same string as both name and value.
    contact_1 = [{'name': i, 'value': i} for i in contact]
    contact = chain(to_dict(contact_texts, contact_links), contact_1)
    contact = filter_dict_value(contact)
    if contact:
        result['contact_information'] = contact

    return result
def _parse(self) -> Dict:  # pylint: disable=R0912,R0915,R0916
    """Extract Expedia flight-booking fields from ``self._etree``.

    Each value is located with an XPath expression from
    ``expedia_flight_xp`` and written into ``self._order``. A field that
    cannot be found is logged together with ``self._message_id`` and
    skipped.

    Returns:
        The ``self._order`` mapping, updated in place.
    """
    result = self._order

    itinerary = strip_str(
        take_first(self._etree, expedia_flight_xp.ITINERARY))
    if itinerary:
        result['itinerary_code'] = itinerary
    else:
        self.logger.error('itinerary is empty %s', self._message_id)

    confirm_num = strip_str(
        take_first(self._etree, expedia_flight_xp.CONFIRM_NUM))
    if confirm_num:
        result['confirm_code'] = confirm_num
    else:
        self.logger.error('confirm_code is empty %s', self._message_id)

    booking_id = strip_str(
        take_first(self._etree, expedia_flight_xp.BOOKING_ID))
    if booking_id:
        result['booking_id'] = booking_id
    else:
        self.logger.error('booking_id is empty %s', self._message_id)

    ticket = strip_str(take_first(self._etree, expedia_flight_xp.TICKET))
    if ticket:
        # split() returns the whole string when there is no space, so no
        # separate branch is needed for that case.
        result['ticket_number'] = ticket.split(' ')[0]
    else:
        self.logger.error('ticket_number is empty %s', self._message_id)

    # The dates node holds both dates joined by ' - '; it may be missing,
    # in which case there is nothing to split.
    dates = take_first(self._etree, expedia_flight_xp.DATES)
    if dates:
        depart_date, arrive_date = unpack(strip_list(dates.split(' - ')))
    else:
        depart_date = arrive_date = None
    if depart_date and arrive_date:
        result['depart_date'] = depart_date
        result['arrive_date'] = arrive_date
    else:
        self.logger.error('depart_date and arrive_date is empty %s',
                          self._message_id)

    related_links_1 = self._etree.xpath(expedia_flight_xp.RELATED_LINK_1)
    if related_links_1:
        name_dict = {
            'Change': 'change_link',
            'cancel': 'cancellation_link',
            'Customer Support': 'customer_support_link'
        }
        for anchor in related_links_1:
            link = take_first(anchor, './@href')
            text = take_first(anchor, './text()')
            if text in name_dict:
                result[name_dict[text]] = link
            else:
                self.logger.warning('%s is %s %s', text, link,
                                    self._message_id)
    else:
        self.logger.error('part 1 of related_links is empty %s',
                          self._message_id)

    related_links_2 = self._etree.xpath(expedia_flight_xp.RELATED_LINK_2)
    related_texts_2 = self._etree.xpath(expedia_flight_xp.RELATED_TEXT_2)
    if related_links_2 and related_texts_2:
        result['related_links'] = to_dict(related_texts_2, related_links_2)
    else:
        self.logger.warning('part 2 of related_links is empty %s',
                            self._message_id)

    flight_name = strip_str(
        take_first(self._etree, expedia_flight_xp.FLIGHT_NAME))
    if flight_name:
        result['flight_name'] = flight_name
    else:
        self.logger.error('flight_name is empty %s', self._message_id)

    policies = strip_str(
        take_first(self._etree, expedia_flight_xp.CANCELLATION))
    if policies:
        result['policies'] = [policies]
    else:
        self.logger.error('policies is empty %s', self._message_id)

    depart_station = strip_str(
        take_first(self._etree, expedia_flight_xp.DEPART_STATION))
    if depart_station:
        result['depart_station'] = depart_station
    else:
        self.logger.error('depart_station is empty %s', self._message_id)

    depart_time = strip_str(
        take_first(self._etree, expedia_flight_xp.DEPART_TIME))
    if depart_time:
        result['depart_time'] = depart_time
    else:
        self.logger.error('depart_time is empty %s', self._message_id)

    depart_terminal = strip_str(
        take_first(self._etree, expedia_flight_xp.DEPART_TERMINAL))
    if depart_terminal:
        result['depart_terminal'] = depart_terminal
    else:
        self.logger.error('depart_terminal is empty %s', self._message_id)

    arrive_station = strip_str(
        take_first(self._etree, expedia_flight_xp.ARRIVE_STATION))
    if arrive_station:
        result['arrive_station'] = arrive_station
    else:
        self.logger.error('arrive_station is empty %s', self._message_id)

    arrive_time = strip_str(
        take_first(self._etree, expedia_flight_xp.ARRIVE_TIME))
    if arrive_time:
        result['arrive_time'] = arrive_time
    else:
        self.logger.error('arrive_time is empty %s', self._message_id)

    arrive_terminal = strip_str(
        take_first(self._etree, expedia_flight_xp.ARRIVE_TERMINAL))
    if arrive_terminal:
        result['arrive_terminal'] = arrive_terminal
    else:
        self.logger.error('arrive_terminal is empty %s', self._message_id)

    # The formatted datetimes need every date, time and station piece.
    if arrive_station \
            and depart_station \
            and arrive_time \
            and arrive_date \
            and depart_time \
            and depart_date:
        tz_depart = to_timezone(depart_station)
        tz_arrive = to_timezone(arrive_station)
        depart_datetime = ' '.join([depart_date, depart_time])
        arrive_datetime = ' '.join([arrive_date, arrive_time])

        # TODO: (LensHo) datetime formatted
        depart_datetime = \
            DateTime(depart_datetime,
                     'MMM DD, YYYY h:mma').tz_to_datetime(tz_depart)
        if depart_datetime:
            result['depart_datetime_formatted'] = depart_datetime
        else:
            self.logger.error('depart_datetime_formatted is empty %s',
                              self._message_id)

        arrive_datetime = \
            DateTime(arrive_datetime,
                     'MMM DD, YYYY h:mma').tz_to_datetime(tz_arrive)
        if arrive_datetime:
            result['arrive_datetime_formatted'] = arrive_datetime
        else:
            self.logger.error('arrive_datetime_formatted is empty %s',
                              self._message_id)

    cabin = strip_str(take_first(self._etree, expedia_flight_xp.CABIN))
    if cabin:
        result['cabin'] = cabin
    else:
        self.logger.error('cabin is empty %s', self._message_id)

    flight_info_divs = self._etree.xpath(
        expedia_flight_xp.FLIGHT_INFO_DIVS)
    if flight_info_divs:
        name_dict = {
            'Included': 'included',
            'Fee applies': 'fee_applies',
            'Not included': 'not_included'
        }
        for div in flight_info_divs:
            name = take_first(div, expedia_flight_xp.FLIGHT_INFO_DIV_NAME)
            value = div.xpath(expedia_flight_xp.FLIGHT_INFO_DIV_VALUE)
            if name in name_dict:
                result[name_dict[name]] = value
            else:
                self.logger.warning('%s is %s %s', name, value,
                                    self._message_id)

    flight_duration = take_first(self._etree,
                                 expedia_flight_xp.FLIGHT_DURATION)
    flight_duration = strip_str(flight_duration)
    if flight_duration:
        result['flight_duration'] = flight_duration
    else:
        self.logger.error('flight_duration is empty %s', self._message_id)

    guest_name = strip_str(take_first(self._etree,
                                      expedia_flight_xp.GUEST))
    if guest_name:
        result['guest_name'] = guest_name
    else:
        self.logger.error('guest_name is empty %s', self._message_id)

    price = strip_list(self._etree.xpath(expedia_flight_xp.PRICE))
    if price:
        name_dict = {'Flight': 'price', 'Taxes & Fees': 'taxes_fee'}
        # Entries alternate label, amount, label, amount, ...
        for label, amount in zip(price[::2], price[1::2]):
            if 'Traveler' in label:
                continue
            if label in name_dict:
                result[name_dict[label]] = amount
            else:
                self.logger.warning('%s is %s %s', label, amount,
                                    self._message_id)
    else:
        self.logger.error('price is empty %s', self._message_id)

    total = strip_str(take_first(self._etree, expedia_flight_xp.TOTAL))
    if total:
        result['total_cost'] = total
    else:
        self.logger.error('total_cost is empty %s', self._message_id)

    united_restrictions = self._etree.xpath(
        expedia_flight_xp.UNITED_RESTRICTIONS)
    united_restrictions = strip_list(
        [remove_tags(etree.tostring(i)) for i in united_restrictions])
    res_des = strip_list(
        self._etree.xpath(expedia_flight_xp.UNITED_RESTRICTIONS_DES))
    if united_restrictions:
        # Descriptions are keyed by their 1-based position, as a string, so
        # that a restriction ending in that digit can look up its footnote.
        explanations = {
            str(position): text
            for position, text in enumerate(res_des or [], start=1)
        }
        restrictions = []
        for text in united_restrictions:
            entry = {'text': text}
            marker = text[-1:]
            # Attach an explanation only when one exists for the marker; a
            # trailing digit without a matching description is left bare.
            if marker.isdigit() and marker in explanations:
                entry['explanation'] = explanations[marker]
            restrictions.append(entry)
        if restrictions:
            result['united_restrictions'] = restrictions
        else:
            self.logger.error('united_res is empty %s', self._message_id)
    else:
        self.logger.error('united_restrictions is empty %s',
                          self._message_id)

    notice = strip_list(self._etree.xpath(expedia_flight_xp.NOTICE))
    if notice:
        result['notice'] = notice
    else:
        self.logger.error('notice is empty %s', self._message_id)

    airline_rule = strip_str(
        take_first(self._etree, expedia_flight_xp.AIRLINE_RULE))
    if airline_rule:
        result['airline_rule'] = airline_rule
    else:
        self.logger.error('airline_rule is empty %s', self._message_id)

    tel = strip_str(take_first(self._etree, expedia_flight_xp.PHONE))
    if tel:
        # Keep the last space-separated token, without a trailing period.
        result['telephone'] = tel.split(' ')[-1].strip('.')
    else:
        self.logger.error('telephone is empty %s', self._message_id)

    return result
def _parse(self):  # pylint: disable=R0912,R0915
    """Extract IHG reservation fields from ``self._etree``.

    Each value is located with an XPath expression from ``ihg_xp`` and
    written into ``self._order``. A field that cannot be found is logged
    together with ``self._message_id`` and skipped.

    Returns:
        The ``self._order`` mapping, updated in place.
    """
    result = self._order

    hotel_name = strip_str(take_first(self._etree, ihg_xp.HOTEL_NAME))
    if hotel_name:
        result['hotel_name'] = hotel_name
    else:
        self.logger.error('hotel_name is empty %s', self._message_id)

    confirm_num = strip_str(take_first(self._etree, ihg_xp.CONFIRM_NUM))
    if confirm_num:
        result['confirm_code'] = confirm_num
    else:
        self.logger.error('confirm_number is empty %s', self._message_id)

    address = ', '.join(strip_list(self._etree.xpath(ihg_xp.ADDRESS)))
    if address:
        result['address'] = address
    else:
        self.logger.error('address is empty %s', self._message_id)

    telephone = strip_str(take_first(self._etree, ihg_xp.PHONE))
    if telephone:
        # Keep only what follows the last ': ' separator.
        result['telephone'] = telephone.split(': ')[-1]
    else:
        self.logger.error('telephone is empty %s', self._message_id)

    tz = to_timezone(address)

    check_in_date = strip_str(take_first(self._etree,
                                         ihg_xp.CHECK_IN_DATE))
    if check_in_date:
        result['check_in_date'] = check_in_date
    else:
        self.logger.error('check_in_date is empty %s', self._message_id)

    check_out_date = strip_str(
        take_first(self._etree, ihg_xp.CHECK_OUT_DATE))
    if check_out_date:
        result['check_out_date'] = check_out_date
    else:
        self.logger.error('check_out_date is empty %s', self._message_id)

    check_in_time = strip_str(take_first(self._etree,
                                         ihg_xp.CHECK_IN_TIME))
    if check_in_time:
        result['check_in_time'] = check_in_time
    else:
        self.logger.error('check_in_time is empty %s', self._message_id)

    check_out_time = strip_str(
        take_first(self._etree, ihg_xp.CHECK_OUT_TIME))
    if check_out_time:
        result['check_out_time'] = check_out_time
    else:
        self.logger.error('check_out_time is empty %s', self._message_id)

    check_in_date_formatted = DateTime(check_in_date,
                                       'MM/DD/YY').tz_to_datetime(tz)
    # Stored unconditionally: the truthiness guard on this value is
    # disabled in the original code, unlike the check-out value below.
    result['check_in_date_formatted'] = check_in_date_formatted

    check_out_date_formatted = DateTime(check_out_date,
                                        'MM/DD/YY').tz_to_datetime(tz)
    if check_out_date_formatted:
        result['check_out_date_formatted'] = check_out_date_formatted

    guest_name = strip_str(take_first(self._etree, ihg_xp.GUEST_NAME))
    if guest_name:
        result['guest_name'] = guest_name
    else:
        self.logger.error('guest_name is empty %s', self._message_id)

    room_num = strip_str(take_first(self._etree, ihg_xp.ROOMS))
    guest_num = strip_str(take_first(self._etree, ihg_xp.ADULTS))
    room_type = strip_str(take_first(self._etree, ihg_xp.ROOM_TYPE))
    if room_num:
        result['number_of_rooms'] = room_num
    else:
        self.logger.error('number of rooms is empty %s', self._message_id)
    if guest_num:
        result['number_of_guests'] = guest_num
    else:
        self.logger.error('number of guests is empty %s', self._message_id)
    if room_type:
        result['room_type'] = room_type
    else:
        self.logger.error('room_type is empty %s', self._message_id)

    price_des = strip_str(take_first(self._etree, ihg_xp.RATE_NOTICE))
    if price_des:
        result['price_tips'] = [price_des]
    else:
        self.logger.error('price_description is empty %s',
                          self._message_id)

    rate_names = strip_list(self._etree.xpath(ihg_xp.RATE_NAME))
    rate_values = strip_list(self._etree.xpath(ihg_xp.RATE_VALUE))
    if rate_values and rate_names:
        # The first name/value pair is the price; its label is discarded.
        rate_names.pop(0)
        result['price'] = rate_values.pop(0)
        name_dict = {
            'Service Charge': 'service_charge',
            'Total Taxes': 'taxes_fee',
            'Estimated Total Price': 'total_cost'
        }
        for label, amount in zip(rate_names, rate_values):
            if label in name_dict:
                result[name_dict[label]] = amount
            else:
                self.logger.warning(' %s is %s %s', label, amount,
                                    self._message_id)

    cancellation_name = strip_str(
        take_first(self._etree, ihg_xp.CANCELLATION_NAME))
    cancellation_value = strip_str(
        take_first(self._etree, ihg_xp.CANCELLATION_VALUE))
    # The label may be missing; test it before the substring check.
    if (cancellation_name and 'Cancellation' in cancellation_name
            and cancellation_value):
        result['cancellation_policy'] = cancellation_value
    else:
        self.logger.error('cancellation_policy is empty %s',
                          self._message_id)

    rate_des_name = strip_str(take_first(self._etree,
                                         ihg_xp.RATE_DES_NAME))
    rate_des_value = strip_str(
        take_first(self._etree, ihg_xp.RATE_DES_VALUE))
    if rate_des_name and 'Rate' in rate_des_name and rate_des_value:
        result['rate_tip'] = rate_des_value
    else:
        self.logger.error('rate_tip is empty %s', self._message_id)

    hotel_link = take_first(self._etree, ihg_xp.HOTEL_LINK)
    if hotel_link:
        result['hotel_link'] = hotel_link
    else:
        self.logger.error('hotel_link is empty %s', self._message_id)

    preference_link = take_first(self._etree, ihg_xp.PREFERENCE_LINK)
    if preference_link:
        result['preference_link'] = preference_link
    else:
        self.logger.error('preference_link is empty %s', self._message_id)

    related_links = self._etree.xpath(ihg_xp.RELATED_LINK)
    if related_links:
        related = []
        for item in related_links:
            link = item.xpath('./a/@href')
            text = item.xpath('./a/text()')
            if link and text:
                # NOTE(review): ``link`` is the whole XPath result, not its
                # first element - kept as written.
                if 'CANCEL' in text[0]:
                    result['cancellation_link'] = link
                elif 'MODIFY' in text[0]:
                    result['modify_link'] = link
                elif 'DOWNLOAD' in text[0]:
                    pass
                else:
                    related.append({'name': text[0], 'value': link})
        if related:
            result['related_links'] = related
    else:
        self.logger.error('related_links is empty %s', self._message_id)

    notice = self._etree.xpath(ihg_xp.NOTICE)
    if notice:
        notice = notice[0].xpath('./text() | ./*')
        # Sections in document order. Each dict is also stored in ``result``
        # under its mapped key, so later edits show up there too.
        sections = []
        name_dict = {
            'Early Departure Fee:': 'early_departure_fee',
            'Daily Valet Parking Fee:': 'daily_valet_parking_fee',
            'Pet Policy:': 'pet_policy',
            'Payment Card Authorization Form:': 'payment_card_auth_form'
        }
        for node in notice:
            if isinstance(node, str):
                # Loose text belongs to the most recent section.
                if sections:
                    sections[-1]['text'] = ''.join(
                        [sections[-1]['text'], node])
                else:
                    self.logger.error('string before span %s %s', node,
                                      self._message_id)
                continue
            if node.tag == 'br':
                continue
            if node.tag != 'span':
                self.logger.warning('unknown tag %s %s', node.tag,
                                    self._message_id)
                continue

            # A new section starts: drop the previous section's ``links``
            # key only when that section collected no links.
            if sections and not sections[-1].get('links'):
                sections[-1].pop('links', None)

            span = node.xpath('./span/text()')
            # The title is the nested span's text when there is one,
            # otherwise the span's own text.
            key = span[0] if span else node.text
            try:
                name = name_dict[key]
            except KeyError as exc:
                self.logger.exception(exc)
                self.logger.warning('unknown key %s %s', key,
                                    self._message_id)
                continue

            section = {'text': '', 'links': []}
            if span:
                for child in node.xpath('./text() | ./*'):
                    if isinstance(child, str):
                        section['text'] = ''.join([section['text'], child])
                    elif child.tag == 'a':
                        # An anchor without text contributes nothing to the
                        # running text but is still recorded as a link.
                        section['text'] = ''.join(
                            [section['text'], child.text or ''])
                        section['links'].append({
                            'name': child.text,
                            'value': child.get('href')
                        })
            result[name] = section
            sections.append(section)

        # Same clean-up for the final section.
        if sections and not sections[-1].get('links'):
            sections[-1].pop('links', None)
    else:
        self.logger.error('notice is empty %s', self._message_id)

    tip = take_first(self._etree, ihg_xp.ADDITION_FEE)
    if tip:
        result['notice'] = tip
    else:
        self.logger.warning('hotel_information_notice is empty %s',
                            self._message_id)

    return result
def parse(self):
    """Extract Hyatt reservation fields from ``self._etree``.

    Each value is located with an XPath expression from ``hyatt_xp`` and
    written into ``self._order``. A field that cannot be found is logged
    together with ``self._message_id`` and skipped.

    Returns:
        The ``self._order`` mapping, updated in place.
    """
    result = self._order

    hotel_name = strip_str(take_first(self._etree, hyatt_xp.HOTEL_NAME))
    if hotel_name:
        result['hotel_name'] = hotel_name
    else:
        self.logger.error('hotel_name is empty %s', self._message_id)

    confirm_num = strip_str(take_first(self._etree, hyatt_xp.CONFIRM_NUM))
    if confirm_num:
        # Keep only what follows the last ': ' separator.
        result['confirm_code'] = confirm_num.split(': ')[-1]
    else:
        self.logger.error('conform_number is empty %s', self._message_id)

    address = ', '.join(strip_list(self._etree.xpath(hyatt_xp.ADDRESS)))
    if address:
        result['address'] = address
    else:
        self.logger.error('address is empty %s', self._message_id)

    telephone = strip_str(take_first(self._etree, hyatt_xp.PHONE))
    if telephone:
        result['telephone'] = telephone
    else:
        self.logger.error('telephone is empty %s', self._message_id)

    check_in_out_datetime = strip_list(
        self._etree.xpath(hyatt_xp.CHECK_IN_OUT_DATETIME))
    check_in_date, check_in_time, check_out_date, check_out_time = \
        unpack(check_in_out_datetime, 4)
    if check_out_date and check_in_date:
        tz = to_timezone(address)
        check_in_date = remove_space(check_in_date)
        check_out_date = remove_space(check_out_date)
        result['check_in_date'] = check_in_date
        result['check_out_date'] = check_out_date

        check_in_date_formatted = DateTime(
            check_in_date, 'MMMM DD, YYYY').tz_to_datetime(tz)
        if check_in_date_formatted:
            result['check_in_date_formatted'] = check_in_date_formatted
        else:
            self.logger.error('check_in_date_formatted is empty %s',
                              self._message_id)

        check_out_date_formatted = DateTime(
            check_out_date, 'MMMM DD, YYYY').tz_to_datetime(tz)
        if check_out_date_formatted:
            result['check_out_date_formatted'] = check_out_date_formatted
        else:
            self.logger.error('check_out_date_formatted is empty %s',
                              self._message_id)
    else:
        self.logger.error('check_in_date and check_out_date is empty %s',
                          self._message_id)

    if check_in_time and check_out_time:
        result['check_in_time'] = check_in_time
        result['check_out_time'] = check_out_time
    else:
        self.logger.error('check_in_time and check_out_time is empty %s',
                          self._message_id)

    related_links = self._etree.xpath(hyatt_xp.RELATED_LINKS)
    related_text = strip_list(
        self._etree.xpath(hyatt_xp.RELATED_LINKS_TEXT))
    plan_link = self._etree.xpath(hyatt_xp.PLAN_LINK)
    plan_text = strip_list(self._etree.xpath(hyatt_xp.PLAN_TEXT))
    if not related_links and related_text:
        self.logger.error('related_links is empty %s', self._message_id)
    if not plan_text and plan_link:
        self.logger.error('plan_links is empty %s', self._message_id)

    # Materialise the chains: a chain object is always truthy, which would
    # make the emptiness check below pass even with nothing to pair up.
    related_links = list(chain(related_links or [], plan_link or []))
    related_texts = list(chain(related_text or [], plan_text or []))
    if related_links and related_texts:
        name_dict = {
            'Cancel Reservation': 'cancellation_link',
            'Modify Reservation': 'modify_link',
            'Get to know our hotel': 'hotel_link',
            'Add Reservation': 'add_reservation_link',
            'Customer Service': 'customer_service_link'
        }
        for text, link in zip(related_texts, related_links):
            if text in name_dict:
                result[name_dict[text]] = link
            else:
                self.logger.warning('%s is %s %s', text, link,
                                    self._message_id)
    else:
        self.logger.error('related_links is empty %s', self._message_id)

    reservation_info = self._etree.xpath(hyatt_xp.RESERVATION_INFO)
    # Three rows are consumed positionally (first, last, next-to-last), so
    # fewer than three cannot be parsed.
    if len(reservation_info) >= 3:
        note_1 = reservation_info.pop(0)
        note_1 = strip_list(
            note_1.xpath('./table/tr/td/table/tr[2]/td/text()'))
        note_2 = reservation_info.pop()
        note_2 = strip_list(note_2.xpath('.//td/text()'))
        # Concatenate both note lists into one flat list; wrapping them in
        # a single list would yield a two-element list of lists that is
        # never empty.
        note = list(chain(note_1 or [], note_2 or []))
        if note:
            result['notice'] = note
        else:
            self.logger.error('notice is empty %s', self._message_id)

        cancellation = reservation_info.pop()
        cancellation = cancellation.xpath(
            './table/tr/td/table/tr[2]/td/text()')
        cancellation = strip_list(cancellation)
        if cancellation:
            result['cancellation_policies'] = cancellation
        else:
            self.logger.error('policies is empty %s', self._message_id)

        name_dict = {
            'Guest Name': 'guest_name',
            'Number of Adults': 'number_of_guests',
            'Room(s) Booked': 'number_of_rooms',
            'Room Type': 'room_type',
            'Type of Rate': 'rate_type',
            'SERVICE CHARGE': 'service_charge',
            'SALES TAX': 'taxes_fee',
        }
        # The remaining rows are title/value pairs.
        for row in reservation_info:
            title = strip_str(take_first(row, './b/text()'))
            value = '\n'.join(strip_list(row.xpath('./text()')))
            if title and value:
                if 'Room Description' in title:
                    result['room_tips'] = [value.strip('- ')]
                elif 'Rate Information' in title:
                    result['rate_tips'] = remove_space(value)
                elif 'Policies' in title:
                    result['policies'] = strip_list(value.split('\n'))
                elif title in name_dict:
                    result[name_dict[title]] = value
                else:
                    self.logger.warning('%s is %s %s', title, value,
                                        self._message_id)
    elif reservation_info:
        self.logger.error('reservation_information is incomplete %s',
                          self._message_id)
    else:
        self.logger.error('reservation_information is empty %s',
                          self._message_id)

    return result
def _parse(self):  # pylint: disable=R0912, R0915
    """Extract Hotels.com reservation fields from ``self._etree``.

    Each value is located with an XPath expression from ``hotels_xp`` and
    written into ``self._order``. A field that cannot be found is logged
    together with ``self._message_id`` and skipped.

    Returns:
        The ``self._order`` mapping, updated in place.
    """
    result = self._order

    address = strip_str(take_first(self._etree, hotels_xp.ADDRESS))
    if address:
        result['address'] = address
    else:
        self.logger.error('address is empty %s', self._message_id)

    confirm_num = take_first(self._etree, hotels_xp.CONFIRM_NUMBER)
    if confirm_num:
        result['confirm_code'] = confirm_num
    else:
        self.logger.error('conform_code is empty %s', self._message_id)

    tz = to_timezone(address)

    check_in_date = take_first(self._etree, hotels_xp.CHECK_IN_DATE)
    if check_in_date:
        check_in_date_formatted = DateTime(
            check_in_date, 'MMMM D, YYYY').tz_to_datetime(tz)
        # Stored unconditionally: the truthiness guard on this value is
        # disabled in the original code, unlike the check-out value below.
        result['check_in_date_formatted'] = check_in_date_formatted
        result['check_in_date'] = check_in_date
    else:
        self.logger.error('check_in_date is empty %s', self._message_id)

    check_in_time = take_first(self._etree, hotels_xp.CHECK_IN_TIME)
    if check_in_time:
        # Remove surrounding parentheses.
        result['check_in_time'] = check_in_time.strip('(').strip(')')
    else:
        self.logger.error('check_in_time is empty %s', self._message_id)

    check_out_date = take_first(self._etree, hotels_xp.CHECK_OUT_DATE)
    if check_out_date:
        check_out_date_formatted = DateTime(
            check_out_date, 'MMMM D, YYYY').tz_to_datetime(tz)
        if check_out_date_formatted:
            result['check_out_date_formatted'] = check_out_date_formatted
        result['check_out_date'] = check_out_date
    else:
        self.logger.error('check_out_date is empty %s', self._message_id)

    check_out_time = take_first(self._etree, hotels_xp.CHECK_OUT_TIME)
    if check_out_time:
        result['check_out_time'] = check_out_time.strip('(').strip(')')
    else:
        self.logger.error('check_out_time is empty %s', self._message_id)

    stay = take_first(self._etree, hotels_xp.STAY)
    if stay:
        # First comma-separated part is stored as nights, last as rooms.
        stay_parts = stay.split(', ')
        result['number_of_nights'] = stay_parts[0]
        result['number_of_rooms'] = stay_parts[-1]
    else:
        self.logger.error('stay is empty %s', self._message_id)

    cancellation_details = strip_list([
        strip_str(take_first(self._etree, hotels_xp.CANCELLATION)),
        strip_str(take_first(self._etree,
                             hotels_xp.CANCELLATION_POLICY_1)),
        strip_str(take_first(self._etree,
                             hotels_xp.CANCELLATION_POLICY_2)),
    ])
    if cancellation_details:
        result['cancellation_policies'] = cancellation_details
    else:
        self.logger.error('policies is empty %s', self._message_id)

    hotel_name = strip_str(take_first(self._etree, hotels_xp.NAME))
    if hotel_name:
        result['hotel_name'] = hotel_name
    else:
        self.logger.error('hotel_name is empty %s', self._message_id)

    tel = take_first(self._etree, hotels_xp.TELEPHONE)
    if tel:
        # Keep only what follows the last ':' separator.
        result['telephone'] = tel.split(':')[-1].strip()
    else:
        self.logger.error('telephone is empty %s', self._message_id)

    notice = self._etree.xpath(hotels_xp.IMPORTANT_NOTICE)
    if not notice:
        self.logger.warning('notice is empty %s', self._message_id)
    required = self._etree.xpath(hotels_xp.REQUIRED)
    if not required:
        self.logger.warning('required is empty %s', self._message_id)
    if notice or required:
        result['notice'] = list(chain(notice or [], required or []))

    price = strip_str(take_first(self._etree, hotels_xp.PRICE))
    if price:
        result['price'] = price
    else:
        self.logger.error('price is empty %s', self._message_id)

    total = strip_str(take_first(self._etree, hotels_xp.COST))
    if total:
        result['total_cost'] = total
    else:
        self.logger.error('total_cost is empty %s', self._message_id)

    guest_name = ' '.join(strip_list(self._etree.xpath(hotels_xp.ROOM_2)))
    if guest_name:
        result['guest_name'] = guest_name
    else:
        self.logger.error('guest_name is empty %s', self._message_id)

    icon_explanation = take_first(self._etree, hotels_xp.ICON_EXPLANATION)
    if icon_explanation:
        result['icon_explanation'] = icon_explanation
    else:
        self.logger.warning(
            'icon_explanation is empty %s', self._message_id
        )

    room_all = self._etree.xpath(hotels_xp.ROOM_ALL)
    # The first entry is the bed type; an empty result has none to pop.
    bed_type = room_all.pop(0) if room_all else None
    if bed_type:
        result['bed_type'] = bed_type
    else:
        self.logger.error('bed_type is empty %s', self._message_id)

    values = []
    room_tip = []
    for line in room_all:
        stripped = line.strip()
        # Entries shorter than five characters are ignored.
        if len(stripped) < 5:
            continue
        if '-sq-' in line:
            result['room_area'] = stripped
        elif line.startswith(' - '):
            # ' - ' prefixed lines are collected as facility values and
            # paired with the facility keys further down.
            values.append(line.split(' - ')[-1].strip())
        else:
            room_tip.append(stripped)
    if room_tip:
        result['room_tips'] = room_tip
    else:
        self.logger.error('room_tips is empty %s', self._message_id)

    room_type = strip_str(take_first(self._etree, hotels_xp.ROOM_1))
    if room_type:
        result['room_type'] = room_type
    else:
        self.logger.error('room_type is empty %s', self._message_id)

    preference = strip_str(take_first(self._etree, hotels_xp.PREFERENCE))
    if preference:
        result['preference'] = preference
    else:
        self.logger.error('preference is empty %s', self._message_id)

    note = strip_str(take_first(self._etree, hotels_xp.NOTE))
    if note:
        result['tip'] = note
    else:
        self.logger.error('tip is empty %s', self._message_id)

    facilities_keys = self._etree.xpath(hotels_xp.FACILITIES_KEYS)
    name_dict = {
        'Internet': 'internet',
        'Entertainment': 'entertainment',
        'Food & Drink': 'food_drink',
        'Sleep': 'sleep',
        'Bathroom': 'bathroom',
        'Practical': 'practical',
        'Comfort': 'comfort'
    }
    if facilities_keys and values:
        for key, value in zip(facilities_keys, values):
            if key in name_dict:
                result[name_dict[key]] = value
            else:
                self.logger.warning('%s is %s %s', key, value,
                                    self._message_id)
    else:
        self.logger.error('room_details is empty %s', self._message_id)

    return result
def _parse(self):
    """Extract flight-booking fields from ``self._etree`` into the order.

    Values are located with the XPath expressions in ``ba_xp``. The
    confirmation code, the flight number and the eight-entry itinerary are
    mandatory; every other field is optional and is only logged when it
    cannot be found.

    Returns:
        ``self._order``, updated in place with the extracted fields.

    Raises:
        MissingColumnError: if the confirmation code or the flight number
            is missing, or the itinerary does not have exactly 8 entries.
    """
    result = self._order

    result['confirm_code'] = strip_str(
        take_first(self._etree, ba_xp.CONFIRM_CODE))
    if not result['confirm_code']:
        raise MissingColumnError('confirm_code', self._message_id)

    try:
        result['flight_number'] = self._etree.xpath(ba_xp.FLIGHT_NUM)[0]
    except IndexError as e:
        raise MissingColumnError('flight_number', self._message_id) from e

    # Itinerary layout: entries 0-3 are the departure date, time, city and
    # terminal; entries 4-7 are the same four fields for the arrival.
    itinerary = strip_list(self._etree.xpath(ba_xp.ITINERARY))
    if len(itinerary) != 8:
        raise MissingColumnError('itinerary', self._message_id)
    result['depart_date'], result['arrive_date'] = itinerary[::4]
    result['depart_time'], result['arrive_time'] = itinerary[1::4]
    result['depart_city'], result['arrive_city'] = itinerary[2::4]
    result['depart_terminal'] = itinerary[3]
    result['arrive_terminal'] = itinerary[-1]

    tz_dp, tz_ar = to_timezone(itinerary[2]), to_timezone(itinerary[6])
    dp_dt, ar_dt = ' '.join(itinerary[:2]), ' '.join(itinerary[4:6])
    # TODO(LensHo): the hour format may be H
    # NOTE(review): the other parsers in this file use `D`/`DD` for the day
    # token; confirm that lowercase `d` is what DateTime expects here.
    dt_format = 'd MMM YYYY HH:mm'
    result['depart_datetime_formatted'] = DateTime(dp_dt, dt_format) \
        .tz_to_datetime(tz_dp)
    if not result['depart_datetime_formatted']:
        result.pop('depart_datetime_formatted')
        self.logger.error('depart_datetime_formatted is empty %s',
                          self._message_id)
    result['arrive_datetime_formatted'] = DateTime(ar_dt, dt_format) \
        .tz_to_datetime(tz_ar)
    if not result['arrive_datetime_formatted']:
        result.pop('arrive_datetime_formatted')
        self.logger.error('arrive_datetime_formatted is empty %s',
                          self._message_id)

    guest = strip_list(self._etree.xpath(ba_xp.GUEST))
    if guest:
        # Drop the 'Passenger' column heading that is matched with the names.
        guest = [name for name in guest if name != 'Passenger']
        result['guest_name'] = ', '.join(guest).strip(', ')
        result['guest_names_list'] = guest
    else:
        self.logger.error('guest_name is empty %s', self._message_id)

    baggage_names = strip_list(self._etree.xpath(ba_xp.BAGGAGE_NAMES))
    baggage_values = strip_list(self._etree.xpath(ba_xp.BAGGAGE_VALUES))
    if baggage_names and baggage_values:
        for name, value in zip(baggage_names, baggage_values):
            if name in self.name_dict:
                result[self.name_dict[name]] = value
            else:
                self.logger.warning('baggage information %s is %s',
                                    name, value)
    else:
        self.logger.error('baggage_allowance is empty %s', self._message_id)

    airline_link = self._etree.xpath(ba_xp.AIRLINE_LINK)
    airline_text = self._etree.xpath(ba_xp.AIRLINE_TEXT)
    if airline_link:
        # Links and names come from two separate queries, so the names list
        # may be shorter than the links list; never index it blindly.
        result['carrier_link'] = airline_link[0]
        if airline_text:
            result['carrier_name'] = airline_text[0]
        if len(airline_link) >= 2:
            result['trans_airline_link'] = airline_link[1]
            if len(airline_text) >= 2:
                result['trans_airline_name'] = airline_text[1]
        if len(airline_text) < min(len(airline_link), 2):
            self.logger.warning('airline name is empty %s',
                                self._message_id)
    else:
        self.logger.warning('airline name is empty %s', self._message_id)

    result['disability_assistance'] = strip_str(
        take_first(self._etree, ba_xp.DISABILITY_ASSISTANCE))
    if not result['disability_assistance']:
        result.pop('disability_assistance')
        self.logger.warning('disability assistance is empty %s',
                            self._message_id)

    payment = self._etree.xpath(ba_xp.PAYMENT)
    if payment:
        for row in payment:
            cells = strip_list(row.xpath('./td/text()'))
            if not cells:
                continue
            if cells[0] in self.name_dict:
                result[self.name_dict[cells[0]]] = remove_space(cells[1])
            elif len(cells) == 1:
                # A row with a single cell carries a ticket number. Append
                # it to any number already collected, without emitting a
                # leading separator for the first one.
                ticket = remove_space(cells[0])
                collected = result.get('ticket_number')
                result['ticket_number'] = (
                    ', '.join([collected, ticket]) if collected else ticket)
            else:
                self.logger.warning('%s is %s %s', cells[0], cells[1],
                                    self._message_id)
    else:
        self.logger.error('payment is empty %s', self._message_id)

    result['modify_link'] = take_first(self._etree, ba_xp.CHANGE_LINKS)
    if not result['modify_link']:
        result.pop('modify_link')
        self.logger.error('modify link is empty %s', self._message_id)

    links = self._etree.xpath(ba_xp.LINKS)
    if links:
        result['related_links'] = []
        for item in links:
            name = take_first(item, './text()')
            link = take_first(item, './@href')
            if name and link:
                result['related_links'].append({
                    'name': name,
                    'link': link
                })
        # Test the collected list, not `result`: the order always holds
        # other keys by now, so `not result` could never be true.
        if not result['related_links']:
            result.pop('related_links')
            self.logger.error('related_links is empty %s',
                              self._message_id)
    else:
        self.logger.error('related_links is empty %s', self._message_id)

    return result
def _parse(self):
    """Extract hotel-reservation fields from ``self._etree`` into the order.

    Values are located with the XPath expressions in ``expedia_xp``. No
    field is treated as mandatory by this method: a value that cannot be
    found is left out of the order and reported through ``self.logger``.

    Returns:
        ``self._order``, updated in place with the extracted fields.
    """
    result = self._order

    # Exactly two links are expected: the hotel page first, then the map.
    links = self._etree.xpath(expedia_xp.LINK)
    if len(links) == 2:
        result['hotel_link'] = links[0]
        result['map_link'] = links[1]
    else:
        self.logger.warning('hotel and map links is empty %s',
                            self._message_id)

    tel_fax = take_first(self._etree, expedia_xp.HELP)
    if tel_fax:
        if 'Fax' in tel_fax:
            # Telephone is the text before ', Fax: '; fax is whatever
            # follows the last colon.
            result['telephone'] = tel_fax.split(', Fax: ')[0].split(
                ': ')[-1]
            result['fax'] = tel_fax.split(':')[-1].strip()
        else:
            result['telephone'] = tel_fax.split(': ')[-1]
    else:
        self.logger.error('telephone is empty %s', self._message_id)

    hotel_name = strip_str(take_first(self._etree, expedia_xp.NAME))
    if hotel_name:
        result['hotel_name'] = hotel_name
    else:
        self.logger.error('hotel_name is empty %s', self._message_id)

    address = strip_str(take_first(self._etree, expedia_xp.ADDRESS))
    if address:
        result['address'] = address
    else:
        self.logger.error('address is empty %s', self._message_id)

    reservation_date = strip_str(
        take_first(self._etree, expedia_xp.RESERVATION_DATE))
    if reservation_date:
        check_in_date, check_out_date = unpack(
            reservation_date.split(' - '))
    else:
        check_in_date = check_out_date = ''
        self.logger.error('reservation_date_raw is empty %s',
                          self._message_id)
    if check_out_date and check_in_date:
        result['check_in_date'] = check_in_date
        result['check_out_date'] = check_out_date
        # The time zone is derived from the hotel address.
        tz = to_timezone(address)
        # date format is unsure
        check_in_date_formatted = DateTime(
            check_in_date, 'MMM D, YYYY').tz_to_datetime(tz)
        if check_in_date_formatted:
            result['check_in_date_formatted'] = check_in_date_formatted
        check_out_date_formatted = DateTime(
            check_out_date, 'MMM D, YYYY').tz_to_datetime(tz)
        if check_out_date_formatted:
            result['check_out_date_formatted'] = check_out_date_formatted

    confirm_number = strip_str(
        take_first(self._etree, expedia_xp.COMFIRM_NUMBER))
    if confirm_number:
        result['confirm_code'] = confirm_number
    else:
        self.logger.error('confirm_number is empty %s', self._message_id)

    reservation_time = strip_list(
        self._etree.xpath(expedia_xp.CHECK_IN_OUT_TIME))
    if reservation_time:
        check_in_time, check_out_time = unpack(reservation_time)
    else:
        check_out_time = check_in_time = ''
        self.logger.error('reservation_time is empty %s',
                          self._message_id)
    if check_in_time and check_out_time:
        result['check_in_time'] = check_in_time
        result['check_out_time'] = check_out_time

    # The guest name is the text after 'for ' in the second matched entry.
    guest = strip_list(self._etree.xpath(expedia_xp.GUEST))
    guest_name = guest[1].split('for ')[-1] if len(guest) > 1 else ''
    if guest_name:
        result['guest_name'] = guest_name
    else:
        self.logger.error('guest_name is empty %s', self._message_id)

    # First room entry is the room type; the remainder are the amenities.
    room = strip_list(self._etree.xpath(expedia_xp.ROOM))
    room_type = room.pop(0) if room else ''
    if room_type:
        result['room_type'] = room_type
    else:
        self.logger.error('room_type is empty %s', self._message_id)
    if room:
        result['included_amenities'] = [remove_space(i) for i in room]
    else:
        self.logger.error('included_amenities is empty %s',
                          self._message_id)

    # Entry 1 is the bed type; it is popped so that the entries that follow
    # it become the room tips (entry 0 is skipped in both cases).
    room_request = strip_list(self._etree.xpath(expedia_xp.ROOM_REQUEST))
    if len(room_request) > 1:
        result['bed_type'] = room_request.pop(1)
    else:
        self.logger.error('bed_type is empty %s', self._message_id)
    if len(room_request) > 1:
        result['room_tips'] = room_request[1:]
    else:
        self.logger.error('room_tip is empty %s', self._message_id)

    price = strip_str(take_last(self._etree, expedia_xp.PRICE))
    if price:
        result['price'] = price
    else:
        self.logger.error('price is empty %s', self._message_id)

    total = strip_str(take_first(self._etree, expedia_xp.TOTAL))
    if total:
        # Drop the first space-separated word and keep the rest.
        result['total_cost'] = ' '.join(remove_space(total).split(' ')[1:])
    else:
        self.logger.error('price total is empty %s', self._message_id)

    price_des = strip_str(take_last(self._etree, expedia_xp.PRICE_DETAILS))
    if price_des:
        result['price_tips'] = [price_des]
    else:
        self.logger.error('price describe is empty %s', self._message_id)

    taxes = strip_str(take_last(self._etree, expedia_xp.TAXES))
    if taxes:
        result['taxes_fee'] = strip_str(taxes.split(':')[-1])
    else:
        self.logger.warning('taxes is empty %s', self._message_id)

    rules = strip_list(self._etree.xpath(expedia_xp.RULES))
    if rules:
        # Only entries containing a full stop are kept as notices.
        result['notice'] = [i for i in rules if '.' in i]
    else:
        self.logger.error('notice is empty %s', self._message_id)

    additional_hotel_fee = take_first(self._etree,
                                      expedia_xp.ADDITIONAL_HOTEL_FEE)
    fee_lines = []
    if additional_hotel_fee is not None:
        # Serialise the element, strip the markup and spaces, then split
        # the remaining text into lines.
        fee_lines = strip_list(
            remove_tags(etree.tostring(additional_hotel_fee)).replace(
                ' ', '').split('\n'))
    if len(fee_lines) > 1:
        # The first line is skipped; the rest are the fee entries.
        result['additional_hotel_fee'] = fee_lines[1:]
    else:
        # Warn both when the element is missing and when it has no entries
        # after its first line, so that neither case is dropped silently.
        self.logger.warning('additional_hotel_fee is empty %s',
                            self._message_id)

    check_in_policy = strip_list(
        self._etree.xpath(expedia_xp.CHECK_IN_POLICY))
    if check_in_policy:
        result['check_in_policies'] = check_in_policy[1:]
    else:
        self.logger.warning('policies is empty %s', self._message_id)

    return result
def _parse(self):  # pylint: disable=R0912,R0915
    """Extract hotel-reservation fields from ``self._etree`` into the order.

    Values are located with the XPath expressions in ``priceline_xp``. No
    field is treated as mandatory by this method: a value that cannot be
    found is left out of the order and reported through ``self.logger``.

    Returns:
        ``self._order``, updated in place with the extracted fields.
    """
    result = self._order

    name = remove_space(take_first(self._etree, priceline_xp.NAME))
    if name:
        result['hotel_name'] = name
    else:
        self.logger.error('hotel_name is empty %s', self._message_id)

    address = ', '.join(self._etree.xpath(priceline_xp.ADDRESS)).strip()
    if address:
        result['address'] = address
    else:
        self.logger.error('address is empty %s', self._message_id)

    check_in_date = strip_str(
        take_last(self._etree, priceline_xp.CHECK_IN_DATE))
    # The time zone is derived from the hotel address and shared by the
    # check-in and check-out conversions below.
    tz = to_timezone(address)
    if check_in_date:
        result['check_in_date'] = check_in_date
        check_in_date_formatted = \
            DateTime(check_in_date, 'MMMM DD, YYYY').tz_to_datetime(tz)
        if check_in_date_formatted:
            result['check_in_date_formatted'] = check_in_date_formatted
        else:
            self.logger.error('check_in_date_formatted is empty %s',
                              self._message_id)
    else:
        self.logger.error('check_in_date is empty %s', self._message_id)

    check_in_time = strip_str(
        take_last(self._etree, priceline_xp.CHECK_IN_TIME))
    if check_in_time:
        result['check_in_time'] = check_in_time.strip('(').strip(')')
    else:
        self.logger.error('check_in_time is empty %s', self._message_id)

    check_out_date = strip_str(
        take_last(self._etree, priceline_xp.CHECK_OUT_DATE))
    if check_out_date:
        result['check_out_date'] = check_out_date
        check_out_date_formatted = \
            DateTime(check_out_date, 'MMMM DD, YYYY').tz_to_datetime(tz)
        if check_out_date_formatted:
            result['check_out_date_formatted'] = check_out_date_formatted
        else:
            self.logger.error('check_out_date_formatted is empty %s',
                              self._message_id)
    else:
        self.logger.error('check_out_date is empty %s', self._message_id)

    check_out_time = strip_str(
        take_last(self._etree, priceline_xp.CHECK_OUT_TIME))
    if check_out_time:
        result['check_out_time'] = check_out_time.strip('(').strip(')')
    else:
        self.logger.error('check_out_time is empty %s', self._message_id)

    tel = strip_str(take_last(self._etree, priceline_xp.PHONE))
    if tel:
        result['telephone'] = tel
    else:
        self.logger.error('telephone is empty %s', self._message_id)

    confirm_num = self._etree.xpath(priceline_xp.CONFIRM_NUM)
    if confirm_num:
        confirm_text = ''.join(confirm_num).strip()
        # The confirmation number is the text before the first ' ('.
        result['confirm_code'] = confirm_text.split(' (')[0]
        # The PIN is the text after the last ': ', minus the closing
        # parenthesis. Without a ': ' there is no PIN to read, and the
        # split would hand back the whole confirmation text instead.
        if ': ' in confirm_text:
            result['pin_code'] = confirm_text.split(': ')[-1].strip(')')
        else:
            self.logger.warning('pin_code is empty %s', self._message_id)
    else:
        self.logger.error('confirm_number is empty %s', self._message_id)

    room_keys = strip_list(self._etree.xpath(priceline_xp.ROOM_KEYS))
    room_values = strip_list(self._etree.xpath(priceline_xp.ROOM_VALUES))
    # Labels and values are paired by position, so they are only trusted
    # when both lists have the same, non-zero length.
    if len(room_keys) == len(room_values) and room_keys:
        room_fields = {
            'Deal Type': 'deal_type',
            'Room Price': 'price',
            'Number of rooms': 'number_of_rooms',
            'Number of nights': 'number_of_nights'
        }
        for label, value in zip(room_keys, room_values):
            if label in room_fields:
                result[room_fields[label]] = value
            else:
                self.logger.warning('%s is %s %s', label, value,
                                    self._message_id)
    else:
        self.logger.error('price is empty %s', self._message_id)

    room_info_keys = strip_list(
        self._etree.xpath(priceline_xp.ROOM_INFO_KEYS))
    room_info_values = strip_list(
        self._etree.xpath(priceline_xp.ROOM_INFO_VALUES))
    room_info_values = list(map(remove_space, room_info_values))
    if room_info_keys and room_info_values:
        room_info_fields = {
            'Internet': 'internet',
            'Guest Parking': 'guest_parking',
            'Prepayment': 'prepayment',
            'Meal Plan': 'meal_plan'
        }
        # Walk both lists from the end so that the last label is paired
        # with the last value even when the lists differ in length.
        for label, value in zip(reversed(room_info_keys),
                                reversed(room_info_values)):
            if label in room_info_fields:
                result[room_info_fields[label]] = value
            elif 'Room Type' in label:
                # The room type is read with its own query rather than
                # taken from the paired value.
                room_type = strip_list(
                    self._etree.xpath(priceline_xp.ROOM_TYPE))
                if room_type:
                    result['room_type'] = ' '.join(room_type)
                else:
                    self.logger.error('room_type is empty %s',
                                      self._message_id)
            else:
                self.logger.warning('%s is %s %s', label, value,
                                    self._message_id)
    else:
        self.logger.error('room_info is empty %s', self._message_id)

    reservation_name = strip_list(
        self._etree.xpath(priceline_xp.RESERVATION_NAME))
    if reservation_name:
        result['guest_name'] = reservation_name[0].split(': ')[-1]
    else:
        self.logger.error('guest_name is empty %s', self._message_id)

    total = strip_str(
        take_first(self._etree, priceline_xp.ROOM_PRICE_TOTAL))
    if total:
        result['total_cost'] = total
    else:
        self.logger.error('total_price is empty %s', self._message_id)

    room_price_keys = strip_list(
        self._etree.xpath(priceline_xp.ROOM_PRICE_KEYS))
    # NOTE(review): this second strip_list pass looks redundant; confirm
    # that strip_list is idempotent before removing it.
    room_price_keys = strip_list(room_price_keys)
    room_price_values = strip_list(
        self._etree.xpath(priceline_xp.ROOM_PRICE_VALUES))
    if room_price_keys and room_price_values:
        price_fields = {
            'Room Subtotal': 'subtotal',
            'Hotel Fee': 'service_charge',
            'Taxes & Fees': 'taxes_fee'
        }
        for label, value in zip(room_price_keys, room_price_values):
            if label in price_fields:
                result[price_fields[label]] = value
            else:
                self.logger.warning('%s is %s %s', label, value,
                                    self._message_id)
    else:
        self.logger.error('price_details is empty %s', self._message_id)

    price_extra = strip_list(self._etree.xpath(priceline_xp.PRICE_EXTRA))
    price_extra = [remove_space(j) for j in price_extra]
    if price_extra:
        result['price_tips'] = price_extra
    else:
        self.logger.error('price_extra is empty %s', self._message_id)

    notice = strip_list(self._etree.xpath(priceline_xp.NOTICE))
    notice = [remove_space(j) for j in notice]
    if notice:
        result['notice'] = notice
    else:
        self.logger.error('notice is empty %s', self._message_id)

    payment_type = strip_str(
        take_first(self._etree, priceline_xp.PAYMENT_TYPE))
    if payment_type:
        result['payment_forms'] = payment_type
    else:
        self.logger.error('payment_form is empty %s', self._message_id)

    return result