def from_jset(self, jset_id):
    """Populate ``self._doc`` from the jset record identified by *jset_id*.

    The record is fetched with ``self.db.jsets.find_one``.  Its text sections
    go through ``strip_tags``; rating, gallery, price, city and comments URL
    come from ``JsetAdditional(record["origin_body"])``.  Whatever
    ``self.telephone_and_website`` returns is merged into the document when
    it is truthy.

    Args:
        jset_id: value matched against the ``jset_id`` field of the record.

    Raises:
        JsetNotFound: if no record matches *jset_id*.
    """
    record = self.db.jsets.find_one({"jset_id": jset_id})
    if not record:
        raise JsetNotFound(jset_id)  # noqa

    overview = strip_tags(record["overview"])
    what_to_know = strip_tags(record["what_to_know"])
    what_we_love = strip_tags(record["what_we_love"])
    travel = strip_tags(record["travel_tips"])
    if isinstance(travel, list):
        travel = "\n".join(travel)

    extra = JsetAdditional(record["origin_body"])
    geocode = record.get("geocode", {})
    geometry = geocode.get("geometry", {})
    point = geometry.get("location", {})

    # English sub-document: the name is cut at the first "(".
    english = {
        "name": record["name"].split("(")[0].strip(),
        "address": self.get_address(record),
        "introduction": "\n".join(overview),
        "recommend_reason": "\n".join(what_we_love),
        "info": travel,
        "tips": "\n".join(what_to_know),
    }

    doc = self._doc
    doc["en"] = english
    doc["city_name"] = self.city or extra.city()
    doc["recommend_reason"] = what_we_love
    # Every name field carries the same English name.
    for key in ("jset_name", "jset_name_en", "name", "name_en"):
        doc[key] = english["name"]
    doc["address"] = english["address"]

    # Prefer the geocoded coordinates, fall back to the record's own.
    doc["latitude"] = point.get("lat") or record["latitude"]
    doc["longitude"] = point.get("lng") or record["longitude"]

    doc["rating"] = extra.rating()
    doc["comments_url"] = self.__comments_url or extra.url()
    doc["comments_from"] = "TripAdvisor"
    gallery = extra.images()
    doc["gallery"] = gallery
    doc["cover_image_url"] = gallery[0]["image_url"]

    if not self._capture_id:
        doc.update(self.capture_url(jset_id=jset_id))

    doc["price"] = extra.price() or 0
    doc["introduction"] = overview
    doc["tips"] = what_to_know
    doc["traffic_info"] = travel
    doc["jset_id"] = jset_id
    self.__loaded_from_jset = True

    contact = self.telephone_and_website(doc["capture_id"], doc["address"])
    if not contact:
        logger.debug("Bad telephone and website, %s", doc["address"])
        return
    logger.debug(contact)
    doc.update(contact)
def from_jset(self, jset_id):
    """Fill ``self._doc`` from the jset record stored under *jset_id*.

    Text sections of the record are cleaned with ``strip_tags``; rating,
    gallery, price, city and comments URL are read from
    ``JsetAdditional(jset['origin_body'])``.  The result of
    ``self.telephone_and_website`` is merged into the document only when it
    is truthy.

    Args:
        jset_id: value matched against the ``jset_id`` field in
            ``self.db.jsets``.

    Raises:
        JsetNotFound: when ``find_one`` returns nothing for *jset_id*.
    """
    jset = self.db.jsets.find_one({'jset_id': jset_id})
    if not jset:
        raise JsetNotFound(jset_id)  # noqa

    intro = strip_tags(jset['overview'])
    tips = strip_tags(jset['what_to_know'])
    recommend = strip_tags(jset['what_we_love'])
    travel_tips = strip_tags(jset['travel_tips'])
    if isinstance(travel_tips, list):
        travel_tips = '\n'.join(travel_tips)

    add = JsetAdditional(jset['origin_body'])
    location = (
        jset.get('geocode', {}).get('geometry', {}).get('location', {})
    )

    self._doc['en'] = {
        'name': jset['name'].split('(')[0].strip(),
        'address': self.get_address(jset),
        'introduction': '\n'.join(intro),
        'recommend_reason': '\n'.join(recommend),
        'info': travel_tips,
        'tips': '\n'.join(tips),
    }
    self._doc['city_name'] = self.city or add.city()
    self._doc['recommend_reason'] = recommend
    self._doc['jset_name'] = self._doc['en']['name']
    self._doc['jset_name_en'] = self._doc['en']['name']
    self._doc['name'] = self._doc['en']['name']
    self._doc['name_en'] = self._doc['en']['name']
    self._doc['address'] = self._doc['en']['address']
    # Geocoded coordinates win; the record's own values are the fallback.
    self._doc['latitude'] = location.get('lat') or jset['latitude']
    self._doc['longitude'] = location.get('lng') or jset['longitude']
    self._doc['rating'] = add.rating()
    self._doc['comments_url'] = self.__comments_url or add.url()
    self._doc['comments_from'] = 'TripAdvisor'
    self._doc['gallery'] = add.images()
    self._doc['cover_image_url'] = self._doc['gallery'][0]['image_url']
    if not self._capture_id:
        self._doc.update(self.capture_url(jset_id=jset_id))
    self._doc['price'] = add.price() or 0
    self._doc['introduction'] = intro
    self._doc['tips'] = tips
    self._doc['traffic_info'] = travel_tips
    self._doc['jset_id'] = jset_id
    self.__loaded_from_jset = True

    u = self.telephone_and_website(self._doc['capture_id'],
                                   self._doc['address'])
    if u:
        logger.debug(u)
        # Merge only a usable result.  Calling update() with a falsy value
        # is at best a no-op and raises TypeError for None (assuming _doc
        # is a plain dict).
        self._doc.update(u)
    else:
        logger.debug('Bad telephone and website, %s', self._doc['address'])
async def jset_payload(jset):
    """Build the hotel payload for a jset record.

    English text is collected in the ``en`` sub-document; the top-level
    text fields hold the results of ``translate`` applied to it.

    Args:
        jset: mapping for one jset record.  Keys read here: ``travel_tips``,
            ``url``, ``geometry`` (optional), ``latitude``/``longitude``
            (used when no ``geometry.location`` is present), ``origin_body``,
            ``name``, ``formatted_address`` or ``how_to_get_there``,
            ``overview``, ``what_we_love``, ``what_to_know``, ``hotel_id``.

    Returns:
        dict: the payload, including the ``en`` sub-document.
    """
    travel_tips = jset['travel_tips']
    if isinstance(travel_tips, list):
        travel_tips = strip_tags('\n'.join(travel_tips))
        travel_tips_zh = await translate(travel_tips)
        log.warning('list')
    elif isinstance(travel_tips, str) and travel_tips != '':
        travel_tips = strip_tags(travel_tips)
        travel_tips_zh = await translate(travel_tips)
    else:
        log.warning(
            f'Unkown type<{type(travel_tips)}> of travel_tips: {jset["url"]}'
        )  # noqa
        travel_tips = ''
        travel_tips_zh = ''

    location = jset.get('geometry', {}).get('location')
    if not location:
        location = {'lat': jset['latitude'], 'lng': jset['longitude']}

    addition = JsetAdditional(jset['origin_body'])

    # Build the fallback address only when it is needed: passing it as the
    # dict.get() default evaluated it eagerly and raised TypeError whenever
    # 'how_to_get_there' was absent, even if 'formatted_address' existed.
    if 'formatted_address' in jset:
        address = jset['formatted_address']
    else:
        address = ','.join(jset.get('how_to_get_there') or [])

    en = {
        'name': jset['name'],
        'address': address,
        'introduction': strip_tags('\n'.join(jset['overview'])),
        'recommend_reason': strip_tags('\n'.join(jset['what_we_love'])),
        'info': travel_tips,
        'tips': strip_tags('\n'.join(jset['what_to_know'])),
    }

    item = {
        'en': en,
        'city_name': addition.city(),
        'recommend_reason': await translate(en['recommend_reason']),
        'name': await translate(jset['name']),
        'name_en': jset['name'],
        'hotel_id': jset['hotel_id'],
        'address': await translate(en['address']),
        'latitude': location['lat'],
        'longitude': location['lng'],
        'rating': addition.rating(),
        'comments_url': addition.url(),
        'comments_from': 'TripAdvisor',
        'gallery': addition.images(),
        'capture_url': jset['url'],
        'price': addition.price() or 0,
        'introduction': await translate(en['introduction']),
        'tips': await translate(en['tips']),
        'traffic_info': travel_tips_zh,
    }
    # NOTE(review): logging the price at error level on every call looks
    # like leftover debugging -- confirm before lowering the level.
    log.error(item['price'])
    return item
def from_rxml(self, hotel_id):
    """Load the roomsxml hotel *hotel_id* into ``self._doc``.

    ``hotel_id`` is always recorded.  The remaining fields are copied only
    when the document has not already been loaded from a jset record.

    Args:
        hotel_id: value matched against the ``hotel_id`` field of
            ``self.db.rxmls``.

    Raises:
        RoomsxmlNotFound: if no record matches *hotel_id*.
    """
    record = self.db.rxmls.find_one({'hotel_id': hotel_id})
    if not record:
        raise RoomsxmlNotFound(hotel_id)  # noqa

    doc = self._doc
    doc['hotel_id'] = hotel_id
    self.__loaded_from_rxml = True
    if self.__loaded_from_jset:
        # The document was already filled from a jset record; stop here.
        return

    doc['city_name'] = self.city or record['address']['city']
    address = self.get_address(record)
    doc['address'] = address
    for field in ('latitude', 'longitude', 'rank'):
        doc[field] = record[field]

    hotel_name = record['name']
    doc['roomsxml_name'] = hotel_name
    doc['roomsxml_address'] = address
    doc['name'] = hotel_name
    doc['name_en'] = hotel_name
    doc['introduction'] = strip_tags(record['description']['text'])

    if not self._capture_id:
        doc.update(
            self.capture_url(hotel_id=hotel_id, jset_id=self._jset_id))

    # Read from the document again: the update above may have changed it.
    capture_id = doc['capture_id']
    query = ', '.join([doc['name'], doc['address']])
    contact = self.telephone_and_website(capture_id, query)
    if not contact:
        logger.debug('Bad telephone and website, %s', doc['address'])
        return
    logger.debug(contact)
    doc.update(contact)
def from_rxml(self, hotel_id):
    """Copy the roomsxml record for *hotel_id* into ``self._doc``.

    The hotel id is stored and the rxml-loaded flag is set in every case.
    When the jset-loaded flag is already set, nothing else is written.

    Args:
        hotel_id: value matched against the ``hotel_id`` field of
            ``self.db.rxmls``.

    Raises:
        RoomsxmlNotFound: when ``find_one`` finds no record for *hotel_id*.
    """
    found = self.db.rxmls.find_one({"hotel_id": hotel_id})
    if not found:
        raise RoomsxmlNotFound(hotel_id)  # noqa

    target = self._doc
    target["hotel_id"] = hotel_id
    self.__loaded_from_rxml = True
    if self.__loaded_from_jset:
        return

    # Location and ranking.
    target["city_name"] = self.city or found["address"]["city"]
    target["address"] = self.get_address(found)
    target["latitude"] = found["latitude"]
    target["longitude"] = found["longitude"]
    target["rank"] = found["rank"]

    # Naming: the roomsxml name is used for every name field.
    target["roomsxml_name"] = found["name"]
    target["roomsxml_address"] = target["address"]
    for key in ("name", "name_en"):
        target[key] = found["name"]

    description = found["description"]
    target["introduction"] = strip_tags(description["text"])

    if not self._capture_id:
        capture = self.capture_url(hotel_id=hotel_id, jset_id=self._jset_id)
        target.update(capture)

    details = self.telephone_and_website(
        target["capture_id"],
        ", ".join([target["name"], target["address"]]),
    )
    if details:
        logger.debug(details)
        target.update(details)
        return
    logger.debug("Bad telephone and website, %s", target["address"])
async def jet_hotel(jets, geocode):
    """Build a hotel payload from a jets record and its geocode result.

    Args:
        jets: mapping for one record.  Keys read here: ``travel_tips``,
            ``name``, ``overview``, ``what_we_love``, ``what_to_know``,
            ``hotel_id``, ``url``.
        geocode: mapping with ``formatted_address`` and ``loc``;
            ``loc[1]`` is used as latitude and ``loc[0]`` as longitude.

    Returns:
        dict: translated top-level fields plus the English ``en``
        sub-document.  ``travel_tips`` that is not a list yields empty
        travel-tip fields.
    """
    travel_tips = jets['travel_tips']
    if isinstance(travel_tips, list):
        travel_tips = strip_tags('\n'.join(travel_tips))
        travel_tips_zh = await translate(travel_tips)
    else:
        travel_tips = ''
        travel_tips_zh = ''

    # 'info' holds the travel tips and 'tips' the what-to-know text, the
    # same layout from_jset and jset_payload use; the two were swapped here.
    en = {
        'name': jets['name'],
        'address': geocode['formatted_address'],
        'introduction': strip_tags('\n'.join(jets['overview'])),
        'merchant': '',
        'recommend_reason': strip_tags('\n'.join(jets['what_we_love'])),
        'info': travel_tips,
        'tips': strip_tags('\n'.join(jets['what_to_know'])),
    }

    # Translations are awaited one at a time, in the original order.
    name_zh = await translate(jets['name'])
    address_zh = await translate(geocode['formatted_address'])
    introduction_zh = await translate(en['introduction'])
    tips_zh = await translate(en['tips'])
    recommend_zh = await translate(en['recommend_reason'])

    return {
        'name': name_zh,
        'name_en': jets['name'],
        'hotel_id': jets['hotel_id'],
        'address': address_zh,
        'latitude': geocode['loc'][1],
        'longitude': geocode['loc'][0],
        'ranking': '',
        'rating': '',
        'telephone': '',
        'website': '',
        'price': 0,
        'introduction': introduction_zh,
        # Legacy misspelled key, kept so existing readers keep working;
        # drop it once nothing reads it any more.
        'indtroduction': introduction_zh,
        'tips': tips_zh,
        'traffic_info': travel_tips_zh,
        'recommend_reason': recommend_zh,
        'cover_images': '',
        'capture_url': jets['url'],
        'en': en,
    }
def payload(dl):
    """Convert one scraped deal mapping into the document to be stored.

    Args:
        dl: mapping describing the deal.  Keys read here: ``company``
            (``address``, ``telephone``), ``title`` or ``origin``
            (``hdl``/``fhd``), ``where``, ``prices`` (``promo_price``),
            ``price``, ``edition_disclaimer``, ``src_image``, ``source``,
            ``highlights``, ``whats_included``, ``term``, ``start_time``,
            ``end_time``, ``url``, ``summary_keywords``, ``overview``.

    Returns:
        dict | None: ``None`` when the deal has no company address,
        otherwise the document, with the English text under ``en``.

    Raises:
        AttributeError: if ``edition_disclaimer`` contains no three-letter
            uppercase code bounded by non-uppercase characters.
    """
    digit = re.compile(r'[0-9.]+')
    currency = re.compile('[^A-Z]([A-Z]{3})[^A-Z]')

    addr = ','.join(dl.get('company', {}).get('address', []))
    if not addr:
        return None

    def to_zh(text):
        # Single place for the translation call used throughout.
        return trans.translate(text, dest='zh-CN').text

    name_x = dl.get('title') \
        or dl['origin'].get('hdl') \
        or dl['origin'].get('fhd')
    name_en = name_x
    name = to_zh(name_en)
    city = dl['where']

    price = dl.get('prices', {}).get('promo_price')
    if not price:
        price = dl['price']
    try:
        price = float(digit.findall(price)[0])
    except (IndexError, TypeError, ValueError):
        # No digits, a non-string price, or an unparsable token such as
        # "." -- report it and fall back to zero.  A bare ``except`` here
        # would also swallow KeyboardInterrupt and SystemExit.
        print('Bad price: {}'.format(dl['price']))
        print('Title: {}'.format(name_x))
        print('Url is: {}'.format(dl['url']))
        price = 0.0
    price_type = currency.search(dl['edition_disclaimer']).group(1)

    cover_images = dl['src_image']
    _merchant = dl['source']

    _highlights = dl.get('highlights', {})
    highlights_zh = []
    highlights_en = []
    the_deal = strip_tags(_highlights.get('the_deal', ''))
    if the_deal:
        highlights_en.append({'title': 'The Deal', 'description': the_deal})
        highlights_zh.append({
            'title': 'The Deal',
            'description': to_zh(the_deal)
        })
    for why in map(strip_tags, _highlights.get('why_we_love_it', [])):
        why_t = to_zh(why)
        if not why_t:
            # Skip entries whose translation came back empty.
            continue
        highlights_en.append({'title': 'Why We Love It', 'description': why})
        highlights_zh.append({'title': 'Why We Love It', 'description': why_t})

    # Newlines are sent to the translator as '|' and restored afterwards.
    _info = '\n'.join(map(strip_tags, dl.get('whats_included', [])))
    info = to_zh(_info.replace('\n', '|')).replace('|', '\n')
    _tips = strip_tags(dl.get('term', '').replace('\n', '|'))
    tips = to_zh(_tips).replace('|', '\n') if _tips else ''

    phone = dl.get('company', {}).get('telephone')
    available_start = dl['start_time']
    available_end = dl['end_time']
    capture_url = url_cleaner(dl['url'])
    short_introduction = dl['summary_keywords']
    introduction = dl.get('overview')

    en = {
        'merchant': _merchant,
        'highlights': highlights_en,
        'info': _info,
        'tips': _tips,
        'address': addr,
    }
    d = {
        'name': name,
        'name_en': name_en,
        'city_ref': city,
        'info_ref': _info,
        'tips_ref': _tips,
        'tags_ref': '',
        'price': price,
        'price_type': price_type,
        'merchant': _merchant,
        'highlights': highlights_zh,
        'cover_images': [{
            'image_url': cover_images
        }],
        'info': info,
        'tips': tips,
        'address': addr,
        'available_start': available_start,
        'available_end': available_end,
        'capture_url': capture_url,
        'en': en,
        'introduction': strip_tags(introduction),
        'short_introduction': strip_tags(short_introduction)
    }
    if phone:
        d['phone'] = phone
    return d