def fetch_price_by_hotel(oid, checkin, checkout, days):
    """Fetch price from booking.cn.

    Looks up a CMS hotel by ObjectId, queries the parity price service by
    hotel/city name, converts the scraped price string to CNY and pushes
    the result to InfluxDB.

    Args:
        oid: CMS hotel ObjectId as a string.
        checkin: check-in date, 'YYYY-MM-DD'.
        checkout: check-out date, 'YYYY-MM-DD'.
        days: number of nights, forwarded to push_to_influxdb.

    Raises:
        AssertionError: when the parity service responds abnormally.
        TypeError: when the price string carries no recognised currency mark.
    """
    hotel = databases('hub').poi_items.find_one({'_id': ObjectId(oid)})
    booking_com_id = hotel.get('booking_com_id')
    if booking_com_id:
        # This task is only meant for hotels WITHOUT a booking.com id
        # (those are priced elsewhere), hence the defensive bail-out.
        logger.error('Can not be here.')
        return
    else:
        name = hotel.get('name')
        city_id = hotel.get('city')
        city_name = (databases('hub')
                     .meta_cities
                     .find_one({'_id': city_id})
                     .get('name'))
        resp = requests.get(
            settings.PARITY,
            params={
                'checkin': checkin,
                'checkout': checkout,
                'hotel': name,
                'city': city_name
            }
        )
        assert resp.status_code == 200, resp.text
        data = resp.json()
        assert data['status'] == 200, data
        try:
            # Only the cheapest (first) quote is kept.
            price = data['prices'][0]['price']
        except (IndexError, KeyError) as exc:
            logger.warning(f'Hotel({name}) not found. {data}', exc_info=exc)
            return
        # Infer the currency from the symbol embedded in the scraped string.
        if '元' in price:
            currency = 'CNY'
        elif '¥' in price:
            currency = 'CNY'
        elif '€' in price:
            currency = 'EUR'
        elif '$' in price:
            currency = 'USD'
        else:
            raise TypeError(price)
        # PRE is a module-level regex that extracts the numeric part;
        # thousands separators are stripped before Decimal conversion.
        price = Decimal(PRE.search(price).group(1).replace(',', ''))
        if currency != 'CNY':
            exchange_rate = get_exchange_rate(currency, 'CNY')
            price *= exchange_rate
        return push_to_influxdb(oid, hotel['name'], checkin, checkout, days, price)
def find_hotel_name(provider, hotel_id):
    """Resolve a display name ("<hotel name> <city or province>") for a hotel.

    For provider == "cms" the name comes from the hub database; for any other
    provider it comes from the supplier's own collection.

    Returns:
        The composed name string, or False when the hotel cannot be resolved.
    """
    if provider == "cms":
        hub = databases("hub")
        data = hub["poi_items"].find_one({"_id": ObjectId(hotel_id)}, {
            "name_en": "1",
            "city": "1"
        })
        if not data:
            logger.info(f"invalid cms_id : {hotel_id}")
            return False
        city = hub["meta_cities"].find_one({"_id": data.get("city", "")},
                                           {"name_en": "1"})
        # A missing city is tolerated: fall back to an empty city name.
        if not city or not city.get("name_en", ""):
            logger.info(
                f"invalid city_id of hotel: {hotel_id}, {data.get('city')}")
            city_name = ""
        else:
            city_name = city["name_en"]
        return f"{data.get('name_en')} {city_name}"
    # Map the provider to its quoter collection: id-keyed mapping first,
    # then name-keyed mapping.
    quoter_coll = settings.SUPPLIER_ID_2_COLL.get(
        provider) or settings.SUPPLIER_NAME_2_COLL.get(provider)
    if not quoter_coll:
        return False
    if quoter_coll == "wg_hotel":
        db = databases("whotel")
    else:
        db = databases("scripture")
    # Supplier collections key hotels inconsistently (int hotel_id,
    # string hotel_id, or string code), so query every plausible variant.
    condition = {"$or": []}
    try:
        int_hid = int(hotel_id)
        condition["$or"].append({"hotel_id": int_hid})
    except Exception:
        pass
    try:
        str_hid = str(hotel_id)
        condition["$or"].append({"code": str_hid})
        condition["$or"].append({"hotel_id": str_hid})
    except Exception:
        pass
    hotel_msg = db[quoter_coll].find_one(condition, {
        "name": "1",
        "province": "1"
    })
    if not hotel_msg:
        return False
    # NOTE(review): 'city' is not in the projection above, so the
    # .get('city', {'name': ''}) default is presumably always taken and the
    # fallback resolves to 'province' — confirm the intended projection.
    hotel_name = (
        f"{hotel_msg['name'].replace('&', ' ')} {hotel_msg.get('city', {'name': ''})['name'] or hotel_msg.get('province', '')}"
    )
    return hotel_name
def send_tf_id_errormsg():
    """Report online hotels whose Travflex hotel ID is wrong or expired.

    Collects the recorded error entries, sends a Dingtalk markdown alert,
    then deletes the entries that were included in the report (anything
    updated no later than the newest reported record).
    """
    db = databases('scripture')
    data = db['taskmsg.availability'].find({"type": "travflex_id_error"})
    text = "# 有线上酒店填写的Travflex酒店ID已失效 \n\n"
    nums = 0
    last_updated_time = None
    # fix: `ids` was appended to without ever being initialised (NameError
    # on the first record).
    ids = []
    for e in data:
        # Track the newest updated_at so the cleanup below only removes
        # records that were actually reported.
        if not last_updated_time or e['updated_at'] > last_updated_time:
            last_updated_time = e['updated_at']
        ids.append(e['hotel_id'])
        text += f"- cms链接: http://wop.feifanweige.com/admin/hotels/{e['hotel_id']}\n"
        nums += 1
    if nums == 0:
        # fix: with no records the original posted an empty alert and then
        # ran remove() with {'$lte': None}; skip both.
        return
    payload = {
        "msgtype": "markdown",
        "markdown": {
            "title": "有线上酒店填写的Travflex酒店ID已失效",
            "text": text,
        },
    }
    resp = requests.post(
        "https://oapi.dingtalk.com/robot/send",
        params=Ding_params,
        json=payload,
    )
    db['taskmsg.availability'].remove({"type": "travflex_id_error",
                                       "updated_at": {"$lte": last_updated_time}})
def db_update_type(cms_id, checkin, price_type, prices):
    """Classify and persist a fetched price for one hotel/check-in slot.

    price_type == -1 (or a missing/str price) marks the slot as
    un-classifiable; otherwise the price is compared against the stored
    bug-price threshold and tagged bug_price_type 0 (above) or 1 (at/below).

    Args:
        cms_id: CMS hotel id (string).
        checkin: check-in date string used as the positional array key.
        price_type: -1 for a failed fetch, anything else for a real quote.
        prices: the price document to store (may be {} on failure).
    """
    db = databases('scripture')
    condition = {'hotel_id': cms_id, 'prices.checkin': checkin}
    upload = {"$currentDate": {"updated_at": True}}
    # fix: prices['price'] raised KeyError when called with {} from the
    # error path (see update_bug_price_type).
    price = prices.get('price')
    if price_type == -1 or not price or isinstance(price, str):
        # fix: the original fell through into the threshold comparison below,
        # which KeyErrored on upload['$set']['prices.$'] (and compared a
        # missing/str price). Mark the slot and stop here.
        upload['$set'] = {'prices.$.bug_price_type': -1}
        db['statics.hotels.prices'].update_one(condition, upload)
        return
    upload['$set'] = {'prices.$': prices}
    old_price = db['statics.hotels.prices'].find_one(
        {'hotel_id': cms_id, 'prices.checkin': checkin}, {'prices.$'})
    # 正常不会出现此种情况,仅在发布任务的5分钟后原数据仍未更新到数据库中时才会出现,出现则抛弃此次查询结果
    # (Should not normally happen: only when the base record has not landed
    # in the DB 5 minutes after the task was published — drop the result.)
    if not old_price:
        logger.warning(f"bug_price_check_task_before_data_insert: {cms_id}, {checkin}")
        return
    thre_price = old_price['prices'][0].get('bug_thre_price', 0)
    if price > thre_price:
        upload['$set']['prices.$']['bug_price_type'] = 0
    else:
        upload['$set']['prices.$']['bug_price_type'] = 1
    # Carry the threshold forward so the next run compares against it.
    upload['$set']['prices.$']['bug_thre_price'] = thre_price
    db['statics.hotels.prices'].update_one(condition, upload)
def update_bug_price_type(cms_id, checkin):
    """Fetch the live quote for one hotel/check-in and classify its price.

    Builds the quote payload from the hotel's configured suppliers, performs
    a single quotes-API call, derives the minimum price (with the city tax
    stripped) and hands everything to db_update_type. Failures are recorded
    as price_type -1.
    """
    hub = databases('hub')
    hotel = hub["poi_items"].find_one(
        {"_id": ObjectId(cms_id)},
        {"quote_ids": "1", "min_booking_days": "1", "city": "1"},
    )
    # The checkout honours the hotel's minimum stay.
    min_booking_days = int(hotel.get("min_booking_days", 1))
    checkout = (datetime.strptime(checkin, "%Y-%m-%d")
                + timedelta(days=min_booking_days)).strftime("%Y-%m-%d")
    payload = dict(
        checkin=checkin,
        checkout=checkout,
        roomfilters=[{"adults": 2}],
        quoters=[
            {"quoter": str(value["quoter"]), "hotel_id": value["hotel_id"]}
            for value in hotel["quote_ids"]
            if value["hotel_id"].strip() != ""
        ],
    )
    resp = None
    try:
        # 每个任务仅查询一个酒店的一个日期,只一次网络IO
        # (One hotel, one date per task: a single network round-trip.)
        resp = requests.post(
            quotes_api, headers={"x-query-from": "robot"}, json=payload
        )
        res = resp.json()
    except Exception as exc:
        # fix: when requests.post itself raised, `resp` was unbound and the
        # handler died with NameError instead of logging the real error.
        detail = resp.content if resp is not None else b''
        logger.error(
            f"{checkin} {hotel['_id']} get price faild!\ndetail : {detail}",
            exc_info=exc,
        )
        return db_update_type(cms_id, checkin, -1, {})
    if not res or res["status"] != 200 or not res["data"].get("categorized"):
        return db_update_type(cms_id, checkin, -1, {})
    # The API returns rooms grouped by category; the first entry of the
    # first category is the overall minimum-price room.
    _min_price_room = list(res["data"]["categorized"].values())[0][0]
    supplier_rooms = find_each_min_supplier(res["data"]["categorized"])
    _min_price = float(_min_price_room.get("total_price", 9999999))
    _min_supplier = _min_price_room.get("identity", {}).get(
        "provider", "Unknown"
    )
    _city_rate = hub['meta_cities'].find_one({"_id": hotel['city']},
                                             {'tax_rate': '1'})
    # Default tax rate of 5% when the city record is missing.
    if not _city_rate:
        city_rate = 0.05
    else:
        city_rate = float(_city_rate.get('tax_rate', 0.05))
    without_tax_price = math.ceil(_min_price * (1 - city_rate))
    prices = {
        "checkin": checkin,
        "checkout": checkout,
        "price": _min_price,
        'without_tax_price': without_tax_price,
        "ori_price": float(
            _min_price_room.get("ori_total_price_cny", 9999999999999)
        ),
        "supplier": _min_supplier,
        "room_type_en": _min_price_room.get("room_type", ""),
        "room_type_cn": _min_price_room.get("translation", ""),
        "each_supplier": supplier_rooms,
        "updated_at": datetime.now(),
    }
    return db_update_type(cms_id, checkin, 1, prices)
def room_types(self):
    """Sync JacTravel room types into scripture.

    Fetches GetRoomTypes from the supplier's room/meal endpoint and upserts
    one document per room type, keyed by its code. created_at is set only
    on first insert; last_modified tracks every sync.
    """
    with requests.Session() as sess:
        params = {
            "userName": self.user,
            "password": self.pswd,
            "language": "en"
        }
        resp = sess.get(f"{self.room_meal_entry_point}/GetRoomTypes",
                        params=params)
        root = ET.fromstring(resp.content.decode('utf-8'))
        scripture = databases("scripture")
        # NOTE(review): children are addressed positionally — assumes the
        # XML schema is [code, room_type, sharedRoom, sharedFacilities];
        # confirm against the JacTravel API definition.
        for child in root[0]:
            scripture['statics.hotels.jactravel.roomTypes'].update_one(
                {'code': str(child[0].text)},
                {
                    '$set': {
                        'code': str(child[0].text),
                        'id': str(child[0].text),
                        'room_type': str(child[1].text),
                        'sharedRoom': str(child[2].text),
                        'sharedFacilities': str(child[3].text),
                    },
                    "$setOnInsert": {
                        "created_at": datetime.now()
                    },
                    "$currentDate": {
                        "last_modified": True
                    },
                },
                upsert=True,
            )
def calendar_all():
    """Queue price-calendar refresh tasks for the most recent hotels.

    Reads the per-run hotel cap from the feature switch table and the
    display day span from the CMS config API (falling back to 130), then
    dispatches calendar_one for each qualifying hotel.
    """
    hub = databases("hub")
    max_num = hub["feature_switch"].find_one(
        {"table": "Activity"}, {"config": "1"}
    )
    try:
        max_num = max_num["config"]["max_price_calendar_hotel"]
    except Exception as exc:
        logger.error("cannot get max_num ! ", exc_info=exc)
        exit()
    # fix: `days` was unbound when the config request came back non-200
    # without raising, crashing the dispatch loop with NameError.
    days = 130
    try:
        resp = requests.get(
            f"{settings.CMS_API}/api/internal/configs/hotel",
            params={"configs": "price_calendar_display_day_span"},
            headers={"accept-version": "6.0.0"},
        )
        if resp and resp.status_code == 200:
            days = resp.json()["data"].get(
                "price_calendar_display_day_span", 130
            )
    except Exception as exc:
        logger.error("get calendar days error!", exc_info=exc)
        days = 130
    # Most recently updated hotels first, capped by the feature switch.
    for hotel in (
        hub["poi_items"]
        .find(
            {"has_price_calendar": True},
            {"quote_ids": "1", "min_booking_days": "1"},
        )
        .sort([("updatedAt", -1)])
        .limit(max_num)
    ):
        calendar_one.delay(str(hotel["_id"]), days=days)
def packages():
    """Fan out availability checks over all relevant online packages.

    Selects edited/audited packages that are not offline and have a
    relevant hotel, serialises each one, and dispatches them to
    package_check in chunks of 16, chorded into on_finished.
    """
    hub = databases("hub")
    query = {
        "$and": [
            {"edit_status": {"$in": ["edited", "audited"]}},
            {"publish_status": {"$ne": "offline"}},
            {"has_relevant_hotel": True},
        ]
    }
    projection = {
        "hotels": 1,
        "daily_inventory": 1,
        "air_price": 1,
        "appreciation_fee": 1,
        "inventory_updated_at": 1,
    }
    cursor = hub.sku_packages.find(query, projection)
    serialized = [
        [json.dumps(doc, default=on_json_serialize)] for doc in cursor
    ]
    workflow = package_check.chunks(serialized, 16) | on_finished.s()
    return workflow.apply_async()
def meal_types(self):
    """Sync JacTravel meal types (including their labels) into scripture.

    Fetches GetMeals from the supplier endpoint and upserts one document
    per meal type, keyed by code. created_at is set only on first insert;
    last_modified tracks every sync.
    """
    credentials = {
        "userName": self.user,
        "password": self.pswd,
        "language": "en"
    }
    with requests.Session() as sess:
        resp = sess.get(f"{self.room_meal_entry_point}/GetMeals",
                        params=credentials)
    root = ET.fromstring(resp.content.decode('utf-8'))
    collection = databases("scripture")['statics.hotels.jactravel.mealTypes']
    for meal_node in root[0]:
        document = {
            'id': meal_node[0].text,
            'code': str(meal_node[0].text),
            'meal_type': meal_node[1].text,
            # Third child holds the label list: [lable_id, lable_content].
            'lables': [
                {'lable_id': node[0].text, 'lable_content': node[1].text}
                for node in meal_node[2]
            ],
        }
        collection.update_one(
            {'code': document['code']},
            {
                '$set': document,
                '$setOnInsert': {'created_at': datetime.now()},
                "$currentDate": {"last_modified": True},
            },
            upsert=True,
        )
def news_with_loc(loc: str) -> bool:
    """Fetch Google News RSS for a location and store each story.

    Args:
        loc: "<city>, <country_code>" string used for the geo feed and for
            tagging the saved stories.

    Returns:
        True when the feed was processed; False on a bad HTTP response.
    """
    headers = {
        'User-Agent': ('Mozilla/5.0 (X11, Linux x86_64) AppleWebKit 537.36 '
                       '(KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'),
    }
    session = requests.Session()
    session.proxies = settings.PROXIES
    resp = session.get(
        'https://news.google.com/news/rss/local/section/geo/{}'.format(loc),
        params={
            'ned': 'us',
            'hl': 'en'
        },
        headers=headers,
        timeout=5,
    )
    if resp.status_code != 200:
        logger.warning('Bad response %s %s %s', resp.status_code, resp.url,
                       resp.reason)
        # fix: the original logged the failure but fell through and parsed
        # the error body anyway; bail out instead.
        return False
    scripture = databases('scripture')
    hub = databases('ai')
    Story.__db_table__ = scripture.cp_stories
    AINews.__db_table__ = hub.ai_news
    feed = feedparser.parse(resp.text)
    for entry in feed['entries']:
        story = Story(entry)
        # Split "<city>, <code>" from the right so city names containing
        # commas survive.
        city, country_code = loc.rsplit(',', 1)
        story['city'] = city.strip()
        story['country_code'] = country_code.strip()
        published_at = arrow.get(story['published'],
                                 'D MMM YYYY HH:mm:ss ZZZ')
        story['published_at'] = published_at.datetime
        story['scope'] = 'google'
        try:
            ai_story = AINews.from_story(**story.save())
            ai_story.save()
            logger.info('Successful to save %s to db', story['id'])
            logger.debug('Story %s', story)
        except Exception as exc:  # pylint: disable=W0703
            # Best-effort per story: one bad entry must not abort the feed.
            logger.error("%s", story)
            logger.exception(exc)
    return True
def hotel_matching(collection_name, query):
    """Run the matcher against one crawled hotel document.

    Args:
        collection_name: scripture collection holding the crawled hotel.
        query: JSON-encoded Mongo filter selecting the document.

    Returns:
        True after the matching pass has run.
    """
    condition = json.loads(query)
    collection = databases("scripture").get_collection(collection_name)
    crawled = collection.find_one(condition, no_cursor_timeout=True)
    Matching().one(crawled, collection_name)
    return True
def news_with_topic(topic: str) -> bool:
    """Fetch Google News RSS for a topic and store each story.

    Args:
        topic: topic keyword (upper-cased for the feed URL) and stored on
            each saved story.

    Returns:
        True when the feed was processed; False on a bad HTTP response.
    """
    headers = {
        'User-Agent': ('Mozilla/5.0 (X11, Linux x86_64) AppleWebKit 537.36 '
                       '(KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36'),
    }
    session = requests.Session()
    session.proxies = settings.PROXIES
    url = 'https://news.google.com/news/rss/headlines/section/topic/' + \
        topic.upper()
    resp = session.get(url, params={
        'ned': 'us',
        'hl': 'en'
    }, headers=headers, timeout=5)
    if resp.status_code != 200:
        logger.warning('Bad response %s %s %s', resp.status_code, resp.url,
                       resp.reason)
        # fix: the original logged the failure but parsed the error body
        # anyway; bail out instead (mirrors news_with_loc).
        return False
    scripture = databases('scripture')
    hub = databases('ai')
    Story.__db_table__ = scripture.cp_stories
    AINews.__db_table__ = hub.ai_news
    feed = feedparser.parse(resp.text)
    for entry in feed['entries']:
        story = Story(entry)
        story['scope'] = 'google'
        story['topic'] = topic
        published_at = arrow.get(story['published'],
                                 'D MMM YYYY HH:mm:ss ZZZ')
        story['published_at'] = published_at.datetime
        # fix: removed a duplicate AINews.from_story(**story) call that sat
        # OUTSIDE the try — its result was discarded, and any exception it
        # raised aborted the whole loop instead of just this story.
        try:
            ai_story = AINews.from_story(**story.save())
            ai_story.save()
            logger.info('Successful to save %s to db', story)
        except Exception as exc:  # pylint: disable=W0703
            logger.exception(exc)
    return True
def hotel_check():
    """Check price availability for every online hotel.

    Sets a 'refreshing' flag document while the scan runs, maps check_price
    over all online hotels via the shared executer pool, and upserts one
    availability document per hotel into hotel.online.check.
    """
    hub = databases("hub")
    scripture = databases("scripture")
    check_coll = scripture["hotel.online.check"]
    check_coll.update_one({"__t": "flag"}, {"$set": {
        "refreshing": True
    }}, upsert=True)
    # fix: wrap the scan in try/finally — previously any exception left the
    # refreshing flag stuck at True forever.
    try:
        base_day = datetime.now()
        onlines = []
        for online in hub["poi_items"].find(
            {
                "__t": "Hotel",
                "edit_status": {
                    "$in": ["edited", "audited"]
                },
                "publish_status": "online",
            },
            {
                "_id": "1",
                "quote_ids": "1",
                "name": "1",
                "name_en": "1",
                'address': '1',
                'en.address': '1'
            },
        ):
            onlines.append((online, base_day))
        for available in executer.map(check_price, onlines):
            if available:
                available["updated_at"] = datetime.now()
                check_coll.update_one(
                    {"hotel_id": available["_id"]},
                    {"$set": available},
                    upsert=True,
                )
    finally:
        check_coll.update_one({"__t": "flag"}, {"$set": {
            "refreshing": False
        }}, upsert=True)
def save_order_message(order_message: Dict, email: str) -> Dict[str, Any]:
    """Persist a parsed order message and forward new ones to the AI service.

    Upserts the message into g_orders (keyed by email + message_id),
    touches the user's fetch timestamps, and POSTs freshly inserted
    messages to the category-specific AI endpoint.

    Returns:
        Flags describing what happened: modified / is_updated /
        is_inserted / sent_to_ai.

    Raises:
        Ignore: when the payload is missing, not a dict, or lacks
            'message_id'.
    """
    is_valid = (order_message
                and isinstance(order_message, dict)
                and 'message_id' in order_message)
    if not is_valid:
        logger.debug(order_message)
        raise Ignore(order_message)
    scripture = databases('scripture')
    u_result = scripture.g_orders.update_one(
        {'email': email, 'message_id': order_message['message_id']},
        {
            '$set': order_message,
            '$setOnInsert': {'created_at': datetime.now()},
            '$currentDate': {'updated_at': True},
        },
        upsert=True)
    scripture.g_users.update_one(
        {'email': email},
        {'$currentDate': {'last_fetched_at': True, 'updated_at': True}})
    raw = u_result.raw_result
    modified = raw.get('nModified') == 1 and raw.get('ok') == 1
    inserted = u_result.upserted_id is not None
    sent_to_ai = False
    if inserted:
        # Only brand-new captures are forwarded to the AI service.
        ai_endpoint = f'{ai_base}{mapping[order_message["category"]]}'
        body = order_message.copy()
        body['capture_id'] = u_result.upserted_id
        body['email'] = email
        resp = requests.post(ai_endpoint, data=body,
                             headers={'cache-control': 'no-cache'})
        sent_to_ai = resp.status_code == 200
        if not sent_to_ai:
            logger.error('Failed when sent to ai: %s', resp.text)
    return {
        'modified': modified,
        'is_updated': not inserted,
        'is_inserted': inserted,
        'sent_to_ai': sent_to_ai
    }
def _get_sem_map():
    """Load all SEM records into a dict keyed by "<sem_name>=<sem_id>".

    Each value carries the record's unit, plan and keywords fields.
    """
    records = databases("scripture").statics.sem.find()
    return {
        f'{record["sem_name"]}={record["sem_id"]}': {
            "unit": record["unit"],
            "plan": record["plan"],
            "keywords": record["keywords"],
        }
        for record in records
    }
def incremental_fetch(email: str) -> bool:
    """Queue an incremental mailbox fetch for an authenticated user.

    Returns:
        False when the user is unknown or not authenticated, True once
        the fetch task has been queued.
    """
    scripture = databases('scripture')
    user = scripture.g_users.find_one({'email': email, 'authenticated': True})
    if not user:
        return False
    # TODO: Add Dingtalk notify of other information
    do_request.apply_async(
        (email, user['access_token']),  # TODO: only token, pop user's info
        # NOTE(review): `token=user` hands the WHOLE user document to the
        # dispatcher, whereas do_request above gets only access_token —
        # presumably deliberate per the TODO, but confirm downstream usage.
        link=dispatcher.s(email=email, token=user, uid=user['id']))
    return True
def dispatcher(messages: Dict[str, Any], email: str, uid: str, token: str):
    """Dispatch request and response

    Fans out per-message fetch tasks for every message except the last,
    then fetches the last message inline to decide whether the next page
    must be requested.

    Args:
        messages: Gmail list payload ({'messages': [...], 'nextPageToken': ...})
        email: string
        uid: user id stored in g_users
        token: sting

    Returns:
        None

    Raises:
        Ignore: when the page contains no messages.
    """
    if len(messages['messages']) < 1:
        raise Ignore(None)
    # Per-message pipeline: do_request -> is_order_message ->
    # parse_order_message -> save_order_message.
    save_cb = save_order_message.s(email=email)
    parse_order_message_cb = parse_order_message.s(email=email, uid=uid)
    parse_order_message_cb.link(save_cb)
    is_order_message_cb = is_order_message.s()
    is_order_message_cb.link(parse_order_message_cb)
    # do_request_s = do_request.s(token=token)
    group([
        do_request.signature(
            (f'users/{email}/messages/{message["id"]}', token),
            link=is_order_message_cb
        ) for message in messages['messages'][:-1]
    ]) \
        .apply_async()
    # The last message is fetched synchronously: its content drives the
    # pagination decision below.
    last_message = messages['messages'][-1]
    result = do_request(f'users/{email}/messages/{last_message["id"]}',
                        token=token)
    scripture = databases('scripture')
    last_fetched_at = scripture.g_users \
        .find_one({'email': email}) \
        .get('last_fetched_at')
    if must_request_next_page(result, last_fetched_at):
        # Recurse onto the next page with this dispatcher as the callback.
        params = {'pageToken': messages['nextPageToken']}
        do_request.apply_async((f'users/{email}/messages/', token, params),
                               link=dispatcher.s(email=email, uid=uid,
                                                 token=token))
    else:
        # Caught up — schedule the next incremental fetch one day out.
        incremental_fetch.apply_async((email, ),
                                      eta=datetime.now() + timedelta(days=1))
    # The inline result still flows through the parse/save pipeline.
    is_order_message.apply_async((result, ), link=parse_order_message_cb)
def multi(self):
    """Run the matcher over every crawled hotel document.

    Iterates the bookings, hotels and ctrips collections in scripture and
    feeds each document to self.one together with its collection name.
    """
    scripture = databases("scripture")
    for coll_name in ("bookings", "hotels", "ctrips"):
        cursor = scripture[coll_name].find(no_cursor_timeout=True)
        for document in cursor:
            self.one(document, coll_name)
def _update_sem(sem_file_path):
    """Replace the statics.sem collection with entries from a JSON file.

    The file maps "<sem_name>=<sem_id>" keys to {unit, plan, keywords}
    objects; each key/value pair becomes one document.

    Args:
        sem_file_path: path to the JSON file to load.
    """
    db = databases("scripture")
    # fix: parse the file BEFORE wiping the collection, so a missing or
    # malformed file no longer destroys the existing data.
    with open(sem_file_path, "r") as f:
        sem = json.load(f)
    docs = []
    for key in sem:
        sem_name, _, sem_id = key.partition("=")
        docs.append({
            "sem_id": sem_id.split("=")[0] if "=" in sem_id else sem_id,
            "sem_name": sem_name,
            "unit": sem[key]["unit"],
            "plan": sem[key]["plan"],
            "keywords": sem[key]["keywords"],
        })
    db.statics.sem.remove()
    # fix: insert_many raises on an empty list; only insert when there is
    # something to insert.
    if docs:
        db.statics.sem.insert_many(docs)
    logger.info("Inserted %s", db.statics.sem.count_documents({}))
def get_skyscanner(start_time, days, sid=None, hotel_id=None, hotel_name=None):
    """Resolve a Skyscanner hotel id and start a price crawl.

    Uses sid directly when given; otherwise resolves it from the CMS
    hotel's third_ref_ids (by hotel_id) or from the skyscanner statics
    collection (by hotel_name regex).

    Returns:
        The crawl_one result, or False when no id can be resolved.
    """
    if not hotel_id and not hotel_name:
        logger.info(f"skyscanner withou hotel_id and name! sid: {sid}")
        return False
    if sid:
        return crawl_one(start_time, days, sid, hotel_id, hotel_name)
    if hotel_id:
        hub = databases("hub")
        ori_sid = hub["poi_items"].find_one(
            {"_id": ObjectId(hotel_id)}, {"third_ref_ids": "1"}
        )
        # fix: find_one returns None for an unknown id — the original
        # crashed with AttributeError before it could log the problem.
        if not ori_sid or not ori_sid.get("third_ref_ids"):
            logger.info(
                f"hotel without skyscanner id! hotel_id : {hotel_id}"
            )
            return False
        return crawl_one(
            start_time,
            days,
            ori_sid["third_ref_ids"][0]["value"],
            hotel_id,
            hotel_name,
        )
    scripture = databases("scripture")
    ori_sid = scripture["statics.hotels.skyscanner"].find_one(
        {"name": {"$regex": hotel_name}}, {"sid": "1"}
    )
    if not ori_sid:
        logger.info(
            f"hotel_name not find in skyscanner datas! hotel_name : {hotel_name}"
        )
        return False
    return crawl_one(
        start_time, days, ori_sid["sid"], hotel_id, hotel_name
    )
def booking_com():
    """Crawl hotel prices from booking.cn for every package hotel.

    For each hotel of each SKU package, queues fetch_price_by_hotel for a
    10-day window of check-in dates starting 15 days from now.
    """
    packages = databases('hub').sku_packages.find()
    base_checkin = datetime.now() + timedelta(days=15)
    for pkg in packages:
        for htl in pkg['hotels']:
            oid = str(htl['hotel'])
            days = htl['days']
            for interval in range(10):
                # fix: the original did `checkin += timedelta(days=interval)`,
                # accumulating 0+1+...+9 = 45 days across the loop AND leaking
                # the drifted date into every subsequent hotel/package. Each
                # check-in is now an independent offset from the base date.
                checkin = base_checkin + timedelta(days=interval)
                checkout = checkin + timedelta(days=days)
                fetch_price_by_hotel.delay(
                    oid,
                    checkin.strftime('%Y-%m-%d'),
                    checkout.strftime('%Y-%m-%d'),
                    days
                )
def compare_data(new_prices, checkin_str, cms_id, url):
    """Compare freshly crawled booking.com prices against stored ones.

    Rooms are matched by a Simhash of occupancy + room type + policies.
    Mismatches between crawler state and the database, and per-room price
    anomalies, each trigger a Dingtalk alert.

    Args:
        new_prices: list of room dicts from the crawler (may be empty).
        checkin_str: check-in date string identifying the price slot.
        cms_id: CMS hotel id.
        url: crawled booking.com URL, included in alerts.
    """
    scripture = databases('scripture')
    db_prices = scripture.statics.booking.prices.find_one(
        {"cms_id": cms_id, 'prices.checkin': checkin_str},
        {
            'prices.$': 1
        }
    )
    if not db_prices and new_prices:
        # Crawler produced data but the DB has none: DB side is broken.
        logger.error(f'爬虫失效/数据库失连,数据库cms_id:{cms_id},目标url:{url}')
        title = 'booking_prices爬虫失效/数据库失连'
        text = f'## [告警]booking爬虫失效\n,数据库cms_id:{cms_id},目标url:{url}'
        dingding(title, text)
    elif db_prices and not new_prices:
        # DB has data but the crawler returned nothing: crawl side broken.
        logger.error(f'比价模块失效,数据库cms_id:{cms_id},目标url:{url}')
        title = 'booking_prices比价模块失效'
        text = f'## [告警]booking比价模块失效\n,数据库cms_id:{cms_id},目标url:{url}'
        dingding(title, text)
    elif db_prices and new_prices:
        db_prices = db_prices.get('prices')[0].get('prices')
        new_prices_info = {}
        for one_room_dict in new_prices:
            room_info_hash = Simhash(
                f'{one_room_dict["occupancy"]}{one_room_dict["room_type"]}{one_room_dict["policies"]}'
            ).value
            new_prices_info[room_info_hash] = one_room_dict["price"]
        for one_room_dict in db_prices:
            room_info_hash = Simhash(
                f'{one_room_dict["occupancy"]}{one_room_dict["room_type"]}{one_room_dict["policies"]}'
            ).value
            db_one_room_price = one_room_dict.get('price')
            new_one_room_price = new_prices_info.get(room_info_hash)
            if not new_one_room_price:
                # fix: the original formatted an undefined `kwargs` name here
                # (NameError), and then still fed None into compare_price;
                # log with the known identifiers and skip this room.
                logger.info(f'{cms_id}在{checkin_str}的房型({one_room_dict["room_type"]})已售出')
                continue
            if compare_price(db_one_room_price, new_one_room_price):
                _info = {
                    'url': url,
                    'checkin': checkin_str,
                    'room_type': one_room_dict["room_type"],
                    'mongodb_price': db_one_room_price,
                    'celery_task_get_price': new_one_room_price
                }
                title = 'booking_prices异常'
                text = f'## [告警]booking抓取价格异常\n{_info}'
                dingding(title, text)
def make_requests():
    """Dispatch Google News fetch tasks, first by topic then by location.

    Topics are fixed; locations come from countries with population >=
    10000, most populous first. A one-second pause between dispatches
    throttles the fan-out.
    """
    topics = ('technology', 'business', 'entertainment', 'sports',
              'science', 'health')
    for topic in topics:
        news_with_topic.apply_async([topic], time_limit=5, soft_time_limit=3)
        gevent.sleep(1)
    countries = databases('scripture').countries \
        .find({'population': {'$gte': 10000}}, no_cursor_timeout=True) \
        .sort('population', -1)
    for country in countries:
        location = '{}, {}'.format(country['ascii_name'],
                                   country['country_code'])
        news_with_loc.apply_async([location], time_limit=5,
                                  soft_time_limit=3)
        gevent.sleep(1)
    return True
def get_booking_url(provider, hotel_id=None, hotel_name=None):
    """Resolve the booking.com hotel page path for a hotel.

    Prefers the CMS-stored crawl URL for provider == "cms"; otherwise
    resolves the hotel name and scrapes booking.com's search results for
    the first matching hotel link.

    Returns:
        The URL path string, or False when no path can be resolved.
    """
    if provider == "cms":
        hub = databases("hub")
        data = hub["poi_items"].find_one(
            {
                "_id": ObjectId(hotel_id),
                "crawl_info.crawl_website": "bk_url"
            },
            {"crawl_info.$": "1"},
        )
        # A stored crawl URL wins over the search-based fallback below.
        if data and data.get("crawl_info"):
            return str(URL(data["crawl_info"][0]["crawl_url"]).path)
    if not hotel_name:
        hotel_name = find_hotel_name(provider, hotel_id)
        if not hotel_name:
            logger.info(f"not find hotel_name with {provider}, {hotel_id}")
            return False
    query_url = (
        f"https://www.booking.com/searchresults.zh-cn.html?ss={hotel_name}")
    resp = requests.get(
        query_url,
        headers={
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36"
        },
    )
    if resp.status_code != 200:
        # logger.error(f'hotel: {hotel_id}, {hotel_name} get url failed.')
        return False
    et = etree.HTML(resp.content.decode("utf-8"))
    try:
        # First search hit; drop the query string, keep only the path.
        hotel_path = (et.xpath('//a[@class="hotel_name_link url"]/@href')
                      [0].strip().split("?")[0])
        return hotel_path
    except Exception as exc:
        logger.error(
            f"hotel: {hotel_id}, {hotel_name} get url in {query_url} failed.",
            exc_info=exc,
        )
        return False
def _set_relationships(self, collection, object_id, rel_collection,
                       rel_object_id):
    """Record a crawled-data relationship on a document in `collection`.

    Pushes {rel_collection, rel_object_id} onto the document's
    relation_to_crawled array unless that pair is already recorded.

    Returns:
        True when the relationship already existed; otherwise the raw
        result of the update.
    """
    agent = databases("agent")
    # NOTE(review): this existence check is NOT scoped to `object_id` — it
    # matches ANY document in the collection carrying this pair. Presumably
    # the (rel_collection, rel_object_id) pair is globally unique; if not,
    # "_id": object_id should be added to the filter — confirm.
    has_relationship = agent.get_collection(collection).find_one({
        "relation_to_crawled.rel_collection": rel_collection,
        "relation_to_crawled.rel_object_id": rel_object_id,
    })
    if has_relationship:
        return True
    updated = agent.get_collection(collection).update_one(
        {"_id": object_id},
        {
            "$push": {
                "relation_to_crawled": {
                    "rel_collection": rel_collection,
                    "rel_object_id": rel_object_id,
                }
            }
        },
    )
    return updated.raw_result
'color': 'yellow' }, 'error': { 'color': 'red' }, 'critical': { 'bold': True, 'color': 'red' } } coloredlogs.install(level='DEBUG', isatty=True, level_styles=LEVEL_STYLE, fmt='%(asctime)s %(name)s %(levelname)s %(message)s') AGENT = databases('agent') auth = oss2.Auth(settings.OSS_ACCESS_KEY_ID, settings.OSS_SECRET_ACCESS_KEY) OSS = oss2.Bucket(auth, settings.OSS_ENDPOINT, settings.OSS_BUCKET) def statistic_cdn_image(provider: enum.Enum) -> Tuple[int, int, str]: """ 上传document中cdn_images上传失败的图片,并统计供应商图片个数、成功个数、成功率 Args: provider: 供应商 Returns: 图片总数,图片上传成功个数,成功率,cdn_images上传成功率 """ count = 0 success = 0
from functools import partial from bson import ObjectId from celery.utils.log import get_task_logger from tasks.application import app # noqa from tasks.utils.database import databases # noqa # First Party from tasks import settings from tasks.supplier_statics.hotel_name import fetch_ctrip_name from tasks.supplier_statics.postal_code import get_province_by_postal_code from tasks.supplier_statics.supplier_images import ImageSaver from tasks.utils.notifiers import DingtalkMessage, DingtalkNotifier from tasks.errors import NotifyFailed from tasks import settings DB = databases("scripture") HUB = databases("hub") key_list = ["latitude", "longitude", 'telephone', "website"] class Providers(enum.Enum): bonotel = "bonotel" roomsxml = "roomsxml" hotelbeds = "hotelbeds" hotelspro = "hotelspro" jactravel = "jactravel" relux = "relux" relux_rooms = 'relux.rooms' class BaseSupplier(object):
def check_preparation(
    hotels, start_time=None, end_time=None, max_days=None, min_booking_days=1
):
    """Queue preparation_one tasks for a batch of hotels.

    hotels: [
        {
            'provider': Provider.provider or 'cms' or 'providers'
            'hotel_id': hotel_id or cms_id or id1::provider_id1;id2::provider_id2
        }
    ]

    Three provider shapes are supported:
      - "cms": hotel_id is a CMS ObjectId; quoters come from the hub record.
      - "providers": hotel_id packs several "id::provider" pairs joined by ';'.
      - anything else: a single supplier name/id plus its hotel id.

    Returns:
        A status string once all tasks have been dispatched.
    """
    hub = databases("hub")
    for hotel in hotels:
        if hotel["provider"] == "cms":
            data = hub["poi_items"].find_one(
                {"_id": ObjectId(hotel["hotel_id"])},
                {"quote_ids": "1", "min_booking_days": "1"},
            )
            if not data:
                logger.error(f"not find cms hotel with {hotel['hotel_id']}!")
                continue
            p_hotel = {
                "id": hotel["hotel_id"],
                "hotels": [
                    {"quoter": str(e["quoter"]), "hotel_id": e["hotel_id"]}
                    for e in data["quote_ids"]
                ],
            }
            preparation_one.delay(
                hotel=p_hotel,
                start_time=start_time,
                end_time=end_time,
                max_days=max_days,
                # CMS hotels use their own configured minimum stay.
                min_booking_days=data.get("min_booking_days", 1),
            )
        elif hotel["provider"] == "providers":
            # hotel_id encodes "id1::provider1;id2::provider2;...".
            p_hotel = {"id": hotel["hotel_id"], "hotels": []}
            for _ in hotel["hotel_id"].split(";"):
                hotel_id, provider = _.split("::")
                # Supplier names are normalised to ids where a mapping exists.
                provider = settings.SUPPLIER_NAME_2_ID.get(provider, provider)
                p_hotel["hotels"].append(
                    {"quoter": provider, "hotel_id": hotel_id}
                )
            preparation_one.delay(
                hotel=p_hotel,
                start_time=start_time,
                end_time=end_time,
                max_days=max_days,
                min_booking_days=min_booking_days,
            )
        else:
            # Single supplier: build a synthetic "<id>::<provider>" key.
            provider = settings.SUPPLIER_NAME_2_ID.get(
                hotel["provider"], hotel["provider"]
            )
            p_hotel = {
                "id": f"{hotel['hotel_id']}::{provider}",
                "hotels": [
                    {"quoter": provider, "hotel_id": hotel["hotel_id"]}
                ],
            }
            preparation_one.delay(
                hotel=p_hotel,
                start_time=start_time,
                end_time=end_time,
                max_days=max_days,
                min_booking_days=min_booking_days,
            )
    return "preparation check publish succeed"
from functools import lru_cache # First Party import yaml import pinyin from pysolr import Solr from pymongo import MongoClient from tasks.utils.database import databases from bson import ObjectId # Current Project import requests # from solrcloudpy import SolrConnection db = databases("agent") hub_db = databases("hub") solr_host = "172.16.1.223" def push_hotels(cursor, supplier): solr = Solr(f"http://{solr_host}/solr/hotels") docs = [] index = 1 total = cursor.count() for doc in cursor: d = { "id": str(doc["_id"]), "name": doc["name"], "name_cn": doc.get("name_cn", ""),
else: self.logger.critical("city name missing: city code(%s) ", hotel["destination"]) country = self.table("countries").find_one({"code": hotel["country"]}) doc["country"] = {"code": hotel["country"]} if country: doc["country"]["name"] = country.get("name") else: self.logger.critical("country name missing: country code(%s) ", hotel["country"]) if doc["regions"]: doc["province"] = self.get_province_by_region_codes( doc["regions"]) or "" if hotel["images"]: doc["images"] = [ img["original"] for img in hotel["images"] if img.get("original") ] doc["updated_at"] = datetime.strptime(hotel["updated_at"], "%Y-%m-%dT%H:%M:%S.%fZ") if 'code' not in doc: doc['code'] = str(doc.get('hotel_id', '')) return doc if __name__ == "__main__": from tasks.utils.database import databases HotelsPro(databases("scripture")).regions()