# @Site :
# @File : qyer_list_test.py
# @Software: PyCharm
# NOTE(review): the header names qyer_list_test.py, but this script drives
# hilton_tax_task -- confirm the intended file name.
from proj.my_lib.Common.Task import Task
from proj.total_tasks import hilton_tax_task

if __name__ == '__main__':
    # Manual run: build one demo task for the hilton source and hand it to
    # hilton_tax_task.
    task_args = {
        "check_in": "20180128",
        "city_id": "50012",
        "source_id": "NYCDTDT",
        "source": "hilton",
        "date_index": 0,
    }
    demo_task = Task(
        _task_id='demo',
        _task_name='hilton_tax_test',
        _source='hilton',
        _type='hotel_list',
        _queue='hotel_list',
        _routine_key='hotel_list',
        _worker='',
        _used_times=0,
        max_retry_times=6,
        task_type=0,
        list_task_token='test',
        collection='10.19.2.103',
        kwargs=task_args
    )
    hilton_tax_task(task=demo_task)
if source in key: class_name = all_sdk.get(key) break return class_name def _execute(self, **kwargs): source = self.task.source class_name = self.get_task_sdk(source) sdk_object = class_name(self.task) sdk_object.execute() if __name__ == "__main__": args = {'keyword': '纽约'} task = Task(_worker='', _task_id='demo', _source='daodaocity', _type='poi_list', _task_name='daodao_city_suggest', _used_times=0, max_retry_times=6, kwargs=args, _queue='poi_list', _routine_key='poi_list', list_task_token='test', task_type=0, collection='') normal = SlowTaskSDK(task) normal.execute()
# Demo DownloadImages task for the tuniuGT source, routed through the
# file_downloader queue.  Earlier sample payloads are kept for reference:
# kwargs={"source": "daodao", "new_part": "detail_attr_daodao_20171122a",
#         "target_url": "https://ccm.ddcdn.com/ext/photo-s/0f/dd/44/61/peaceful-time.jpg",
#         "source_id": "test", "bucket_name": "mioji-attr", "is_poi_task": True, "part": "20171122a",
#         "file_prefix": ""},
# kwargs={
#     "source": "ihg",
#     "new_part": "detail_hotel_ihg_20171220a",
#     "target_url": "https://ihg.scene7.com/is/image/ihg/candlewood-suites-idaho-falls-3053752126-4x3?fmt=png-alpha",
#     "source_id": "idapd",
#     "bucket_name": "mioji-hotel",
#     "is_poi_task": False,
#     "part": "20171220a",
#     "file_prefix": ""
# },
image_kwargs = {
    'file_prefix': 'tuniuGT',
    'target_url': "https://m.tuniucdn.com//fb2/t1/G3/M00/3D/0B/Cii_LlloJcyIGfs2AFyrXaRNclAAADnOgNM364AXKt1824_w640_h480_c1_t0.jpg",
    'source_id': "210138695",
    'need_insert_db': True,
    'source': "tuniuGT",
    'bucket_name': 'mioji-grouptravel',
    'is_poi_task': True,
}
task = Task(
    _task_id='176ddbc7960c2a6f6d8d7c9baea65617',
    _task_name='image_GT_tuniu_20180414',
    _source='tuniuGT',
    _type='DownloadImages',
    _worker='proj.total_tasks.images_task',
    _queue='file_downloader',
    _routine_key='file_downloader',
    _used_times=0,
    max_retry_times=6,
    task_type=0,
    list_task_token='',
    kwargs=image_kwargs
)
#         "country_id": "NULL",
#         "source": "holiday",
#         "part": "detail_hotel_holiday_20171226a",
#         "city_id": "NULL",
#         "source_id": "ABYSY"
#     },
#     task_type=TaskType.NORMAL, list_task_token=None)
#
# print(hotel_detail_task(task=task))

# Demo hotel_detail task for the fourseasons source.  Alternative sample
# URLs that were tried before are kept here for reference:
# "url": "http://www.booking.com\n/hotel/ph/tg-hometel.zh-cn.html?label=gen173nr-1DCAEoggJCAlhYSDNiBW5vcmVmcgV1c19jYYgBAZgBMsIBA2FibsgBDNgBA-gBAZICAXmoAgQ;sid=3b827f3aa2e3fca0a95ec0d56605f64a;checkin=2018-01-08;checkout=2018-01-11;ucfs=1;soh=1;srpvid=511e686ec99000f9;srepoch=1511448670;highlighted_blocks=;all_sr_blocks=;room1=A%2CA;soldout=0%2C0;hpos=10;hapos=520;dest_type=region;dest_id=5374;srfid=0a39626563bec2b30fbbedccb1438d4e5f55493fX520;from=searchresults;soldout_clicked=1\n;highlight_room=#no_availability_msg",
# "url": "https://www.ihg.com/holidayinnexpress/hotels/cn/zh/teluk/hoteldetail#####https://apis.ihg.com/hotels/v1/profiles/TELUK/details",
# "url": "https://www.expedia.com.hk/Bhimtal-Hotels-Emerald-Trail.h4474316.Hotel-Information?chkin=2017%2F12%2F6&chkout=2017%2F12%2F7&rm1=a2&regionId=6139790&sort=recommended&hwrqCacheKey=b07edfbf-68f1-472b-b58d-d153dc82d7feHWRQ1511794413272&vip=false&c=c8d5ec02-71e2-496b-aa9f-5988e64b7931&",
# "url": "https://www.booking.com/hotel/us/new-lakefront-home-4br-47-2b-in-katy-west-houston.zh-cn.html?aid=376390;label=misc-aHhSC9cmXHUO1ZtqOcw05wS94870954985%3Apl%3Ata%3Ap1%3Ap2%3Aac%3Aap1t1%3Aneg%3Afi%3Atikwd-11455299683%3Alp9061505%3Ali%3Adec%3Adm;sid=760b4b8ac503b49f5d89e67ec36a2fa9;aer=1;dest_id=20126498;dest_type=city;dist=0;hapos=90;hpos=15;room1=A%2CA;sb_price_type=total;spdest=ci%2F20126498;spdist=41.0;srepoch=1511794977;srfid=75643f0d9b7ac3fe31b60ecc58ba9f10b377fd16X90;srpvid=fdcc69d0f9a606d5;type=total;ucfs=1&#hotelTmpl",
# "url": "https://www.expedia.com.hk/Hotels-Beautiful.h19200665.Hotel-Information",
detail_kwargs = {
    "url": "https://www.fourseasons.com/en/maldivesfse/",
    "country_id": "NULL",
    "source": "fourseasons",
    "part": "detail_hotel_fourseasons_20180325a",
    "city_id": "NULL",
    "source_id": "baku",
}
task = Task(
    _task_id='demo',
    _task_name='detail_hotel_fourseasons_20180325a',
    _source='fourseasons',
    _type='hotel',
    _worker='',
    _queue='hotel_detail',
    _routine_key='hotel_detail',
    _used_times=0,
    max_retry_times=10,
    kwargs=detail_kwargs,
    task_type=TaskType.NORMAL,
    list_task_token=None
)

# Run the task once and show whatever hotel_detail_task returns.
detail_result = hotel_detail_task(task=task)
print(detail_result)
# @Author : Hou Rong
# @Site :
# @File : qyer_list_test.py
# @Software: PyCharm
from proj.my_lib.Common.Task import Task
from proj.total_tasks import qyer_list_task

if __name__ == '__main__':
    # Manual run: build one demo poi_list task for the qyer source and hand
    # it to qyer_list_task.
    #
    # Previous sample payload, kept for reference:
    # args = {
    #     'source': 'qyer',
    #     'country_id': '412',
    #     'city_id': '40051',
    #     'check_in': '20170925',
    #     'city_url': 'http://place.qyer.com/praslin-island/'
    # }
    list_args = {
        "check_in": "20180128",
        "city_id": "20645",
        "country_id": "133",
        "source": "qyer",
        "city_url": "http://place.qyer.com/albania/",
        "date_index": 0,
    }
    demo_task = Task(
        _task_id='demo',
        _task_name='list_qyer_total_test',
        _source='qyer',
        _type='poi_list',
        _queue='poi_list',
        _routine_key='poi_list',
        _worker='',
        _used_times=0,
        max_retry_times=6,
        task_type=0,
        list_task_token='test',
        collection='',
        kwargs=list_args
    )
    qyer_list_task(task=demo_task)
else: url = source_interface[source].format(keyword) response = session.get(url=url,) get_suggest = getattr(sys.modules[__name__],'get_{0}_suggest'.format(source)) count = get_suggest(response.content,map_info,country_id,city_id,database_name,keyword) if count >= 0: self.task.error_code = 0 except Exception as e: print(e) raise ServiceStandardError(ServiceStandardError.REQ_ERROR,wrapped_exception=e) return count if __name__ == "__main__": args = { 'keyword': '纽约', 'source': 'booking', 'map_info': '0.0', 'country_id':'501', 'city_id': '10002', 'database_name': 'Cityupline' } task = Task(_worker='', _task_id='demo', _source='hotels', _type='supplement_field', _task_name='all_hotels_city_suggest', _used_times=0, max_retry_times=6, kwargs=args, _queue='supplement_field', _routine_key='supplement_field', list_task_token='test', task_type=0, collection='') normal = AllHotelSourceSDK(task) normal.execute()
# -*- coding: utf-8 -*-
# @Time : 2017/12/12 8:02 PM
# @Author : Hou Rong
# @Site :
# @File : test_ks_move_img.py
# @Software: PyCharm
from SDK import KsMoveSDK
from proj.my_lib.Common.Task import Task

if __name__ == '__main__':
    # Manual run: wrap one from_bucket/to_bucket/file_name payload in a demo
    # Task and execute KsMoveSDK on it.
    move_args = {
        'from_bucket': 'mioji-attr',
        'to_bucket': 'mioji-shop',
        'file_name': '00001b7e38457f1b826311b1ff92043c.jpg',
    }
    move_task = Task(
        _task_id='demo',
        _task_name='demo',
        _source='Qyer',
        _type='CityInfo',
        _worker='proj.total_tasks.qyer_city_task',
        _queue='supplement_field',
        _routine_key='supplement_field',
        _used_times=0,
        max_retry_times=6,
        task_type=0,
        list_task_token='null',
        collection='Unknown',
        kwargs=move_args
    )
    KsMoveSDK(task=move_task).execute()
# Demo hotel_list task for the bestwest source.  An earlier sample payload
# is kept for reference:
# kwargs={
#     'source': 'hotels',
#     'city_id': 'NULL',
#     'country_id': '205',
#     'check_in': '20171128',
#     'part': '20170929a',
#     'is_new_type': 1,
#     'suggest_type': 1,
#     'suggest': '''https://www.hotels.cn/search.do?resolved-location=CITY%3A1638661%3AUNKNOWN%3AUNKNOWN&destination-id=1638661&q-destination=%E9%A9%AC%E5%B8%8C%E5%B2%9B,%20%E5%A1%9E%E8%88%8C%E5%B0%94&q-check-in=2018-01-08&q-check-out=2018-01-11&q-rooms=1&q-room-0-adults=2&q-room-0-children=0'''
# },
hotel_list_kwargs = {
    "suggest_type": "2",
    "check_in": "20180525",
    "city_id": "null",
    # Other 'suggest' shapes that were tried before:
    # "suggest": "{u'name': u'\\u5bbf\\u52a1', u'redirectPage': u'DEFAULT_PAGE', u'longitude': 123.89309, u'caption': u\"\\u5bbf\\u52a1, \\u83f2\\u5f8b\\u5bbe (Fi<span class='highlighted'>lip\\xedny</span>)\", u'destinationId': u'987200', u'latitude': 10.309726, u'landmarkCityDestinationId': None, u'type': u'CITY', u'geoId': u'1000000000000000800'}",
    # 'suggest': 'https://www.expedia.com.hk/Hotel-Search?destination=%E7%BA%AA%E5%BF%B5%E7%A2%91%E8%B0%B7%EF%BC%88%E5%8F%8A%E9%82%BB%E8%BF%91%E5%9C%B0%E5%8C%BA%EF%BC%89,+%E7%8A%B9%E4%BB%96%E5%B7%9E,+%E7%BE%8E%E5%9B%BD&startDate=2018/02/01&endDate=2018/02/02&adults=2&searchPriorityOverride=213',
    # 'suggest': '''{"hits": 2, "countryCode": "0925", "longitude": -0.12714, "label": "London, United Kingdom", "rank": 10.0, "suggestion": "London, United Kingdom", "destinationType": "CITY", "latitude": 51.506321, "type": "B"}''',
    'suggest': '''印度喀拉拉邦恰拉库德伊&76.3336779,10.3080272''',
    "country_id": "null",
    "source": "bestwest",
    "part": "20180428a",
    "is_new_type": 0,
    "date_index": 0,
}
task = Task(
    _task_id='demo',
    _task_name='list_hotel_ihg_20171218a',
    _source='bestwest',
    _type='hotel_list',
    _worker='',
    _queue='hotel_list',
    _routine_key='hotel_list',
    _used_times=2,
    max_retry_times=6,
    task_type=0,
    list_task_token='',
    kwargs=hotel_list_kwargs
)
# @Author : Hou Rong
# @Site :
# @File : test_hotel_merge_img.py
# @Software: PyCharm
import sys

# Must run before the proj imports below so they resolve from /data/lib.
sys.path.append('/data/lib')
from proj.my_lib.Common.Task import Task, TaskType
from proj.total_tasks import hotel_img_merge_task

if __name__ == '__main__':
    # Manual run: build one demo task carrying a uid and a min_pixels value
    # and hand it to hotel_img_merge_task.
    merge_args = {
        'uid': 'ht10015059',
        'min_pixels': '200000',
    }
    merge_task = Task(
        _task_id='demo',
        _task_name='test_task',
        _source='qyer',
        _type='poi_list',
        _worker='',
        _queue='',
        _routine_key='',
        _used_times=0,
        max_retry_times=6,
        task_type=TaskType.NORMAL,
        list_task_token='',
        kwargs=merge_args
    )
    hotel_img_merge_task(task=merge_task)
# @Software: PyCharm
import sys

# Must run before the proj imports below so they resolve from /data/lib.
sys.path.append('/data/lib')
from proj.my_lib.Common.Task import Task
from proj.total_tasks import poi_list_task

if __name__ == '__main__':
    # Manual run: build one demo daodao list task and hand it to
    # poi_list_task.
    #
    # Previous sample task, kept for reference:
    # task = Task(_worker='', _task_id='demo', _source='daodao', _type='rest', _task_name='list_rest_daodao_20170925d',
    #             _used_times=0, max_retry_times=6,
    #             kwargs={'source': 'daodao',
    #                     'url': '/Tourism-g488103-Grand_Baie-Vacations.html',
    #                     'city_id': '51513',
    #                     'country_id': '409',
    #                     'poi_type': 'rest'})
    # poi_list_task(task=task)
    list_args = {
        'source': 'daodao',
        # 'url': '/Tourism-g294452-Sofia_Sofia_Region-Vacations.html',
        'url': '/Tourism-g503715-Longyearbyen_Spitsbergen_Svalbard-Vacations.html',
        'city_id': '20371',
        'country_id': '107',
        'poi_type': 'attr',
        'check_in': '20171203',
        'date_index': 0,
    }
    list_task = Task(
        _task_id='demo',
        _task_name='list_rest_daodao_20170925d',
        _source='daodao',
        _type='rest',
        _worker='',
        _queue='list_task',
        _routine_key='list_task',
        _used_times=0,
        max_retry_times=6,
        task_type=0,
        list_task_token='demo',
        kwargs=list_args
    )
    poi_list_task(task=list_task)
import sys

# Must run before the proj imports below so they resolve from /data/lib.
sys.path.append('/data/lib')
from proj.my_lib.Common.Task import Task
from proj.total_tasks import poi_detail_task

if __name__ == '__main__':
    # Manual run: build one demo Daodao attraction-detail task and hand it to
    # poi_detail_task.
    detail_args = {
        # 'target_url': 'https://www.tripadvisor.cn//Attraction_Review-g187492-d13168754-Reviews-Centro_de_Interpretacion_de_los_Castros-Leon_Province_of_Leon_Castile_and_Leon.html',
        # 'target_url': 'https://www.tripadvisor.cn//Attraction_Review-g297697-d10126675-Reviews-JBS_Photo_Canvas-Kuta_Kuta_District_Bali.html',
        'target_url': 'https://www.tripadvisor.cn//Attraction_Review-g297697-d10126675-Reviews-JBS_Photo_Canvas-Kuta_Kuta_District_Bali.html',
        'city_id': 'NULL',
        'poi_type': 'attr',
        'country_id': 'NULL',
        'part': "detail_attr_daodao_20171222a",
    }
    detail_task = Task(
        _task_id='demo',
        _task_name='detail_attr_daodao_20171222a',
        _source='Daodao',
        _type='attr',
        _worker='',
        _queue='poi_detail',
        _routine_key='list_task',
        _used_times=0,
        max_retry_times=6,
        task_type=0,
        list_task_token='demo',
        kwargs=detail_args
    )
    poi_detail_task(task=detail_task)
def get_task_total_simple(queue, used_times=6, limit=30000, debug=False):
    """
    Simple way to pull the tasks of one queue out of MongoDB.

    Takes the collection names reported by
    ``get_serviceplatform_monitor_info()`` (third field of every row), keeps
    the ones whose name starts with ``Task_Queue_<queue>_TaskName_``, splits
    ``limit`` evenly between them (never less than 2000 per collection) and
    yields one ``Task`` for every document ``_get_task_total_simple`` returns.

    Nothing is yielded when no collection matches the queue.  An exception
    raised while reading one collection is logged and ends that collection
    only; the remaining collections are still processed.

    :type queue: str
    :param queue: queue name used to build the collection-name prefix
    :type used_times: int
    :param used_times: forwarded to ``_get_task_total_simple`` and also used
        as ``max_retry_times`` of every yielded task
    :type limit: int
    :param limit: total budget that is divided between the collections
    :type debug: bool
    :param debug: forwarded unchanged to ``_get_task_total_simple``
    :return: generator of Task
    """
    # type: (str, int, int, bool) -> Task
    collection_name_list = [
        name for _, _, name, _ in get_serviceplatform_monitor_info()
    ]

    collection_prefix = 'Task_Queue_{}_TaskName_'.format(queue)
    c_list = [
        name for name in collection_name_list
        if str(name).startswith(collection_prefix)
    ]

    if queue == 'poi_list':
        # Keep only collections whose last "_" field is >= "20171214a"
        # (plain string comparison).
        c_list = [
            name for name in c_list
            if str(name).split('_')[-1] >= "20171214a"
        ]

    if queue == 'hotel_list':
        # NOTE(review): this drops every collection whose second-to-last
        # field is "ihg" AND every collection whose last field is
        # "20171212a".  If the intent was to drop only "ihg_20171212a" the
        # condition should be `not (a == "ihg" and b == "20171212a")` --
        # confirm before changing.
        c_list = [
            name for name in c_list
            if str(name).split('_')[-2] != "ihg"
            and str(name).split('_')[-1] != "20171212a"
        ]

    # No matching collection: stop here instead of dividing by zero below.
    if not c_list:
        return

    # Avoid splitting the budget too thin: 2000 is the minimum share.
    # TODO: tasks are split evenly for now; later consider giving
    # different collections different thresholds.
    per_limit = max(limit // len(c_list), 2000)

    # Collections that are currently excluded from dispatching.
    skipped_collections = (
        'Task_Queue_hotel_list_TaskName_google_hotel_url_20180409a',
        'Task_Queue_hotel_list_TaskName_list_result_daodao_20180401af',
    )
    skipped_suffixes = ('20180412g', '20180416a')

    for each_collection_name in c_list:
        if each_collection_name in skipped_collections:
            continue
        if each_collection_name.endswith(skipped_suffixes):
            continue

        try:
            for d in _get_task_total_simple(
                    collection_name=each_collection_name,
                    queue=queue,
                    used_times=used_times,
                    limit=per_limit,
                    debug=debug):
                # A task name starting with "list_" is a list task unless its
                # second field is "result"; everything else is a normal task.
                # The length check keeps a bare "list" name from raising
                # IndexError and aborting the whole collection.
                name_parts = d['task_name'].split('_')
                is_list_result = (
                    len(name_parts) > 1 and name_parts[1] == 'result'
                )
                if name_parts[0] == 'list' and not is_list_result:
                    task_type = TaskType.LIST_TASK
                else:
                    task_type = TaskType.NORMAL

                _task = Task(
                    _queue=d['queue'],
                    _routine_key=d['routing_key'],
                    _worker=d['worker'],
                    _task_id=d['task_token'],
                    _source=d['source'],
                    _type=d['type'],
                    _task_name=d['task_name'],
                    _used_times=d['used_times'],
                    max_retry_times=used_times,
                    task_type=task_type,
                    list_task_token=d.get('list_task_token', None),
                    kwargs=d['args'],
                    collection=each_collection_name)

                # A list task must carry its token: a document without the
                # key raises KeyError here, which is handled below.
                if task_type == TaskType.LIST_TASK:
                    _task.list_task_token = d['list_task_token']

                yield _task
        except Exception as exc:
            logger.exception(msg="[collection exc]", exc_info=exc)