Beispiel #1
0
# @Site    :
# @File    : qyer_list_test.py
# @Software: PyCharm
from proj.my_lib.Common.Task import Task
from proj.total_tasks import hilton_tax_task

if __name__ == '__main__':
    # Manual smoke run: hand-build a single Hilton task and pass it directly
    # to the worker function imported at the top of this script.
    hilton_args = dict(
        check_in="20180128",
        city_id="50012",
        source_id="NYCDTDT",
        source="hilton",
        date_index=0,
    )

    # Every Task constructor argument gathered in one mapping, then expanded.
    task_settings = {
        '_worker': '',
        '_task_id': 'demo',
        '_source': 'hilton',
        '_type': 'hotel_list',
        '_task_name': 'hilton_tax_test',
        '_used_times': 0,
        'max_retry_times': 6,
        'kwargs': hilton_args,
        '_queue': 'hotel_list',
        '_routine_key': 'hotel_list',
        'list_task_token': 'test',
        'task_type': 0,
        'collection': '10.19.2.103',
    }
    hilton_tax_task(task=Task(**task_settings))
Beispiel #2
0
            if source in key:
                class_name = all_sdk.get(key)
                break
        return class_name

    def _execute(self, **kwargs):
        """
        Run the SDK that matches this task's source.

        The SDK class is looked up with ``self.get_task_sdk`` using
        ``self.task.source``, instantiated with ``self.task`` and its
        ``execute()`` method is called. ``kwargs`` is accepted but not used.
        """
        sdk_cls = self.get_task_sdk(self.task.source)
        sdk_cls(self.task).execute()


if __name__ == "__main__":
    # Demo run: build one city-suggest task for the keyword below and execute
    # it in-process through SlowTaskSDK.
    demo_settings = dict(
        _worker='',
        _task_id='demo',
        _source='daodaocity',
        _type='poi_list',
        _task_name='daodao_city_suggest',
        _used_times=0,
        max_retry_times=6,
        kwargs={'keyword': '纽约'},
        _queue='poi_list',
        _routine_key='poi_list',
        list_task_token='test',
        task_type=0,
        collection='',
    )
    SlowTaskSDK(Task(**demo_settings)).execute()
# Image-download task fixture for the 'file_downloader' queue.
#
# Earlier payloads, kept commented out for reference:
# kwargs={"source": "daodao", "new_part": "detail_attr_daodao_20171122a",
#         "target_url": "https://ccm.ddcdn.com/ext/photo-s/0f/dd/44/61/peaceful-time.jpg",
#         "source_id": "test", "bucket_name": "mioji-attr", "is_poi_task": True, "part": "20171122a",
#         "file_prefix": ""},
# kwargs={
#     "source": "ihg",
#     "new_part": "detail_hotel_ihg_20171220a",
#     "target_url": "https://ihg.scene7.com/is/image/ihg/candlewood-suites-idaho-falls-3053752126-4x3?fmt=png-alpha",
#     "source_id": "idapd",
#     "bucket_name": "mioji-hotel",
#     "is_poi_task": False,
#     "part": "20171220a",
#     "file_prefix": ""
# },
task = Task(
    # Payload currently in use.
    kwargs=dict(
        file_prefix='tuniuGT',
        target_url="https://m.tuniucdn.com//fb2/t1/G3/M00/3D/0B/Cii_LlloJcyIGfs2AFyrXaRNclAAADnOgNM364AXKt1824_w640_h480_c1_t0.jpg",
        source_id="210138695",
        need_insert_db=True,
        source="tuniuGT",
        bucket_name='mioji-grouptravel',
        is_poi_task=True,
    ),
    # Routing / bookkeeping fields.
    _worker='proj.total_tasks.images_task',
    _queue='file_downloader',
    _routine_key='file_downloader',
    _task_id='176ddbc7960c2a6f6d8d7c9baea65617',
    _task_name='image_GT_tuniu_20180414',
    _source='tuniuGT',
    _type='DownloadImages',
    _used_times=0,
    max_retry_times=6,
    list_task_token='',
    task_type=0,
)
Beispiel #4
0
    #                 "country_id": "NULL",
    #                 "source": "holiday",
    #                 "part": "detail_hotel_holiday_20171226a",
    #                 "city_id": "NULL",
    #                 "source_id": "ABYSY"
    #             },
    #             task_type=TaskType.NORMAL, list_task_token=None)
    #
    # print(hotel_detail_task(task=task))

    # Hotel-detail fixture for the Four Seasons source. URLs tried earlier for
    # other sources are kept commented out for reference:
    # "url": "http://www.booking.com\n/hotel/ph/tg-hometel.zh-cn.html?label=gen173nr-1DCAEoggJCAlhYSDNiBW5vcmVmcgV1c19jYYgBAZgBMsIBA2FibsgBDNgBA-gBAZICAXmoAgQ;sid=3b827f3aa2e3fca0a95ec0d56605f64a;checkin=2018-01-08;checkout=2018-01-11;ucfs=1;soh=1;srpvid=511e686ec99000f9;srepoch=1511448670;highlighted_blocks=;all_sr_blocks=;room1=A%2CA;soldout=0%2C0;hpos=10;hapos=520;dest_type=region;dest_id=5374;srfid=0a39626563bec2b30fbbedccb1438d4e5f55493fX520;from=searchresults;soldout_clicked=1\n;highlight_room=#no_availability_msg",
    # "url": "https://www.ihg.com/holidayinnexpress/hotels/cn/zh/teluk/hoteldetail#####https://apis.ihg.com/hotels/v1/profiles/TELUK/details",
    # "url": "https://www.expedia.com.hk/Bhimtal-Hotels-Emerald-Trail.h4474316.Hotel-Information?chkin=2017%2F12%2F6&chkout=2017%2F12%2F7&rm1=a2&regionId=6139790&sort=recommended&hwrqCacheKey=b07edfbf-68f1-472b-b58d-d153dc82d7feHWRQ1511794413272&vip=false&c=c8d5ec02-71e2-496b-aa9f-5988e64b7931&",
    # "url": "https://www.booking.com/hotel/us/new-lakefront-home-4br-47-2b-in-katy-west-houston.zh-cn.html?aid=376390;label=misc-aHhSC9cmXHUO1ZtqOcw05wS94870954985%3Apl%3Ata%3Ap1%3Ap2%3Aac%3Aap1t1%3Aneg%3Afi%3Atikwd-11455299683%3Alp9061505%3Ali%3Adec%3Adm;sid=760b4b8ac503b49f5d89e67ec36a2fa9;aer=1;dest_id=20126498;dest_type=city;dist=0;hapos=90;hpos=15;room1=A%2CA;sb_price_type=total;spdest=ci%2F20126498;spdist=41.0;srepoch=1511794977;srfid=75643f0d9b7ac3fe31b60ecc58ba9f10b377fd16X90;srpvid=fdcc69d0f9a606d5;type=total;ucfs=1&#hotelTmpl",
    # "url": "https://www.expedia.com.hk/Hotels-Beautiful.h19200665.Hotel-Information",
    task = Task(
        _worker='',
        _queue='hotel_detail',
        _routine_key='hotel_detail',
        _task_id='demo',
        _source='fourseasons',
        _type='hotel',
        _task_name='detail_hotel_fourseasons_20180325a',
        _used_times=0,
        max_retry_times=10,
        kwargs=dict(
            url="https://www.fourseasons.com/en/maldivesfse/",
            country_id="NULL",
            source="fourseasons",
            part="detail_hotel_fourseasons_20180325a",
            city_id="NULL",
            source_id="baku",
        ),
        task_type=TaskType.NORMAL,
        list_task_token=None,
    )

    # Run the detail worker on the fixture and show whatever it returns.
    detail_result = hotel_detail_task(task=task)
    print(detail_result)
# @Author  : Hou Rong
# @Site    : 
# @File    : qyer_list_test.py
# @Software: PyCharm
from proj.my_lib.Common.Task import Task
from proj.total_tasks import qyer_list_task

if __name__ == '__main__':
    # Manual run of the qyer list worker against a single hand-built task.
    #
    # Previous payload, kept for reference:
    # args = {
    #     'source': 'qyer',
    #     'country_id': '412',
    #     'city_id': '40051',
    #     'check_in': '20170925',
    #     'city_url': 'http://place.qyer.com/praslin-island/'
    # }
    qyer_args = dict(
        check_in="20180128",
        city_id="20645",
        country_id="133",
        source="qyer",
        city_url="http://place.qyer.com/albania/",
        date_index=0,
    )

    task = Task(
        _worker='',
        _task_id='demo',
        _source='qyer',
        _type='poi_list',
        _task_name='list_qyer_total_test',
        _used_times=0,
        max_retry_times=6,
        kwargs=qyer_args,
        _queue='poi_list',
        _routine_key='poi_list',
        list_task_token='test',
        task_type=0,
        collection='',
    )
    qyer_list_task(task=task)
Beispiel #6
0
                else:
                    url = source_interface[source].format(keyword)
                    response = session.get(url=url,)
                    get_suggest = getattr(sys.modules[__name__],'get_{0}_suggest'.format(source))

                count = get_suggest(response.content,map_info,country_id,city_id,database_name,keyword)
                if count >= 0:
                    self.task.error_code = 0
            except Exception as e:
                print(e)
                raise ServiceStandardError(ServiceStandardError.REQ_ERROR,wrapped_exception=e)

        return count

if __name__ == "__main__":
    # Demo run: one city-suggest lookup executed in-process through
    # AllHotelSourceSDK.
    suggest_args = dict(
        keyword='纽约',
        source='booking',
        map_info='0.0',
        country_id='501',
        city_id='10002',
        database_name='Cityupline',
    )
    task_settings = {
        '_worker': '',
        '_task_id': 'demo',
        '_source': 'hotels',
        '_type': 'supplement_field',
        '_task_name': 'all_hotels_city_suggest',
        '_used_times': 0,
        'max_retry_times': 6,
        'kwargs': suggest_args,
        '_queue': 'supplement_field',
        '_routine_key': 'supplement_field',
        'list_task_token': 'test',
        'task_type': 0,
        'collection': '',
    }
    AllHotelSourceSDK(Task(**task_settings)).execute()
# -*- coding: utf-8 -*-
# @Time    : 2017/12/12 下午8:02
# @Author  : Hou Rong
# @Site    :
# @File    : test_ks_move_img.py
# @Software: PyCharm
from SDK import KsMoveSDK
from proj.my_lib.Common.Task import Task

if __name__ == '__main__':
    # Demo run of KsMoveSDK with a payload naming a source bucket, a target
    # bucket and one file.
    move_args = dict(
        from_bucket='mioji-attr',
        to_bucket='mioji-shop',
        file_name='00001b7e38457f1b826311b1ff92043c.jpg',
    )
    demo_task = Task(
        kwargs=move_args,
        _task_id='demo',
        _task_name='demo',
        _worker='proj.total_tasks.qyer_city_task',
        _queue='supplement_field',
        _routine_key='supplement_field',
        _source='Qyer',
        _type='CityInfo',
        _used_times=0,
        max_retry_times=6,
        task_type=0,
        list_task_token='null',
        collection='Unknown',
    )
    KsMoveSDK(task=demo_task).execute()
Beispiel #8
0
# Hotel-list task fixture for the 'bestwest' source.
#
# Earlier payload, kept commented out for reference:
# kwargs={
#     'source': 'hotels',
#     'city_id': 'NULL',
#     'country_id': '205',
#     'check_in': '20171128',
#     'part': '20170929a',
#     'is_new_type': 1,
#     'suggest_type': 1,
#     'suggest': '''https://www.hotels.cn/search.do?resolved-location=CITY%3A1638661%3AUNKNOWN%3AUNKNOWN&destination-id=1638661&q-destination=%E9%A9%AC%E5%B8%8C%E5%B2%9B,%20%E5%A1%9E%E8%88%8C%E5%B0%94&q-check-in=2018-01-08&q-check-out=2018-01-11&q-rooms=1&q-room-0-adults=2&q-room-0-children=0'''
# },
#
# Alternative 'suggest' values tried with the current payload:
# "suggest": "{u'name': u'\\u5bbf\\u52a1', u'redirectPage': u'DEFAULT_PAGE', u'longitude': 123.89309, u'caption': u\"\\u5bbf\\u52a1, \\u83f2\\u5f8b\\u5bbe (Fi<span class='highlighted'>lip\\xedny</span>)\", u'destinationId': u'987200', u'latitude': 10.309726, u'landmarkCityDestinationId': None, u'type': u'CITY', u'geoId': u'1000000000000000800'}",
# 'suggest': 'https://www.expedia.com.hk/Hotel-Search?destination=%E7%BA%AA%E5%BF%B5%E7%A2%91%E8%B0%B7%EF%BC%88%E5%8F%8A%E9%82%BB%E8%BF%91%E5%9C%B0%E5%8C%BA%EF%BC%89,+%E7%8A%B9%E4%BB%96%E5%B7%9E,+%E7%BE%8E%E5%9B%BD&startDate=2018/02/01&endDate=2018/02/02&adults=2&searchPriorityOverride=213',
# 'suggest': '''{"hits": 2, "countryCode": "0925", "longitude": -0.12714, "label": "London, United Kingdom", "rank": 10.0, "suggestion": "London, United Kingdom", "destinationType": "CITY", "latitude": 51.506321, "type": "B"}''',
task = Task(
    # Payload currently in use.
    kwargs=dict(
        suggest_type="2",
        check_in="20180525",
        city_id="null",
        suggest='''印度喀拉拉邦恰拉库德伊&76.3336779,10.3080272''',
        country_id="null",
        source="bestwest",
        part="20180428a",
        is_new_type=0,
        date_index=0,
    ),
    # Routing / bookkeeping fields.
    _worker='',
    _queue='hotel_list',
    _routine_key='hotel_list',
    _task_id='demo',
    _task_name='list_hotel_ihg_20171218a',
    _source='bestwest',
    _type='hotel_list',
    _used_times=2,
    max_retry_times=6,
    list_task_token='',
    task_type=0,
)
# @Author  : Hou Rong
# @Site    :
# @File    : test_hotel_merge_img.py
# @Software: PyCharm
import sys

sys.path.append('/data/lib')
from proj.my_lib.Common.Task import Task, TaskType
from proj.total_tasks import hotel_img_merge_task

if __name__ == '__main__':
    # Manual run of the hotel image merge worker for a single uid.
    merge_settings = {
        '_worker': '',
        '_task_id': 'demo',
        '_source': 'qyer',
        '_type': 'poi_list',
        '_task_name': 'test_task',
        '_used_times': 0,
        'max_retry_times': 6,
        'kwargs': dict(uid='ht10015059', min_pixels='200000'),
        'list_task_token': '',
        'task_type': TaskType.NORMAL,
        '_queue': '',
        '_routine_key': '',
    }
    hotel_img_merge_task(task=Task(**merge_settings))
Beispiel #10
0
# @Software: PyCharm
import sys

sys.path.append('/data/lib')
from proj.my_lib.Common.Task import Task
from proj.total_tasks import poi_list_task

if __name__ == '__main__':
    # Manual run of the POI list worker against one hand-built daodao task.
    #
    # Earlier fixture (Grand Baie, poi_type 'rest'), kept for reference:
    # task = Task(_worker='', _task_id='demo', _source='daodao', _type='rest', _task_name='list_rest_daodao_20170925d',
    #             _used_times=0, max_retry_times=6,
    #             kwargs={'source': 'daodao',
    #                     'url': '/Tourism-g488103-Grand_Baie-Vacations.html',
    #                     'city_id': '51513',
    #                     'country_id': '409',
    #                     'poi_type': 'rest'})
    # poi_list_task(task=task)

    poi_kwargs = dict(
        source='daodao',
        # Alternative URL: '/Tourism-g294452-Sofia_Sofia_Region-Vacations.html'
        url='/Tourism-g503715-Longyearbyen_Spitsbergen_Svalbard-Vacations.html',
        city_id='20371',
        country_id='107',
        poi_type='attr',
        check_in='20171203',
        date_index=0,
    )
    task = Task(
        _worker='',
        _task_id='demo',
        _source='daodao',
        _type='rest',
        _task_name='list_rest_daodao_20170925d',
        _used_times=0,
        max_retry_times=6,
        kwargs=poi_kwargs,
        _routine_key='list_task',
        _queue='list_task',
        list_task_token='demo',
        task_type=0,
    )
    poi_list_task(task=task)
Beispiel #11
0
import sys

sys.path.append('/data/lib')
from proj.my_lib.Common.Task import Task
from proj.total_tasks import poi_detail_task

if __name__ == '__main__':
    # Manual run of the POI detail worker against one attraction page.
    #
    # Other target URLs tried previously:
    # 'target_url': 'https://www.tripadvisor.cn//Attraction_Review-g187492-d13168754-Reviews-Centro_de_Interpretacion_de_los_Castros-Leon_Province_of_Leon_Castile_and_Leon.html',
    # 'target_url': 'https://www.tripadvisor.cn//Attraction_Review-g297697-d10126675-Reviews-JBS_Photo_Canvas-Kuta_Kuta_District_Bali.html',
    detail_kwargs = dict(
        target_url='https://www.tripadvisor.cn//Attraction_Review-g297697-d10126675-Reviews-JBS_Photo_Canvas-Kuta_Kuta_District_Bali.html',
        city_id='NULL',
        poi_type='attr',
        country_id='NULL',
        part="detail_attr_daodao_20171222a",
    )
    task_settings = {
        '_worker': '',
        '_task_id': 'demo',
        '_source': 'Daodao',
        '_type': 'attr',
        '_task_name': 'detail_attr_daodao_20171222a',
        '_used_times': 0,
        'max_retry_times': 6,
        'kwargs': detail_kwargs,
        '_routine_key': 'list_task',
        '_queue': 'poi_detail',
        'list_task_token': 'demo',
        'task_type': 0,
    }
    poi_detail_task(task=Task(**task_settings))
Beispiel #12
0
def get_task_total_simple(queue, used_times=6, limit=30000, debug=False):
    """
    Simple helper that pulls tasks for one queue out of mongodb.

    Collection names are the third element of every tuple returned by
    ``get_serviceplatform_monitor_info()``; only names starting with
    ``Task_Queue_<queue>_TaskName_`` are used. ``limit`` is split evenly
    across the remaining collections (never less than 2000 per collection)
    and every document produced by ``_get_task_total_simple`` is wrapped in a
    ``Task`` and yielded.

    When no collection is left after filtering, nothing is yielded (the
    previous implementation raised ``ZeroDivisionError`` in that case).

    An exception raised while reading one collection is logged and the
    remaining collections are still processed.

    :type queue: str
    :param queue: queue name used to select the task collections
    :type used_times: int
    :param used_times: forwarded to ``_get_task_total_simple`` and also used
        as ``max_retry_times`` of every yielded task
    :type limit: int
    :param limit: total number of tasks to spread over the collections
    :type debug: bool
    :param debug: forwarded unchanged to ``_get_task_total_simple``
    :return: generator yielding Task
    """
    # type:  (str, int, int, bool) -> Task
    collection_prefix = 'Task_Queue_{}_TaskName_'.format(queue)
    c_list = [
        name for _, _, name, _ in get_serviceplatform_monitor_info()
        if str(name).startswith(collection_prefix)
    ]

    if queue == 'poi_list':
        # Keep collections whose trailing segment compares (as a string)
        # greater than or equal to "20171214a".
        c_list = [
            name for name in c_list
            if str(name).split('_')[-1] >= "20171214a"
        ]

    if queue == 'hotel_list':
        # Drop collections whose second-to-last segment is "ihg" as well as
        # those whose last segment is "20171212a".
        c_list = [
            name for name in c_list
            if str(name).split('_')[-2] != "ihg"
            and str(name).split('_')[-1] != "20171212a"
        ]

    if not c_list:
        # Nothing to distribute; returning here also avoids the division by
        # zero in the per-collection limit below.
        return

    # Avoid splitting the limit too thinly: 2000 is the minimum share
    per_limit = max(limit // len(c_list), 2000)
    c_list = [(name, per_limit) for name in c_list]

    # if queue == 'file_downloader':
    #     c_list.append(('Task_Queue_file_downloader_TaskName_images_total_qyer_20171201a', 2000))
    #     c_list.append(('Task_Queue_file_downloader_TaskName_images_total_qyer_20171120a', 2000))
    # todo split evenly for now; later give different thresholds different shares

    # Collections that are currently excluded by hand.
    skipped_collections = (
        'Task_Queue_hotel_list_TaskName_google_hotel_url_20180409a',
        'Task_Queue_hotel_list_TaskName_list_result_daodao_20180401af',
    )
    skipped_suffixes = ('20180412g', '20180416a')

    for each_collection_name, each_limit in c_list:
        if each_collection_name in skipped_collections:
            continue
        if each_collection_name.endswith(skipped_suffixes):
            continue
        # if each_collection_name in ('Task_Queue_hotel_detail_TaskName_detail_result_daodao_20180412af', 'Task_Queue_hotel_list_TaskName_list_result_daodao_20180412a'):continue
        # if each_collection_name!='Task_Queue_hotel_list_TaskName_list_result_daodao_20180412a':continue
        # if each_collection_name=='Task_Queue_grouptravel_TaskName_detail_total_GT_20180314a': continue
        #         or each_collection_name=='Task_Queue_grouptravel_TaskName_detail_total_GT_20180312a' :continue

        try:
            for d in _get_task_total_simple(
                    collection_name=each_collection_name,
                    queue=queue,
                    used_times=used_times,
                    limit=each_limit,
                    debug=debug):
                # _queue, _worker, _task_id, _source, _type, _task_name, _used_times, max_retry_times,
                t_list = d['task_name'].split('_')
                # NOTE(review): str.split never returns an empty list, so this
                # guard does not fire for an empty task name - confirm intent.
                if not t_list:
                    continue

                # "list_*" names are list tasks, except "list_result_*" ones
                # which are treated as normal tasks like everything else.
                if t_list[0] == 'list' and t_list[1] != 'result':
                    task_type = TaskType.LIST_TASK
                else:
                    task_type = TaskType.NORMAL

                _task = Task(_queue=d['queue'],
                             _routine_key=d['routing_key'],
                             _worker=d['worker'],
                             _task_id=d['task_token'],
                             _source=d['source'],
                             _type=d['type'],
                             _task_name=d['task_name'],
                             _used_times=d['used_times'],
                             max_retry_times=used_times,
                             task_type=task_type,
                             list_task_token=d.get('list_task_token', None),
                             kwargs=d['args'],
                             collection=each_collection_name)
                if task_type == TaskType.LIST_TASK:
                    # A list task must carry its token: the direct lookup
                    # raises KeyError (logged below) when it is missing.
                    _task.list_task_token = d['list_task_token']

                yield _task
        except Exception as exc:
            logger.exception(msg="[collection exc]", exc_info=exc)