コード例 #1
0
def hotel_list_database(source,
                        url,
                        required,
                        old_spider_name,
                        need_cache=True):
    """Crawl one page with a 'daodao' spider and collect its outputs.

    Args:
        source: Source name; lower-cased, capitalized and suffixed with
            'ListInfo' to build ``task.source``.
        url: URL joined onto the module-level ``URL`` with ``urljoin`` to
            build ``task.content``.
        required: The single result key requested from the spider.
        old_spider_name: Suffix appended to 'daodao' to look the spider up.
        need_cache: When true the crawl uses ``cache_config``; otherwise it
            uses ``none_cache_config``.

    Returns:
        A 4-tuple ``(code, data, others_info, page_store_key_list)`` where
        ``code`` is the value returned by ``spider.crawl``, ``data`` is
        ``spider.result.get(required, {})`` and ``others_info`` is a dict
        holding the full result, the JSON-dumped ``save_page`` and the
        view/restaurant page info taken from the spider.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised
            unchanged.
    """
    try:
        task = Task()
        task.content = urljoin(URL, url)
        logger.info('%s  %s' % (task.content, required))
        task.source = source.lower().capitalize() + 'ListInfo'

        spider = factory.get_spider_by_old_source('daodao' + old_spider_name)
        spider.task = task

        # Only the selected configuration name is evaluated.
        active_cache = cache_config if need_cache else none_cache_config
        code = spider.crawl(required=[required], cache_config=active_cache)

        others_info = {
            'result': spider.result,
            'save_page': json.dumps(spider.save_page),
            'view_page_info': spider.view_page_info,
            'restaurant_page_info': spider.restaurant_page_info
        }

        return code, spider.result.get(
            required, {}), others_info, spider.page_store_key_list
    except Exception:
        # format_exc() formats the exception currently being handled; its
        # first positional parameter is a frame limit, not the exception.
        logger.error(traceback.format_exc())
        # A bare raise re-raises with the original traceback intact.
        raise
コード例 #2
0
def hotel_list_database(source, city_id):
    """Crawl the '<source>ListHotel' spider for a city and return its result.

    The task content is ``str(city_id) + '&2&{nights}&{check_in}'`` with the
    placeholders filled from the module-level ``hotel_rooms`` mapping.

    Args:
        source: Prefix of the spider name; 'ListHotel' is appended.
        city_id: City identifier, converted with ``str``.

    Returns:
        ``spider.result`` after the crawl. The value returned by
        ``spider.crawl`` is only printed, not returned.
    """
    task = Task()
    task.content = str(city_id) + '&' + '2&{nights}&{check_in}'.format(
        **hotel_rooms)
    spider = factory.get_spider_by_old_source(source + 'ListHotel')
    spider.task = task
    # Call form of print: prints the same single value on Python 2 and is
    # valid syntax on Python 3 (the bare statement form is not).
    print(spider.crawl(required=['hotel']))
    return spider.result
コード例 #3
0
def entry_test(task):
    """Resolve a spider for ``task``.

    The factory is first asked for a spider by task; that spider is returned
    as-is. Otherwise a lookup by ``task.source`` is tried, and the spider
    found that way gets ``task`` attached before being returned.

    Returns:
        The spider, or ``None`` when neither lookup yields one.
    """
    by_task = factory.get_spider_by_old_task(task)
    if by_task is not None:
        return by_task

    by_source = factory.get_spider_by_old_source(task.source)
    if by_source is None:
        return None

    by_source.task = task
    return by_source
コード例 #4
0
def hotel_list_database(source, city_id, check_in):
    """Crawl the '<source>ListHotel' spider for one city and check-in value.

    Args:
        source: Prefix of the spider name; 'ListHotel' is appended and the
            combined name is also stored as ``task.source``.
        city_id: City identifier, converted with ``str``.
        check_in: Check-in value inserted at the end of the task content.

    Returns:
        ``(code, spider.result)`` where ``code`` is the value returned by
        ``spider.crawl``.
    """
    job = Task()
    job.content = '&'.join([str(city_id), '2&1&{0}'.format(check_in)])
    job.source = source + 'ListHotel'

    crawler = factory.get_spider_by_old_source(job.source)
    crawler.task = job

    status = crawler.crawl(required=['hotel'])
    return status, crawler.result
コード例 #5
0
def hilton_to_database(tid, used_times, source, keyword, extra, spider_tag, need_cache=True):
    """Run a 'suggest' crawl for ``keyword`` with the spider ``spider_tag``.

    ``tid``, ``used_times`` and ``source`` are accepted but not read here.

    Args:
        keyword: Stored as ``task.content`` and appended to the log line.
        extra: Stored as ``task.extra``.
        spider_tag: Name passed to ``factory.get_spider_by_old_source``.
        need_cache: When true the crawl uses ``cache_config``; otherwise
            ``none_cache_config``.

    Returns:
        ``(error_code, spider.result['suggest'])``.
    """
    job = Task()
    job.content = keyword
    job.extra = extra

    spider = factory.get_spider_by_old_source(spider_tag)
    spider.task = job

    active_cache = cache_config if need_cache else none_cache_config
    error_code = spider.crawl(required=['suggest'], cache_config=active_cache)

    # Log how many suggestions came back for this keyword.
    suggestion_count = str(len(spider.result['suggest']))
    logger.info(suggestion_count + '  --  ' + keyword)
    return error_code, spider.result['suggest']
コード例 #6
0
def hotel_url_to_database(tid,
                          used_times,
                          source,
                          keyword,
                          spider_tag,
                          need_cache=False):
    """Search hotels by name, crawling a second time when few are found.

    A first crawl is run with ``keyword`` as ``ticket_info['hotel_name']``.
    If its 'hotel' list has two entries or fewer, an identical second crawl
    is run on a fresh task/spider and its hotels are appended to the first
    result's 'hotel' list.

    ``tid``, ``used_times``, ``source`` and ``need_cache`` are accepted but
    not read here; both crawls always use ``none_cache_config``.

    Returns:
        ``(error_code, result, search_result)`` where ``error_code`` comes
        from the first crawl, ``result`` is the first spider's result
        (possibly extended) and ``search_result`` is the first spider's
        ``user_datas['search_result']``.
    """

    def _search_once():
        # One complete crawl on a fresh task and a fresh spider.
        job = Task()
        job.ticket_info['hotel_name'] = keyword
        crawler = factory.get_spider_by_old_source(spider_tag)
        crawler.task = job
        status = crawler.crawl(required=['hotel'],
                               cache_config=none_cache_config)
        return crawler, status

    spider, error_code = _search_once()
    merged = spider.result

    if len(spider.result['hotel']) <= 2:
        # The second crawl's status is intentionally not reported.
        retry_spider, _ = _search_once()
        for hotel in retry_spider.result['hotel']:
            merged['hotel'].append(hotel)

    return error_code, merged, spider.user_datas['search_result']
コード例 #7
0
def hotel_detail_database(url, source, need_cache=True):
    """Crawl a hotel detail page with the '<source>DetailHotel' spider.

    Args:
        url: Stored as ``task.content``.
        source: Spider name prefix; also written to ``spider.task.source``.
        need_cache: When true the crawl uses ``cache_config``; otherwise
            ``none_cache_config``.

    Returns:
        ``(error_code, spider.result, spider.page_store_key_list)``.
    """
    task = Task()
    task.content = url

    spider = factory.get_spider_by_old_source(source + 'DetailHotel')
    spider.task = task
    spider.task.source = source

    active_cache = cache_config if need_cache else none_cache_config
    error_code = spider.crawl(required=['hotel'], cache_config=active_cache)

    # One log line: ticket info, the crawled URL and the crawl status.
    summary = (str(task.ticket_info) + '  --  ' + task.content + '--' +
               str(error_code))
    logger.info(summary)
    return error_code, spider.result, spider.page_store_key_list
コード例 #8
0
def hotel_url_to_database(source, keyword, need_cache=False):
    """Crawl the '<source>ListHotel' spider for ``keyword``.

    ``keyword`` is stored under both ``ticket_info['url']`` and
    ``ticket_info['hotel_name']``.

    Args:
        source: Spider name prefix; 'ListHotel' is appended.
        keyword: Search value placed into the task's ticket info.
        need_cache: When true the crawl uses ``cache_config``; otherwise
            ``none_cache_config``.

    Returns:
        ``(error_code, spider.result['hotel'])``. The error code is also
        printed.
    """
    job = Task()
    for field in ('url', 'hotel_name'):
        job.ticket_info[field] = keyword

    spider = factory.get_spider_by_old_source(source + 'ListHotel')
    spider.task = job

    active_cache = cache_config if need_cache else none_cache_config
    error_code = spider.crawl(required=['hotel'], cache_config=active_cache)
    print(error_code)

    return error_code, spider.result['hotel']
コード例 #9
0
def poidetail_to_database(tid, used_times, source, url, need_cache=True):
    """Crawl a POI detail page with the '<source>_detail' spider.

    Args:
        tid: Stored in ``task.ticket_info['tid']``.
        used_times: Stored in ``task.ticket_info['used_times']``.
        source: Spider name prefix; '_detail' is appended.
        url: Stored as ``task.content``.
        need_cache: When true the crawl uses ``cache_config``; otherwise
            ``none_cache_config``.

    Returns:
        ``(error_code, spider.result['POIdetail'],
        spider.page_store_key_list)``. The spider name and the error code
        are printed along the way.
    """
    job = Task()
    job.content = url
    job.ticket_info = {'tid': tid, 'used_times': used_times}

    spider_name = source + '_detail'
    print(spider_name)
    spider = factory.get_spider_by_old_source(spider_name)
    spider.task = job

    active_cache = cache_config if need_cache else none_cache_config
    error_code = spider.crawl(required=['POIdetail'], cache_config=active_cache)
    print(error_code)

    logger.info(str(spider.result['POIdetail']) + '  --  ' + job.content)
    return error_code, spider.result['POIdetail'], spider.page_store_key_list
コード例 #10
0
def hilton_to_database(tid, used_times, source, source_id, city_id, check_in, need_cache=True):
    """Crawl 'list' and 'room' data with the 'hiltonHotel2' spider.

    The task content is
    ``'NULL&<city_id>&<source_id>&2&<check_in>'``; ``source`` is accepted
    but not read here.

    Args:
        tid: Stored in ``task.ticket_info['tid']``.
        used_times: Stored in ``task.ticket_info['used_times']``.
        source_id: Converted with ``str`` into the task content.
        city_id: Converted with ``str`` into the task content.
        check_in: Inserted at the end of the task content.
        need_cache: When true the crawl uses ``cache_config``; otherwise
            ``none_cache_config``.

    Returns:
        ``(error_code, spider.result['room'], spider.page_store_key_list)``.
        The error code is also printed.
    """
    job = Task()
    job.content = ''.join([
        'NULL&', str(city_id), '&', str(source_id), '&',
        '2&{0}'.format(check_in),
    ])
    job.ticket_info = {
        'tid': tid,
        'used_times': used_times,
        'room_info': [{"occ": 2, "num": 1}],
    }

    spider = factory.get_spider_by_old_source('hiltonHotel2')
    spider.task = job

    active_cache = cache_config if need_cache else none_cache_config
    error_code = spider.crawl(required=['list', 'room'], cache_config=active_cache)
    print(error_code)

    logger.info(str(spider.result['room']) + '  --  ' + job.content)
    return error_code, spider.result['room'], spider.page_store_key_list
コード例 #11
0
def GTdetail_to_database(tid, used_times, source, ticket, need_cache=True):
    """Crawl vacation details with the '<source>|vacation_detail' spider.

    Args:
        tid: Stored in ``task.ticket_info['tid']``.
        used_times: Stored in ``task.ticket_info['used_times']``.
        source: Stored in the ticket info and used to build the spider name.
        ticket: Stored in ``task.ticket_info['vacation_info']``.
        need_cache: When true the crawl uses ``cache_config``; otherwise
            ``none_cache_config``.

    Returns:
        ``(error_code, spider.result['vacation'],
        spider.page_store_key_list)``. The error code is also printed.
    """
    job = Task()
    job.ticket_info = {
        'tid': tid,
        'vacation_info': ticket,
        'source': source,
        'used_times': used_times,
    }

    spider_name = '{}|vacation_detail'.format(source)
    spider = factory.get_spider_by_old_source(spider_name)
    spider.task = job

    active_cache = cache_config if need_cache else none_cache_config
    error_code = spider.crawl(required=['vacation'], cache_config=active_cache)
    print(error_code)

    return error_code, spider.result['vacation'], spider.page_store_key_list
コード例 #12
0
def qyer_list_to_database(tid,
                          used_times,
                          source,
                          city_id,
                          check_in,
                          city_url,
                          need_cache=True):
    """Crawl a city list page with the 'qyerList' spider.

    ``source``, ``city_id`` and ``check_in`` are accepted but not read here.

    Args:
        tid: Stored in ``task.ticket_info['tid']``.
        used_times: Stored in ``task.ticket_info['used_times']``.
        city_url: Stored as ``task.content``.
        need_cache: When true the crawl uses ``cache_config``; otherwise
            ``none_cache_config``.

    Returns:
        ``(error_code, spider.result['list'], spider.page_store_key_list,
        spider.types_result_num)``. The error code is also printed.
    """
    job = Task()
    job.content = city_url
    job.ticket_info = {'tid': tid, 'used_times': used_times}

    spider = factory.get_spider_by_old_source('qyerList')
    spider.task = job

    active_cache = cache_config if need_cache else none_cache_config
    error_code = spider.crawl(required=['list'], cache_config=active_cache)
    print(error_code)

    logger.info(str(spider.result['list']) + '  --  ' + job.content)
    return (error_code, spider.result['list'], spider.page_store_key_list,
            spider.types_result_num)
コード例 #13
0
def hotel_list_database(source, check_in, suggest_type='1', suggest=''):
    """Run a "new type" hotel list crawl with the ``OLD_SOURCE`` spider.

    ``source`` is accepted but not read here. The spider name and the
    requested result keys come from the module-level ``OLD_SOURCE`` and
    ``REQUIRED``; ``cache_config`` is passed as ``False``.

    Args:
        check_in: Stored as a string in ``ticket_info['check_in']``.
        suggest_type: Converted with ``int`` for
            ``ticket_info['suggest_type']``.
        suggest: Stored in ``ticket_info['suggest']``.

    Returns:
        ``(error_code, spider.result, spider.page_store_key_list)``.
    """
    # Set up the task; tid and used_times are generated per call.
    job = Task()
    ticket = {
        'is_new_type': True,
        'suggest_type': int(suggest_type),
        'suggest': suggest,
        'check_in': str(check_in),
        'stay_nights': '1',
        'occ': '2',
        'is_service_platform': True,
        'tid': uuid.uuid4(),
        'used_times': random.randint(1, 6),
    }
    job.ticket_info = ticket
    job.content = ''

    # Set up the spider.
    crawler = factory.get_spider_by_old_source(OLD_SOURCE)
    crawler.task = job

    # Issue the request.
    status = crawler.crawl(required=REQUIRED, cache_config=False)

    return status, crawler.result, crawler.page_store_key_list
コード例 #14
0
def hotel_list_database(tid,
                        used_times,
                        source,
                        city_id,
                        check_in,
                        is_new_type=False,
                        suggest_type='1',
                        suggest='',
                        need_cache=True,
                        flag=False):
    """Build a hotel list/filter task for ``source`` and crawl it.

    Args:
        tid: Stored in the ticket info (except for legacy 'bestwest').
        used_times: Stored in the ticket info (except for legacy 'bestwest').
        source: Stored as ``task.source``; selects the legacy content format
            and is the prefix of the spider name.
        city_id: Used in the default legacy content and in the new-type
            ticket info.
        check_in: Check-in value used in the content or the ticket info.
        is_new_type: When true, all parameters go into ``ticket_info`` and
            the content is empty; otherwise the legacy per-source content
            string is built.
        suggest_type: Converted with ``int`` (new-type tasks only).
        suggest: Suggestion string; for legacy 'bestwest' it is read as
            ``'<description>&<lng>,<lat>'``.
        need_cache: When true the crawl uses ``cache_config``; otherwise
            ``none_cache_config``.
        flag: When true the '<source>FilterHotel' spider is used with
            ``required=['filter']``; otherwise '<source>ListHotel' with
            ``required=['hotel']``.

    Returns:
        ``(error_code, spider.result, spider.page_store_key_list)``.
    """

    def _legacy_content():
        # Per-source format of the legacy task content string.
        if source == 'hilton':
            return check_in
        if source == 'starwood':
            return suggest + '&'
        if source == 'hyatt':
            return ''
        if source == 'gha':
            return suggest
        return str(city_id) + '&' + '2&1&{0}'.format(check_in)

    task = Task()
    task.source = source

    if is_new_type:
        task.ticket_info = {
            'is_new_type': True,
            'suggest_type': int(suggest_type),
            'suggest': suggest,
            'check_in': str(check_in),
            'stay_nights': '1',
            'city_id': city_id,
            'occ': '2',
            'is_service_platform': True,
            'tid': tid,
            'used_times': used_times,
        }
        task.content = ''
    else:
        task.content = _legacy_content()
        task.ticket_info = {
            'is_new_type': False,
            'is_service_platform': True,
            'tid': tid,
            'used_times': used_times,
        }
        if source == 'bestwest':
            # 'bestwest' replaces both the content and the ticket info built
            # above; the ticket info then carries only the coordinates.
            pieces = suggest.split('&')
            description = pieces[0]
            coordinates = pieces[1].split(',')
            task.content = '&{}&{}&2'.format(description, check_in)
            task.ticket_info = {
                'locationLng': float(coordinates[0]),
                'locationLat': float(coordinates[1]),
            }

    print(task.ticket_info)

    if flag:
        suffix, required = 'FilterHotel', ['filter']
    else:
        suffix, required = 'ListHotel', ['hotel']
    old_spider_tag = source + suffix

    spider = factory.get_spider_by_old_source(old_spider_tag)
    spider.task = task

    active_cache = cache_config if need_cache else none_cache_config
    error_code = spider.crawl(required=required, cache_config=active_cache)

    return error_code, spider.result, spider.page_store_key_list