def hotel_list_database(source, url, required, old_spider_name, need_cache=True):
    """Run the 'daodao' spider named by ``old_spider_name`` against ``url``.

    The task content is ``url`` joined onto the module-level ``URL``; the
    task source is ``source`` lower-cased, capitalised and suffixed with
    ``'ListInfo'``.

    :param source: name used to derive ``task.source``.
    :param url: value joined onto ``URL`` with ``urljoin``.
    :param required: single result key; passed to ``crawl`` as a one-element
        list and used to select the returned data.
    :param old_spider_name: suffix appended to ``'daodao'`` for spider lookup.
    :param need_cache: crawl with ``cache_config`` when true, otherwise with
        ``none_cache_config``.
    :return: ``(code, data, others_info, page_store_key_list)`` where
        ``data`` is ``spider.result.get(required, {})``.
    :raises Exception: any failure is logged with its traceback and re-raised.
    """
    try:
        task = Task()
        task.content = urljoin(URL, url)
        logger.info('%s %s' % (task.content, required))
        task.source = source.lower().capitalize() + 'ListInfo'

        spider = factory.get_spider_by_old_source('daodao' + old_spider_name)
        spider.task = task

        code = spider.crawl(
            required=[required],
            cache_config=cache_config if need_cache else none_cache_config,
        )

        others_info = {
            'result': spider.result,
            'save_page': json.dumps(spider.save_page),
            'view_page_info': spider.view_page_info,
            'restaurant_page_info': spider.restaurant_page_info
        }
        return (code, spider.result.get(required, {}), others_info,
                spider.page_store_key_list)
    except Exception:
        # format_exc() formats the exception currently being handled; its
        # first positional argument is ``limit``, so the exception object
        # must not be passed to it.
        logger.error(traceback.format_exc())
        # Bare raise keeps the original traceback intact.
        raise
def hotel_list_database(source, city_id):
    """Crawl ``'hotel'`` with the ``<source>ListHotel`` spider and return its result.

    The task content is ``str(city_id)`` followed by ``'&2&{nights}&{check_in}'``
    filled in from the module-level ``hotel_rooms`` mapping.

    :param source: prefix of the old spider name (``source + 'ListHotel'``).
    :param city_id: value placed at the front of the task content.
    :return: ``spider.result`` after the crawl.
    """
    task = Task()
    task.content = str(city_id) + '&' + '2&{nights}&{check_in}'.format(**hotel_rooms)

    spider = factory.get_spider_by_old_source(source + 'ListHotel')
    spider.task = task

    # Parenthesised form prints the same value on Python 2 and stays valid
    # syntax on Python 3 (the bare print statement does not).
    print(spider.crawl(required=['hotel']))
    return spider.result
def entry_test(task):
    """Find a spider for ``task``, attach the task to it and return it.

    Lookup is tried by old task first, then by ``task.source``.

    :param task: task object; its ``source`` attribute is used as fallback key.
    :return: the spider with ``spider.task`` set, or ``None`` when neither
        lookup yields a spider.
    """
    candidate = factory.get_spider_by_old_task(task)
    if candidate is None:
        # Fall back to a lookup keyed on the task's source name.
        candidate = factory.get_spider_by_old_source(task.source)
    if candidate is None:
        return None

    candidate.task = task
    return candidate
def hotel_list_database(source, city_id, check_in):
    """Crawl ``'hotel'`` with the ``<source>ListHotel`` spider.

    The task content is ``'<city_id>&2&1&<check_in>'``.

    :param source: prefix of the spider name; ``source + 'ListHotel'`` is also
        stored as ``task.source``.
    :param city_id: value placed at the front of the task content.
    :param check_in: value formatted into the tail of the task content.
    :return: ``(code, spider.result)``.
    """
    tail = '2&1&{0}'.format(check_in)

    job = Task()
    job.content = str(city_id) + '&' + tail
    job.source = source + 'ListHotel'

    crawler = factory.get_spider_by_old_source(job.source)
    crawler.task = job

    status = crawler.crawl(required=['hotel'])
    return status, crawler.result
def hilton_to_database(tid, used_times, source, keyword, extra, spider_tag, need_cache=True):
    """Crawl ``'suggest'`` for ``keyword`` with the spider named ``spider_tag``.

    ``tid``, ``used_times`` and ``source`` are accepted but not used here.

    :param keyword: stored as the task content and appended to the log line.
    :param extra: stored as ``task.extra``.
    :param spider_tag: old source name used to look up the spider.
    :param need_cache: crawl with ``cache_config`` when true, otherwise with
        ``none_cache_config``.
    :return: ``(error_code, spider.result['suggest'])``.
    """
    job = Task()
    job.content = keyword
    job.extra = extra

    crawler = factory.get_spider_by_old_source(spider_tag)
    crawler.task = job

    chosen_cache = cache_config if need_cache else none_cache_config
    error_code = crawler.crawl(required=['suggest'], cache_config=chosen_cache)

    # Log how many suggestions came back for this keyword.
    logger.info(str(len(crawler.result['suggest'])) + ' -- ' + keyword)
    return error_code, crawler.result['suggest']
def hotel_url_to_database(tid, used_times, source, keyword, spider_tag, need_cache=False):
    """Crawl ``'hotel'`` by hotel name, crawling once more when few results return.

    A task with ``ticket_info['hotel_name'] = keyword`` is crawled without
    cache. If the first crawl yields two or fewer ``'hotel'`` entries, an
    identical second crawl is run on a fresh spider and its entries are
    appended to the first result.

    ``tid``, ``used_times``, ``source`` and ``need_cache`` are accepted but
    not used here; ``none_cache_config`` is always passed.

    :param keyword: hotel name placed into the task's ticket info.
    :param spider_tag: old source name used to look up the spider.
    :return: ``(error_code, result, search_result)`` where ``error_code`` and
        ``search_result`` (``user_datas['search_result']``) come from the
        first spider only.
    """

    def crawl_once():
        # Build a fresh task/spider pair and run a single uncached crawl.
        job = Task()
        job.ticket_info['hotel_name'] = keyword
        crawler = factory.get_spider_by_old_source(spider_tag)
        crawler.task = job
        status = crawler.crawl(required=['hotel'], cache_config=none_cache_config)
        return status, crawler

    error_code, first = crawl_once()
    merged = first.result

    if len(first.result['hotel']) <= 2:
        # The second crawl's status code is deliberately not reported.
        _, second = crawl_once()
        for extra_hotel in second.result['hotel']:
            merged['hotel'].append(extra_hotel)

    return error_code, merged, first.user_datas['search_result']
def hotel_detail_database(url, source, need_cache=True):
    """Crawl ``'hotel'`` for ``url`` with the ``<source>DetailHotel`` spider.

    :param url: stored as the task content.
    :param source: prefix of the spider name; also written to
        ``spider.task.source`` after the task is attached.
    :param need_cache: crawl with ``cache_config`` when true, otherwise with
        ``none_cache_config``.
    :return: ``(error_code, spider.result, spider.page_store_key_list)``.
    """
    job = Task()
    job.content = url

    crawler = factory.get_spider_by_old_source(source + 'DetailHotel')
    crawler.task = job
    crawler.task.source = source

    chosen_cache = cache_config if need_cache else none_cache_config
    error_code = crawler.crawl(required=['hotel'], cache_config=chosen_cache)

    summary = str(job.ticket_info) + ' -- ' + job.content + '--' + str(error_code)
    logger.info(summary)
    return error_code, crawler.result, crawler.page_store_key_list
def hotel_url_to_database(source, keyword, need_cache=False):
    """Crawl ``'hotel'`` with the ``<source>ListHotel`` spider for ``keyword``.

    ``keyword`` is stored under both ``'url'`` and ``'hotel_name'`` in the
    task's ticket info. The crawl's error code is printed.

    :param source: prefix of the old spider name.
    :param keyword: value written into the ticket info.
    :param need_cache: crawl with ``cache_config`` when true, otherwise with
        ``none_cache_config``.
    :return: ``(error_code, spider.result['hotel'])``.
    """
    job = Task()
    for field in ('url', 'hotel_name'):
        job.ticket_info[field] = keyword

    crawler = factory.get_spider_by_old_source(source + 'ListHotel')
    crawler.task = job

    chosen_cache = cache_config if need_cache else none_cache_config
    error_code = crawler.crawl(required=['hotel'], cache_config=chosen_cache)
    print(error_code)

    return error_code, crawler.result['hotel']
def poidetail_to_database(tid, used_times, source, url, need_cache=True):
    """Crawl ``'POIdetail'`` for ``url`` with the ``<source>_detail`` spider.

    The spider name and the crawl's error code are printed; the result and
    task content are logged.

    :param tid: stored in the task's ticket info under ``'tid'``.
    :param used_times: stored in the ticket info under ``'used_times'``.
    :param source: prefix of the old spider name (``source + '_detail'``).
    :param url: stored as the task content.
    :param need_cache: crawl with ``cache_config`` when true, otherwise with
        ``none_cache_config``.
    :return: ``(error_code, spider.result['POIdetail'],
        spider.page_store_key_list)``.
    """
    job = Task()
    job.content = url
    job.ticket_info = {'tid': tid, 'used_times': used_times}

    spider_tag = source + '_detail'
    print(spider_tag)
    crawler = factory.get_spider_by_old_source(spider_tag)
    crawler.task = job

    chosen_cache = cache_config if need_cache else none_cache_config
    error_code = crawler.crawl(required=['POIdetail'], cache_config=chosen_cache)
    print(error_code)

    logger.info(str(crawler.result['POIdetail']) + ' -- ' + job.content)
    return error_code, crawler.result['POIdetail'], crawler.page_store_key_list
def hilton_to_database(tid, used_times, source, source_id, city_id, check_in, need_cache=True):
    """Crawl ``'list'`` and ``'room'`` with the ``'hiltonHotel2'`` spider.

    The task content is ``'NULL&<city_id>&<source_id>&2&<check_in>'``.
    ``source`` is accepted but not used here.

    :param tid: stored in the task's ticket info under ``'tid'``.
    :param used_times: stored in the ticket info under ``'used_times'``.
    :param source_id: third field of the task content.
    :param city_id: second field of the task content.
    :param check_in: last field of the task content.
    :param need_cache: crawl with ``cache_config`` when true, otherwise with
        ``none_cache_config``.
    :return: ``(error_code, spider.result['room'], spider.page_store_key_list)``.
    """
    tail = '2&{0}'.format(check_in)

    job = Task()
    job.content = 'NULL&' + str(city_id) + '&' + str(source_id) + '&' + tail
    job.ticket_info = {
        'tid': tid,
        'used_times': used_times,
        'room_info': [{"occ": 2, "num": 1}]
    }

    crawler = factory.get_spider_by_old_source('hiltonHotel2')
    crawler.task = job

    chosen_cache = cache_config if need_cache else none_cache_config
    error_code = crawler.crawl(required=['list', 'room'], cache_config=chosen_cache)
    print(error_code)

    logger.info(str(crawler.result['room']) + ' -- ' + job.content)
    return error_code, crawler.result['room'], crawler.page_store_key_list
def GTdetail_to_database(tid, used_times, source, ticket, need_cache=True):
    """Crawl ``'vacation'`` with the ``'<source>|vacation_detail'`` spider.

    The crawl's error code is printed.

    :param tid: stored in the task's ticket info under ``'tid'``.
    :param used_times: stored in the ticket info under ``'used_times'``.
    :param source: stored under ``'source'`` and used to build the spider name.
    :param ticket: stored in the ticket info under ``'vacation_info'``.
    :param need_cache: crawl with ``cache_config`` when true, otherwise with
        ``none_cache_config``.
    :return: ``(error_code, spider.result['vacation'],
        spider.page_store_key_list)``.
    """
    job = Task()
    job.ticket_info = {
        'tid': tid,
        'vacation_info': ticket,
        'source': source,
        'used_times': used_times
    }

    spider_tag = '{}|vacation_detail'.format(source)
    crawler = factory.get_spider_by_old_source(spider_tag)
    crawler.task = job

    chosen_cache = cache_config if need_cache else none_cache_config
    error_code = crawler.crawl(required=['vacation'], cache_config=chosen_cache)
    print(error_code)

    return error_code, crawler.result['vacation'], crawler.page_store_key_list
def qyer_list_to_database(tid, used_times, source, city_id, check_in, city_url, need_cache=True):
    """Crawl ``'list'`` for ``city_url`` with the ``'qyerList'`` spider.

    ``source``, ``city_id`` and ``check_in`` are accepted but not used here.
    The crawl's error code is printed; the result and task content are logged.

    :param tid: stored in the task's ticket info under ``'tid'``.
    :param used_times: stored in the ticket info under ``'used_times'``.
    :param city_url: stored as the task content.
    :param need_cache: crawl with ``cache_config`` when true, otherwise with
        ``none_cache_config``.
    :return: ``(error_code, spider.result['list'],
        spider.page_store_key_list, spider.types_result_num)``.
    """
    job = Task()
    job.content = city_url
    job.ticket_info = {'tid': tid, 'used_times': used_times}

    crawler = factory.get_spider_by_old_source('qyerList')
    crawler.task = job

    chosen_cache = cache_config if need_cache else none_cache_config
    error_code = crawler.crawl(required=['list'], cache_config=chosen_cache)
    print(error_code)

    logger.info(str(crawler.result['list']) + ' -- ' + job.content)
    return (error_code, crawler.result['list'],
            crawler.page_store_key_list, crawler.types_result_num)
def hotel_list_database(source, check_in, suggest_type='1', suggest=''):
    """Crawl ``REQUIRED`` with the ``OLD_SOURCE`` spider using a new-type ticket.

    The ticket info gets a fresh ``uuid.uuid4()`` as ``'tid'`` and a random
    ``'used_times'`` between 1 and 6. ``source`` is accepted but not used here.

    :param check_in: stored (as ``str``) under ``'check_in'``.
    :param suggest_type: converted with ``int`` and stored under
        ``'suggest_type'``.
    :param suggest: stored under ``'suggest'``.
    :return: ``(error_code, spider.result, spider.page_store_key_list)``.
    """
    # Initialise the task.
    job = Task()
    ticket = {
        "is_new_type": True,
        "suggest_type": int(suggest_type),
        "suggest": suggest,
        "check_in": str(check_in),
        "stay_nights": '1',
        "occ": '2',
        'is_service_platform': True,
        'tid': uuid.uuid4(),
        'used_times': random.randint(1, 6),
    }
    job.ticket_info = ticket
    job.content = ''

    # Initialise the spider.
    crawler = factory.get_spider_by_old_source(OLD_SOURCE)
    crawler.task = job

    # Issue the request.
    error_code = crawler.crawl(required=REQUIRED, cache_config=False)
    return error_code, crawler.result, crawler.page_store_key_list
def hotel_list_database(tid, used_times, source, city_id, check_in, is_new_type=False,
                        suggest_type='1', suggest='', need_cache=True, flag=False):
    """Build a hotel task for ``source`` and crawl it with the matching spider.

    New-type tasks (``is_new_type`` true) carry everything in ``ticket_info``
    and use an empty content string. Otherwise the content depends on
    ``source``:

    * ``'hilton'``: ``check_in``
    * ``'starwood'``: ``suggest + '&'``
    * ``'hyatt'``: empty string
    * ``'gha'``: ``suggest``
    * anything else: ``'<city_id>&2&1&<check_in>'``

    For ``'bestwest'`` the content is then replaced by
    ``'&<description>&<check_in>&2'`` and the ticket info is replaced by a
    dict holding only ``'locationLng'`` and ``'locationLat'``. Both values are
    parsed from ``suggest``, which is read as ``'<description>&<lng>,<lat>'``.

    The ticket info is printed before the crawl.

    :param tid: stored in the ticket info under ``'tid'``.
    :param used_times: stored in the ticket info under ``'used_times'``.
    :param source: stored as ``task.source`` and used as spider name prefix.
    :param city_id: used in the default content and the new-type ticket info.
    :param check_in: check-in value used in the content or the ticket info.
    :param is_new_type: selects the new-type ticket layout when true.
    :param suggest_type: converted with ``int`` for new-type tickets.
    :param suggest: source-specific suggestion string (see above).
    :param need_cache: crawl with ``cache_config`` when true, otherwise with
        ``none_cache_config``.
    :param flag: when true use the ``<source>FilterHotel`` spider and require
        ``['filter']``; otherwise ``<source>ListHotel`` and ``['hotel']``.
    :return: ``(error_code, spider.result, spider.page_store_key_list)``.
    """
    job = Task()
    job.source = source

    if is_new_type:
        job.ticket_info = {
            "is_new_type": True,
            "suggest_type": int(suggest_type),
            "suggest": suggest,
            "check_in": str(check_in),
            "stay_nights": '1',
            "city_id": city_id,
            "occ": '2',
            'is_service_platform': True,
            'tid': tid,
            'used_times': used_times,
        }
        job.content = ''
    else:
        if source == 'hilton':
            job.content = check_in
        elif source == 'starwood':
            job.content = suggest + '&'
        elif source == 'hyatt':
            job.content = ''
        elif source == 'gha':
            job.content = suggest
        else:
            job.content = str(city_id) + '&' + '2&1&{0}'.format(check_in)

        job.ticket_info = {
            "is_new_type": False,
            'is_service_platform': True,
            'tid': tid,
            'used_times': used_times
        }

        if source == 'bestwest':
            # suggest is read as '<description>&<lng>,<lat>'.
            pieces = suggest.split('&')
            description = pieces[0]
            coordinates = pieces[1].split(',')
            job.content = '&{}&{}&2'.format(description, check_in)
            # This replaces the ticket info built just above.
            job.ticket_info = {
                'locationLng': float(coordinates[0]),
                'locationLat': float(coordinates[1])
            }

    print(job.ticket_info)

    if flag:
        suffix, required = 'FilterHotel', ['filter']
    else:
        suffix, required = 'ListHotel', ['hotel']
    old_spider_tag = source + suffix

    crawler = factory.get_spider_by_old_source(old_spider_tag)
    crawler.task = job

    chosen_cache = cache_config if need_cache else none_cache_config
    error_code = crawler.crawl(required=required, cache_config=chosen_cache)
    return error_code, crawler.result, crawler.page_store_key_list