예제 #1
0
def reload_and_config():
    """Re-import the DB/proxy helpers and re-register them with spider_factory.

    Intended to refresh the spider configuration after the ``common`` package
    may have been reloaded; always configures with debug disabled.
    """
    # Re-import so we pick up a freshly (re)loaded common package.
    from common.common import get_proxy
    import common.insert_db
    insert_db = common.insert_db
    # NOTE: the original code had a no-op `get_proxy = get_proxy` here,
    # removed — the name is already bound by the import above.
    debug = False
    spider_factory.config_spider(insert_db, get_proxy, debug)
예제 #2
0
import mioji.common.pool
import mioji.common.pages_store
from toolbox import Common
# Size the shared crawl worker pool for this task module.
mioji.common.pool.pool.set_size(128)

# Route the mioji library's logging through this Celery task logger.
logger = get_task_logger('daodaoHotel')
mioji.common.logger.logger = logger
# Configure the page cache location/backend (values defined elsewhere in this file).
mioji.common.pages_store.cache_dir = list_cache_path
mioji.common.pages_store.STORE_TYPE = cache_type
# Initialization (runs once at program start).
insert_db = None
# get_proxy = simple_get_socks_proxy
get_proxy = proxy_pool.get_proxy
debug = False
spider_factory.config_spider(insert_db,
                             get_proxy,
                             debug,
                             need_flip_limit=False)


def hotel_url_to_database(tid,
                          used_times,
                          source,
                          keyword,
                          spider_tag,
                          need_cache=False):
    """Crawl hotel data for *keyword* using the spider registered under *spider_tag*.

    NOTE(review): ``tid``, ``used_times``, ``source`` and ``need_cache`` are
    unused in the visible portion of this function — the body appears
    truncated here; presumably the remainder persists the crawl result.
    """
    task = Task()
    task.ticket_info['hotel_name'] = keyword
    # Resolve the spider implementation by its legacy source tag.
    spider = factory.get_spider_by_old_source(spider_tag)
    spider.task = task
    # Fetch only the 'hotel' section; none_cache_config is defined elsewhere
    # in this module — presumably it disables page caching (TODO confirm).
    error_code = spider.crawl(required=['hotel'],
                              cache_config=none_cache_config)
예제 #3
0
import json
import traceback
import dataset
import common.common
from mioji.spider_factory import factory
from mioji.common.task_info import Task
from proj.celery import app
from proj.my_lib.BaseTask import BaseTask
from proj.my_lib.task_module.task_func import update_task, insert_task, get_task_id
from mioji import spider_factory

# Initialization (runs once at program start): register the DB sink,
# proxy source, and debug flag with the spider factory.
insert_db = None
get_proxy = common.common.get_proxy
debug = False
spider_factory.config_spider(insert_db, get_proxy, debug)

# Default hotel query: fixed check-in date, one night, one unspecified room.
hotel_default = {
    'check_in': '20171203',
    'nights': 1,
    'rooms': [{}],
}
# Same stay, but the single room holds one adult and three children.
hotel_rooms = {'check_in': '20171203', 'nights': 1, 'rooms': [{'adult': 1, 'child': 3}]}
hotel_rooms_c = {
    'check_in': '20171203',
    'nights': 1,
    'rooms': [{
        'adult': 1,
예제 #4
0
from proj.mysql_pool import service_platform_pool
from proj.my_lib.Common.Browser import proxy_pool

logger = get_logger("poiDaodao")

# Spider/global configuration for the daodao POI crawler.
mioji.common.spider.NEED_FLIP_LIMIT = False
mioji.common.pool.pool.set_size(2024)
mioji.common.pages_store.cache_dir = list_cache_path
mioji.common.pages_store.STORE_TYPE = cache_type

# Initialization (runs once at program start).
insert_db = None
# get_proxy = simple_get_socks_proxy
get_proxy = proxy_pool.get_proxy
debug = True
# FIX: get_proxy was assigned above but None was passed to config_spider,
# unlike every other config_spider call in this project — pass get_proxy.
spider_factory.config_spider(insert_db, get_proxy, debug)
# (A duplicate `mioji.common.spider.NEED_FLIP_LIMIT = False`, a no-op repeat
# of the assignment above, was removed here.)

mioji.common.logger.logger = logger
client = pymongo.MongoClient(host='10.10.213.148')
collections = client['data_result']['daodao']

URL = 'https://www.tripadvisor.cn'
SQL = """insert ignore into {table_name} (source, source_id, city_id, country_id, hotel_url) values(%s, %s, %s, %s, %s)"""
type_dict = {'attr': 'view', 'rest': 'restaurant'}
spider_name = {'attr': 'View', 'rest': 'Rest'}


def hotel_list_database(source,
                        url,
                        required,