def __init__(self):
    """Build the collaborators this object needs for issuing requests.

    Creates a logger, a ``Session``, the request and response middleware
    objects, and a ``ReqExceptions`` helper.  The values returned by the
    helper's ``get_timeout_errors()`` and ``get_other_request_errors()``
    are fetched once here and stored on the instance.
    """
    self.log = get_log_config()
    self.session = Session()
    self.reqmiddleware = ReqMiddleware()
    self.resmiddleware = ResMiddleware()

    # One ReqExceptions instance serves both lookups and is also kept
    # on the instance as ``req_ex``.
    exceptions = ReqExceptions()
    self.req_ex = exceptions
    self.timeout_errors = exceptions.get_timeout_errors()
    self.other_request_errors = exceptions.get_other_request_errors()
def __init__(self):
    """Capture the command-line selections and prepare per-spider resources.

    Every option read from the module-level ``args`` object is copied onto
    the instance.  The first truthy one, in the order seed, run, clear,
    cleardup, runspider, view, kill, restart, becomes ``self.name``.  That
    name is then used to build the ``<name>:seed`` and ``<name>:dup`` keys,
    to configure the logger, and to look up the spider's entry in the
    ``SPIDERS`` section of ``spider_config.ini``.

    Raises:
        TypeError: if the resolved name is not a string, for example when
            none of the options was supplied.  The original code raised
            the same exception type from ``str.join``, only with a
            message that did not point at the real cause.
    """
    self.seed_name = args.s
    self.run_name = args.r
    self.clear_name = args.c
    self.cleardup_name = args.cd
    self.runspider_name = args.runspider
    self.view_name = args.v
    self.kill_name = args.k
    self.restart_name = args.rs

    # First truthy option wins; if all are falsy the last value is kept.
    self.name = (self.seed_name
                 or self.run_name
                 or self.clear_name
                 or self.cleardup_name
                 or self.runspider_name
                 or self.view_name
                 or self.kill_name
                 or self.restart_name)

    if not isinstance(self.name, str):
        # str.join below would raise TypeError for a non-string anyway;
        # fail at the same point with a message that names the problem.
        raise TypeError(
            'spider name must be a string supplied by one of the '
            'command-line options, got {!r}'.format(self.name))

    self.name_seed = ':'.join([self.name, 'seed'])
    self.name_dup = ':'.join([self.name, 'dup'])
    self.log = get_log_config(self.name)
    self.seedmanager = SeedManager()
    self.spider_config = get_config('spider_config.ini').get(
        'SPIDERS', self.name)
    self.python_env = get_config('settings.ini').get(
        'FRAME_SETTINGS', 'PYTHON_ENV')
def __init__(self):
    """Validate the spider name and create the spider's collaborators.

    ``self.name`` must already be available (it is read before anything
    is assigned here) and must contain at least one non-whitespace
    character.  The name is lower-cased and used to derive the
    ``<name>:seed`` and ``<name>:dup`` keys and to configure the logger.

    The double-underscore attributes are subject to Python's private name
    mangling when this method sits inside a class body.

    Raises:
        Exception: if ``self.name`` is empty or whitespace only.
    """
    if not self.name.strip():
        # Message (Chinese): "Subclass spiders must override `name`;
        # please give the spider a name."
        raise Exception('子类爬虫必须重写 name ,请为爬虫命名。')

    spider_name = self.name.lower()
    self.name = spider_name
    self.__name_seed = ':'.join((spider_name, 'seed'))
    self.__name_dup = ':'.join((spider_name, 'dup'))
    self.log = get_log_config(spider_name)

    # Helper objects, created in the same order as before.
    self.__req = Requester()
    self.__manager = SeedManager()
    self.__dup = Duplicater()
    self.__mongopipe = MongoPipe()
    self.__sched = BlockingScheduler()

    # Framework settings: debug flag and sleep interval.
    settings = get_config('settings.ini')
    self.__settings = settings
    self.__debug = settings.getboolean('MODE', 'DEBUG')
    self.__sleep_interval = settings.getint('FRAME_SETTINGS',
                                            'SLEEP_INTERVAL')

    # Request / response / save / error counters all start at zero.
    self.__req_count = self.__resp_count = 0
    self.__save_count = self.__err_count = 0
def __init__(self):
    """Attach a logger, obtained from ``get_log_config()``, as ``self.log``."""
    logger = get_log_config()
    self.log = logger
def __init__(self):
    """Set up the logger, the Mongo connection/database and the middleware.

    Stores the ``MongoConn`` instance as ``self.mongoconn`` and whatever
    its ``get_db`` attribute yields as ``self.db``.
    """
    self.log = get_log_config()

    connection = MongoConn()
    self.mongoconn = connection
    # NOTE(review): ``get_db`` is read without being called, so it is
    # presumably a property; confirm against MongoConn.
    self.db = connection.get_db

    self.spidermiddleware = SpiderMiddleware()
def __init__(self):
    """Obtain the Redis client and a logger for this object."""
    connection = RedisConn()
    # NOTE(review): ``get_client`` is read without being called, so it is
    # presumably a property; confirm against RedisConn.
    self.client = connection.get_client
    self.log = get_log_config()
def __init__(self):
    """Obtain the Redis client, a 'SeedManager' logger and the middleware."""
    connection = RedisConn()
    # NOTE(review): ``get_client`` is read without being called, so it is
    # presumably a property; confirm against RedisConn.
    self.client = connection.get_client

    # The logger is configured under the explicit name 'SeedManager'.
    self.log = get_log_config('SeedManager')
    self.spidermiddleware = SpiderMiddleware()
def __init__(self):
    """Create the logger and the ``Duplicater``, and record the dup suffix.

    ``dupname_suffix`` is the fixed string ``'dup'``.
    """
    self.log = get_log_config()
    self.dup = Duplicater()

    # Constant suffix stored on the instance.
    self.dupname_suffix = 'dup'