class AllManager(object):
    """Run one ``manager.py`` sub-command for every configured spider.

    The sub-command is whichever parsed command-line option carries the
    value ``'all'``; it is looked up in ``arg_command_map`` and launched
    once per spider name, each in its own ``Process``.
    """

    # First placeholder of every command template; read from
    # settings.ini [FRAME_SETTINGS] PYTHON_ENV.
    env = get_config('settings.ini').get('FRAME_SETTINGS', 'PYTHON_ENV')
    # Entries of the [SPIDERS] section of spider_config.ini; each one is
    # used as the second placeholder of the command template.
    names = get_config('spider_config.ini').options('SPIDERS')
    # Maps an argument name to the shell command template it triggers.
    arg_command_map = {
        'view': '{} manager.py -v {}',
        'kill': '{} manager.py -k {}',
        'restart': '{} manager.py -rs {}',
    }

    def __init__(self):
        """Resolve which argument was invoked with the value ``'all'``."""
        self.arg_inst = self._get_arg_key()

    def _get_arg_key(self):
        """Return the name of the parsed argument whose value is ``'all'``.

        When several arguments carry ``'all'`` the last one reported by
        ``args._get_kwargs()`` wins.

        Raises:
            ValueError: if no argument has the value ``'all'``.  Without
                this check the method would fail with an opaque
                ``UnboundLocalError`` on its return statement.
        """
        arg_inst = None
        for key, value in args._get_kwargs():
            if value == 'all':
                arg_inst = key
        if arg_inst is None:
            raise ValueError(
                "AllManager requires one command-line option set to 'all'")
        return arg_inst

    def _get_command(self, arg, name):
        """Build the command line for argument ``arg`` and spider ``name``.

        Raises:
            KeyError: if ``arg`` is not a key of ``arg_command_map``.
        """
        return self.arg_command_map[arg].format(self.env, name)

    def run(self):
        """Start one process per spider name; the processes are not joined."""
        workers = [
            Process(target=self._execute_command, args=(name, ))
            for name in self.names
        ]
        for worker in workers:
            worker.start()

    def _execute_command(self, name):
        """Launch the selected command for ``name`` through ``popen``."""
        popen(self._get_command(self.arg_inst, name))
def __init__(self):
    """Connect to MongoDB using the [MONGO] section of the active config.

    settings.ini [MODE] DEBUG selects the file: 'debug_config.ini' when
    true, 'server_config.ini' otherwise.  Authentication is attempted
    only when both USERNAME and PASSWORD are non-empty.
    """
    settings = get_config('settings.ini')
    self.debug_cfg = settings
    self.debug = settings.getboolean('MODE', 'DEBUG')

    if self.debug:
        config_file = 'debug_config.ini'
    else:
        config_file = 'server_config.ini'
    conf = get_config(config_file)
    self.config = conf

    self.host = conf.get('MONGO', 'HOST')
    self.port = conf.getint('MONGO', 'PORT')
    # self.db holds the database *name* until the client exists; it is
    # rebound to the database object further down.
    db_name = conf.get('MONGO', 'DB')
    self.db = db_name
    self.username = conf.get('MONGO', 'USERNAME')
    self.password = conf.get('MONGO', 'PASSWORD')

    self._client = MongoClient(self.host, self.port)
    self.db = self._client[db_name]

    # Skip authentication when either credential is empty.
    if self.username and self.password:
        self.db.authenticate(self.username, self.password)
def __init__(self):
    """Create a Redis connection pool and client from the active config.

    settings.ini [MODE] DEBUG selects the file: 'debug_config.ini' when
    true, 'server_config.ini' otherwise.  Connection parameters come
    from that file's [REDIS] section.
    """
    settings = get_config('settings.ini')
    self.debug_cfg = settings
    self.debug = settings.getboolean('MODE', 'DEBUG')

    if self.debug:
        config_file = 'debug_config.ini'
    else:
        config_file = 'server_config.ini'
    conf = get_config(config_file)
    self.config = conf

    self.host = conf.get('REDIS', 'HOST')
    # PORT is read with get(), not getint(), and handed to the pool in
    # whatever form get() returns it.
    self.port = conf.get('REDIS', 'PORT')
    self.db = conf.getint('REDIS', 'DB')
    self.password = conf.get('REDIS', 'PASSWORD')

    pool_options = {
        'host': self.host,
        'port': self.port,
        'db': self.db,
        'password': self.password,
    }
    self.pool = redis.ConnectionPool(**pool_options)
    self.client = redis.StrictRedis(connection_pool=self.pool)
def __spider_conf(self):
    """Return this spider's config entry without 'file' and 'class'.

    The entry is the [SPIDERS] value stored under ``self.name`` in
    spider_config.ini, evaluated as a Python expression.

    Raises:
        KeyError: if the evaluated mapping lacks 'file' or 'class'.
    """
    raw_entry = get_config('spider_config.ini').get('SPIDERS', self.name)
    # NOTE(review): eval() executes whatever expression the config file
    # contains; only safe while spider_config.ini is trusted.
    settings = eval(raw_entry)
    for dropped_key in ('file', 'class'):
        settings.pop(dropped_key)
    return settings
def __init__(self):
    """Capture the parsed command-line options and derive the spider name.

    ``self.name`` is the first truthy option value, checked in the order
    s, r, c, cd, runspider, v, k, rs; if none is truthy it is the value
    of the last one (rs).
    """
    self.seed_name = args.s
    self.run_name = args.r
    self.clear_name = args.c
    self.cleardup_name = args.cd
    self.runspider_name = args.runspider
    self.view_name = args.v
    self.kill_name = args.k
    self.restart_name = args.rs

    candidates = (
        self.seed_name,
        self.run_name,
        self.clear_name,
        self.cleardup_name,
        self.runspider_name,
        self.view_name,
        self.kill_name,
        self.restart_name,
    )
    # Same result as chaining the values with ``or``: first truthy one,
    # otherwise the final candidate unchanged.
    self.name = next(
        (value for value in candidates[:-1] if value), candidates[-1])

    # Names derived from the spider name: '<name>:seed' and '<name>:dup'.
    self.name_seed = ':'.join((self.name, 'seed'))
    self.name_dup = ':'.join((self.name, 'dup'))

    self.log = get_log_config(self.name)
    self.seedmanager = SeedManager()

    spiders_cfg = get_config('spider_config.ini')
    self.spider_config = spiders_cfg.get('SPIDERS', self.name)
    frame_cfg = get_config('settings.ini')
    self.python_env = frame_cfg.get('FRAME_SETTINGS', 'PYTHON_ENV')
def __init__(self, name):
    """Load the [LOG] settings and prepare the logger called ``name``.

    Args:
        name: logger name passed to ``logging.getLogger``.
    """
    cfg = get_config('settings.ini')
    self.config = cfg

    # Boolean switches, stored under the same name as the config option.
    for flag in ('LOG_ENABLED', 'LOG_TO_CONSOLE', 'LOG_TO_FILE'):
        setattr(self, flag, cfg.getboolean('LOG', flag))
    # Plain string options.
    for option in ('LOG_LEVEL', 'LOG_FORMAT', 'LOG_PATH'):
        setattr(self, option, cfg.get('LOG', option))

    self.name = name
    logger = logging.getLogger(name=self.name)
    self.logger = logger
    logger.setLevel(self.LOG_LEVEL)

    self.log_to_console()
    self.log_to_file()

    # Keep records from also being passed up to ancestor loggers.
    logger.propagate = False
def __init__(self):
    """Validate the spider name and build the spider's collaborators.

    Raises:
        Exception: if ``self.name`` is empty or whitespace only.  The
            message tells subclasses to override ``name``.
    """
    stripped_name = self.name.strip()
    if not stripped_name:
        raise Exception('子类爬虫必须重写 name ,请为爬虫命名。')

    self.name = self.name.lower()
    # Names derived from the spider name: '<name>:seed' and '<name>:dup'.
    self.__name_seed = ':'.join((self.name, 'seed'))
    self.__name_dup = ':'.join((self.name, 'dup'))
    self.log = get_log_config(self.name)

    # Collaborators, created in a fixed order.
    self.__req = Requester()
    self.__manager = SeedManager()
    self.__dup = Duplicater()
    self.__mongopipe = MongoPipe()
    self.__sched = BlockingScheduler()

    settings = get_config('settings.ini')
    self.__settings = settings
    self.__debug = settings.getboolean('MODE', 'DEBUG')
    self.__sleep_interval = settings.getint(
        'FRAME_SETTINGS', 'SLEEP_INTERVAL')

    # Request / response / save / error counters all start at zero.
    self.__req_count = self.__resp_count = 0
    self.__save_count = self.__err_count = 0