def __init__(self, settings=None, install_root_handler=True):
    """Initialise the process, then set up signal handling and logging.

    :param settings: passed unchanged to the parent initialiser
    :param install_root_handler: passed as the second argument to
        ``configure_logging``
    """
    super(CrawlerProcess, self).__init__(settings)

    # Install the shutdown handler.
    shutdown_callback = self._signal_shutdown
    install_shutdown_handlers(shutdown_callback)

    # Configure logging.
    configure_logging(self.settings, install_root_handler)
    log_scrapy_info(self.settings)
def __init__(self, settings):
    """Register the shutdown handler and set the initial bookkeeping state.

    :param settings: stored as-is on ``self.settings``
    """
    # The handler is registered before any attribute is assigned.
    install_shutdown_handlers(self._signal_shutdown)

    self.settings = settings
    self.crawlers, self.plugins = {}, {}
    self.stopping, self._started = False, None
def start(self, stop_after_crawl=True, install_signal_handlers=True):
    """
    Start a :mod:`~twisted.internet.reactor`, resize its thread pool to
    :setting:`REACTOR_THREADPOOL_MAXSIZE`, and install the resolver named by
    :setting:`DNS_RESOLVER` on it.

    When ``stop_after_crawl`` is True, the reactor is stopped once the
    deferred returned by :meth:`join` fires; if that deferred has already
    fired, the reactor is never started.

    :param bool stop_after_crawl: stop or not the reactor when all
        crawlers have finished

    :param bool install_signal_handlers: whether to install the shutdown
        handlers (default: True)
    """
    from twisted.internet import reactor

    if stop_after_crawl:
        joined = self.join()
        if joined.called:
            # Everything already finished: there is nothing to wait for,
            # so do not start the reactor at all.
            return
        joined.addBoth(self._stop_reactor)

    if install_signal_handlers:
        install_shutdown_handlers(self._signal_shutdown)

    # Build the configured resolver and attach it to the reactor.
    resolver_cls = load_object(self.settings["DNS_RESOLVER"])
    dns_resolver = create_instance(
        resolver_cls, self.settings, self, reactor=reactor
    )
    dns_resolver.install_on_reactor()

    # Resize the reactor thread pool according to the settings.
    thread_pool = reactor.getThreadPool()
    thread_pool.adjustPoolsize(
        maxthreads=self.settings.getint('REACTOR_THREADPOOL_MAXSIZE')
    )

    reactor.addSystemEventTrigger('before', 'shutdown', self.stop)
    reactor.run(installSignalHandlers=False)  # blocking call
def _signal_kill(self, signum, _):
    """Handle a repeated shutdown signal by stopping the reactor right away.

    :param signum: signal number, used to look up a name in ``signal_names``
    :param _: second handler argument; unused
    """
    # From now on the shutdown signals are registered with SIG_IGN.
    install_shutdown_handlers(signal.SIG_IGN)

    received = signal_names[signum]
    log.msg(
        format='Received %(signame)s twice, forcing unclean shutdown',
        level=log.INFO,
        signame=received,
    )
    self._stop_logging()

    # Schedule the stop through the reactor instead of calling it directly.
    reactor.callFromThread(self._stop_reactor)
def _signal_shutdown(self, signum, _):
    """Handle the first shutdown signal by scheduling ``self.stop``.

    :param signum: signal number, used to look up a name in ``signal_names``
    :param _: second handler argument; unused
    """
    # Swap in the kill handler so that a second signal is treated differently.
    install_shutdown_handlers(self._signal_kill)

    received = signal_names[signum]
    notice = (
        "Received %s, shutting down gracefully. Send again to force "
        "unclean shutdown" % received
    )
    log.msg(notice, level=log.INFO)

    reactor.callFromThread(self.stop)
def _signal_shutdown(self, signum, _):
    """Handle the first shutdown signal by scheduling a graceful stop.

    :param signum: signal number, used to look up a name in ``signal_names``
    :param _: second handler argument; unused
    """
    # Swap in the kill handler so that a second signal is treated differently.
    install_shutdown_handlers(self._signal_kill)
    signame = signal_names[signum]
    # The message previously ended at "Send again to force "; the sentence
    # is completed here so the log line tells the user what a second signal
    # does.
    logger.info(
        "Received %(signame)s, shutting down gracefully. "
        "Send again to force unclean shutdown",
        {'signame': signame},
    )
    reactor.callFromThread(self._graceful_stop_reactor)
def _signal_kill(self, signum, _):
    """Handle a repeated shutdown signal by scheduling ``self.engine.kill``.

    :param signum: signal number, used to look up a name in ``signal_names``
    :param _: second handler argument; unused
    """
    received = signal_names[signum]
    notice = 'Received %s twice, forcing unclean shutdown' % received
    log.msg(notice, level=log.INFO)

    # Disable logging of confusing tracebacks.
    log.log_level = log.SILENT

    reactor.callFromThread(self.engine.kill)

    # Only after the kill is scheduled are the signals registered with SIG_IGN.
    install_shutdown_handlers(signal.SIG_IGN)
def _signal_kill(self, signum, _):
    """Handle a repeated shutdown signal by stopping the reactor right away.

    :param signum: signal number, used to look up a name in ``signal_names``
    :param _: second handler argument; unused
    """
    # From now on the shutdown signals are registered with SIG_IGN.
    install_shutdown_handlers(signal.SIG_IGN)

    received = signal_names[signum]
    log.msg(
        format='Received %(signame)s twice, forcing unclean shutdown',
        level=log.INFO,
        signame=received,
    )

    reactor.callFromThread(self._stop_reactor)
def _signal_kill(self, signum, _):
    """Handle a repeated shutdown signal by stopping the reactor right away.

    :param signum: signal number, used to look up a name in ``signal_names``
    :param _: second handler argument; unused
    """
    # The reactor is imported at call time rather than at module import time.
    from twisted.internet import reactor

    # From now on the shutdown signals are registered with SIG_IGN.
    install_shutdown_handlers(signal.SIG_IGN)

    log_args = {'signame': signal_names[signum]}
    logger.info(
        'Received %(signame)s twice, forcing unclean shutdown',
        log_args,
    )

    reactor.callFromThread(self._stop_reactor)
def __init__(self, settings=None, install_root_handler=True):
    """Initialise the process, then set up signal handling and logging.

    :param settings: passed unchanged to the parent initialiser
    :param install_root_handler: passed as the second argument to
        ``configure_logging``
    """
    # Initialise the parent class.
    super(CrawlerProcess, self).__init__(settings)

    # Handle shutdown signals.
    shutdown_callback = self._signal_shutdown
    install_shutdown_handlers(shutdown_callback)

    # Configure the default logging service for Scrapy.
    configure_logging(self.settings, install_root_handler)

    # Log Scrapy information (startup state, versions, ...).
    log_scrapy_info(self.settings)
def _signal_shutdown(self, signum, _):
    """Handle the first shutdown signal by scheduling ``self.stop``.

    :param signum: signal number, used to look up a name in ``signal_names``
    :param _: second handler argument; unused
    """
    # Swap in the kill handler so that a second signal is treated differently.
    install_shutdown_handlers(self._signal_kill)
    signame = signal_names[signum]
    # The message previously ended at "Send again to force "; the sentence
    # is completed here so the log line tells the user what a second signal
    # does.
    log.msg(
        format="Received %(signame)s, shutting down gracefully. "
               "Send again to force unclean shutdown",
        level=log.INFO,
        signame=signame,
    )
    reactor.callFromThread(self.stop)
def _init(self):
    """Create the settings, database engine/session and logger, then register
    the shutdown handler.

    An existing truthy ``self.logger`` is left untouched; otherwise a logger
    named after the instance's class is created.
    """
    self.settings = get_project_settings()

    # Database access: build the engine first, then a session bound to it.
    connection_string = create_mysql_conn_string()
    self.engine = create_engine(connection_string)
    self.session = Session(self.engine)

    existing_logger = getattr(self, "logger", None)
    if not existing_logger:
        self.logger = logging.getLogger(name=self.__class__.__name__)

    install_shutdown_handlers(self.signal_shutdown_handler, True)
def _signal_shutdown(self, signum, _):
    """Handle the first shutdown signal by scheduling a graceful stop.

    :param signum: signal number, used to look up a name in ``signal_names``
    :param _: second handler argument; unused
    """
    # The reactor is imported at call time rather than at module import time.
    from twisted.internet import reactor

    # Swap in the kill handler so that a second signal is treated differently.
    install_shutdown_handlers(self._signal_kill)
    signame = signal_names[signum]
    # The message previously ended at "Send again to force "; the sentence
    # is completed here so the log line tells the user what a second signal
    # does.
    logger.info(
        "Received %(signame)s, shutting down gracefully. "
        "Send again to force unclean shutdown",
        {"signame": signame},
    )
    reactor.callFromThread(self._graceful_stop_reactor)
def __init__(self, settings=None, install_root_handler=True):
    """Initialise the process, logging a trace message after every step.

    :param settings: passed unchanged to the parent initialiser
    :param install_root_handler: passed as the second argument to
        ``configure_logging``
    """
    # NOTE(review): module-level ``logging.info`` implicitly configures the
    # root logger when it has no handlers yet - confirm that is acceptable
    # ahead of the ``configure_logging`` call below.
    trace = logging.info

    trace("CrawlerProcess __init__")
    super().__init__(settings)
    trace("super().__init__(settings)")

    install_shutdown_handlers(self._signal_shutdown)
    trace("install_shutdown_handlers(self._signal_shutdown)")

    configure_logging(self.settings, install_root_handler)
    trace("configure_logging(self.settings, install_root_handler)")

    log_scrapy_info(self.settings)
    trace("log_scrapy_info(self.settings)")
def __init__(self):
    """Create the spider's PhantomJS driver and database handle, then
    register ``self._quit_func`` as the shutdown handler.
    """
    super(TranslateSpider, self).__init__()

    # Disabled alternative: drive Firefox with a stored profile instead.
    #profile = webdriver.FirefoxProfile('/Users/zhangyuanan/Library/Application Support/Firefox/Profiles/npdra49b.new')
    #self.driver = webdriver.Firefox(profile)
    browser = webdriver.PhantomJS()
    self.driver = browser
    browser.set_window_size(1120, 550)
    browser.implicitly_wait(10)

    self.db = RemoteDb()
    self.force_quit = False

    install_shutdown_handlers(self._quit_func)
def configure(self, control_reactor=True):
    """Prepare logging, extensions, spiders and the engine for running.

    :param control_reactor: stored on ``self.control_reactor``; when true,
        the shutdown signal handler and the reactor shutdown trigger are
        installed
    """
    self.control_reactor = control_reactor
    if control_reactor:
        install_shutdown_handlers(self._signal_shutdown)
        # NOTE(review): assumed to belong to the ``control_reactor`` branch -
        # confirm against the original layout.
        reactor.addSystemEventTrigger('before', 'shutdown', scrapyengine.stop)

    # Each subsystem is started/loaded only if that has not happened yet.
    if not log.started:
        log.start()
    if not extensions.loaded:
        extensions.load()
    if not spiders.loaded:
        spiders.load()

    # Iterate the mapping directly instead of calling ``iterkeys()``, which
    # exists only on Python 2 dicts; iteration yields the same keys.
    enabled_names = ", ".join(extensions.enabled)
    log.msg("Enabled extensions: %s" % enabled_names, level=log.DEBUG)

    scrapyengine.configure()
    self.configured = True
def configure(self, control_reactor=True, queue=None):
    """Prepare logging, extensions, spiders, the queue and the engine.

    :param control_reactor: stored on ``self.control_reactor``; when true,
        the shutdown signal handler is installed
    :param queue: queue to use; a new ``ExecutionQueue`` is created when
        this is falsy
    """
    self.control_reactor = control_reactor
    if control_reactor:
        install_shutdown_handlers(self._signal_shutdown)

    # Each subsystem is started/loaded only if that has not happened yet.
    if not log.started:
        log.start()
    if not extensions.loaded:
        extensions.load()
    if not spiders.loaded:
        spiders.load()

    # Iterate the mapping directly instead of calling ``iterkeys()``, which
    # exists only on Python 2 dicts; iteration yields the same keys.
    enabled_names = ", ".join(extensions.enabled)
    log.msg("Enabled extensions: %s" % enabled_names, level=log.DEBUG)

    self.queue = queue or ExecutionQueue()
    self.engine.configure(self._spider_closed)
    self.configured = True
def _signal_kill(self, signum, _):
    """Handle a repeated shutdown signal by stopping the reactor right away.

    :param signum: signal number; unused
    :param _: second handler argument; unused
    """
    # From now on the shutdown signals are registered with SIG_IGN.
    install_shutdown_handlers(signal.SIG_IGN)
    # The signal name was looked up but never used, so the lookup is gone.
    reactor.callFromThread(self._stop_reactor)
def __init__(self, settings=None):
    """Initialise the parent class and register the shutdown handler.

    :param settings: passed unchanged to the parent initialiser
    """
    super(ScrapyCrawlerProcess, self).__init__(settings)

    shutdown_callback = self._signal_shutdown
    install_shutdown_handlers(shutdown_callback)
def _signal_shutdown(self, signum, _):
    """Handle the first shutdown signal by scheduling a graceful stop.

    :param signum: signal number; unused
    :param _: second handler argument; unused
    """
    # Swap in the kill handler so that a second signal is treated differently.
    install_shutdown_handlers(self._signal_kill)
    # The signal name was looked up but never used, so the lookup is gone.
    reactor.callFromThread(self._graceful_stop_reactor)
def __init__(self, *a, **kw):
    """Initialise the parent class, then hook up stop and shutdown handling.

    All positional and keyword arguments are forwarded to the parent
    initialiser unchanged.
    """
    super(CrawlerProcess, self).__init__(*a, **kw)

    # Call ``self.stop`` when the ``engine_stopped`` signal is sent.
    self.signals.connect(self.stop, signals.engine_stopped)

    shutdown_callback = self._signal_shutdown
    install_shutdown_handlers(shutdown_callback)
def _signal_kill(self, signum, _):
    """Handle a repeated shutdown signal by stopping the reactor right away.

    :param signum: signal number, used to look up a name in ``signal_names``
    :param _: second handler argument; unused
    """
    # From now on the shutdown signals are registered with SIG_IGN.
    install_shutdown_handlers(signal.SIG_IGN)

    log_args = {'signame': signal_names[signum]}
    logger.info(
        'Received %(signame)s twice, forcing unclean shutdown',
        log_args,
    )

    reactor.callFromThread(self._stop_reactor)
def __init__(self, *a, **kw):
    """Register ``self._signal_shutdown`` as the shutdown handler.

    The positional and keyword arguments are accepted but not used.
    """
    shutdown_callback = self._signal_shutdown
    install_shutdown_handlers(shutdown_callback)
def __init__(self, settings=None, install_root_handler=True):
    """Initialise the process, then set up signal handling and logging.

    :param settings: passed unchanged to the parent initialiser
    :param install_root_handler: passed as the second argument to
        ``configure_logging``
    """
    # A leftover debugging ``print`` of a placeholder string used to run
    # here; it wrote noise to stdout on every instantiation and is removed.
    super(CrawlerProcess, self).__init__(settings)
    install_shutdown_handlers(self._signal_shutdown)
    configure_logging(self.settings, install_root_handler)
    log_scrapy_info(self.settings)
def __init__(self, settings):
    """Register the shutdown handler and set the initial bookkeeping state.

    :param settings: stored as-is on ``self.settings``
    """
    # The handler is registered before any attribute is assigned.
    install_shutdown_handlers(self._signal_shutdown)

    self.settings = settings
    self.crawlers = {}
    self.stopping, self._started = False, None
def __init__(self, settings=None):
    """Initialise the process, then set up signal handling and logging.

    :param settings: passed unchanged to the parent initialiser
    """
    super(CrawlerProcess, self).__init__(settings)

    shutdown_callback = self._signal_shutdown
    install_shutdown_handlers(shutdown_callback)

    # Logging is configured from the settings held on the instance.
    configure_logging(self.settings)
    log_scrapy_info(self.settings)
def _signal_shutdown(self, signum, _):
    """Handle the first shutdown signal by scheduling a graceful stop.

    :param signum: signal number, used to look up a name in ``signal_names``
    :param _: second handler argument; unused
    """
    # Swap in the kill handler so that a second signal is treated differently.
    install_shutdown_handlers(self._signal_kill)
    signame = signal_names[signum]
    # The message previously ended at "Send again to force "; the sentence
    # is completed here so the log line tells the user what a second signal
    # does.
    logger.info(
        "Received %(signame)s, shutting down gracefully. "
        "Send again to force unclean shutdown",
        {'signame': signame},
    )
    reactor.callFromThread(self._graceful_stop_reactor)
def __init__(self, db, chnls):
    """Store the collaborators, mark the instance as not running, and
    register the shutdown handler.

    :param db: stored as-is on ``self.db``
    :param chnls: stored as-is on ``self.chnls``
    """
    self.db, self.chnls = db, chnls
    self.running = False

    shutdown_callback = self._signal_shutdown
    install_shutdown_handlers(shutdown_callback)
def __init__(self, settings=None, install_root_handler=False):
    """Initialise via ``crawler.CrawlerRunner``, then set up signal handling
    and logging.

    :param settings: passed unchanged to ``CrawlerRunner.__init__``
    :param install_root_handler: passed by keyword to ``configure_logging``
    """
    # The parent initialiser is called explicitly rather than via super().
    crawler.CrawlerRunner.__init__(self, settings)

    shutdown_callback = self._signal_shutdown
    install_shutdown_handlers(shutdown_callback)

    configure_logging(
        self.settings,
        install_root_handler=install_root_handler,
    )
    log_scrapy_info(self.settings)
def __init__(self, settings):
    """Initialise the parent class, register the shutdown handler, and clear
    the ``stopping`` flag.

    :param settings: passed unchanged to the parent initialiser
    """
    super(CrawlerProcess, self).__init__(settings)

    shutdown_callback = self._signal_shutdown
    install_shutdown_handlers(shutdown_callback)

    self.stopping = False
def __init__(self, settings):
    """Initialise the parent class, register the shutdown handler, and start
    logging.

    :param settings: passed unchanged to the parent initialiser and to
        ``log.scrapy_info``
    """
    super(CrawlerProcess, self).__init__(settings)

    shutdown_callback = self._signal_shutdown
    install_shutdown_handlers(shutdown_callback)
    self.stopping = False

    # Keep whatever ``log.start_from_settings`` returns on the instance.
    observer = log.start_from_settings(self.settings)
    self.log_observer = observer
    log.scrapy_info(settings)
def __init__(self, settings=None, install_root_handler=True):
    """Initialise the process, then set up signal handling and logging.

    :param settings: passed unchanged to the parent initialiser
    :param install_root_handler: passed as the second argument to
        ``configure_logging``
    """
    super().__init__(settings)

    shutdown_callback = self._signal_shutdown
    install_shutdown_handlers(shutdown_callback)

    # Logging is configured from the settings held on the instance.
    configure_logging(self.settings, install_root_handler)
    log_scrapy_info(self.settings)
def _signal_shutdown(self, signum, _):
    """Handle the first shutdown signal by scheduling ``self.stop``.

    :param signum: signal number, used to look up a name in ``signal_names``
    :param _: second handler argument; unused
    """
    # Swap in the kill handler so that a second signal is treated differently.
    install_shutdown_handlers(self._signal_kill)
    signame = signal_names[signum]
    # The message previously ended at "Send again to force "; the sentence
    # is completed here so the log line tells the user what a second signal
    # does.
    log.msg(
        format="Received %(signame)s, shutting down gracefully. "
               "Send again to force unclean shutdown",
        level=log.INFO,
        signame=signame,
    )
    reactor.callFromThread(self.stop)
def __init__(self, settings=None, install_root_handler=True):
    """Initialise the process, then set up signal handling and logging.

    :param settings: passed unchanged to the parent initialiser
    :param install_root_handler: passed as the second argument to
        ``configure_logging``
    """
    super(CrawlerProcess, self).__init__(settings)

    # Watch for keyboard input and route the resulting signal through
    # signal.signal(...) so it can control the process.
    shutdown_callback = self._signal_shutdown
    install_shutdown_handlers(shutdown_callback)

    configure_logging(self.settings, install_root_handler)
    log_scrapy_info(self.settings)
def _signal_shutdown(self, signum, _):
    """Handle the first shutdown signal by scheduling ``self.stop``.

    :param signum: signal number, used to look up a name in ``signal_names``
    :param _: second handler argument; unused
    """
    # Swap in the kill handler so that a second signal is treated differently.
    install_shutdown_handlers(self._signal_kill)

    received = signal_names[signum]
    notice = (
        "Received %s, shutting down gracefully. Send again to force "
        "unclean shutdown" % received
    )
    log.msg(notice, level=log.INFO)

    reactor.callFromThread(self.stop)
def _signal_kill(self, signum, _):
    """Handle a repeated shutdown signal by stopping the reactor right away.

    :param signum: signal number, used to look up a name in ``signal_names``
    :param _: second handler argument; unused
    """
    # From now on the shutdown signals are registered with SIG_IGN.
    install_shutdown_handlers(signal.SIG_IGN)

    received = signal_names[signum]
    notice = 'Received %s twice, forcing unclean shutdown' % received
    # The level is passed positionally, as the second argument.
    log.msg(notice, log.INFO)

    reactor.callFromThread(self._stop_reactor)