Example #1
0
 def __init__(self, settings=None, install_root_handler=True):
     """Run the parent initializer, then set up shutdown handling and logging.

     :param settings: forwarded unchanged to the parent ``__init__``
     :param install_root_handler: forwarded to ``configure_logging``
     """
     super(CrawlerProcess, self).__init__(settings)
     # Install the shutdown handler.
     shutdown_callback = self._signal_shutdown
     install_shutdown_handlers(shutdown_callback)
     # Configure logging, then emit the Scrapy info log for these settings.
     active_settings = self.settings
     configure_logging(active_settings, install_root_handler)
     log_scrapy_info(active_settings)
Example #2
0
 def __init__(self, settings):
     """Register the shutdown handler and initialise the instance state.

     :param settings: stored on the instance as ``self.settings``
     """
     install_shutdown_handlers(self._signal_shutdown)
     self.settings = settings
     # Both registries start out empty.
     self.crawlers, self.plugins = {}, {}
     # Not stopping yet; ``_started`` has no value until it is set elsewhere.
     self.stopping, self._started = False, None
Example #3
0
    def start(self, stop_after_crawl=True, install_signal_handlers=True):
        """
        Run the Twisted reactor (blocking call).

        Before running, this installs the resolver class named by the
        ``DNS_RESOLVER`` setting on the reactor, caps the reactor thread
        pool at ``REACTOR_THREADPOOL_MAXSIZE`` and registers ``self.stop``
        as a ``before shutdown`` system event trigger.

        If ``stop_after_crawl`` is True, ``self._stop_reactor`` is chained
        onto the deferred returned by :meth:`join`; when that deferred has
        already fired, the method returns without starting the reactor.

        :param bool stop_after_crawl: stop or not the reactor when the
            deferred returned by :meth:`join` fires

        :param bool install_signal_handlers: whether to install the shutdown
            handlers (default: True)
        """
        from twisted.internet import reactor

        if stop_after_crawl:
            all_done = self.join()
            # Nothing left to wait for, so the reactor is never started.
            if all_done.called:
                return
            all_done.addBoth(self._stop_reactor)

        if install_signal_handlers:
            install_shutdown_handlers(self._signal_shutdown)

        # Build the configured resolver and attach it to the reactor.
        resolver_cls = load_object(self.settings["DNS_RESOLVER"])
        dns_resolver = create_instance(resolver_cls, self.settings, self, reactor=reactor)
        dns_resolver.install_on_reactor()

        # Cap the reactor's thread pool at the configured maximum.
        pool = reactor.getThreadPool()
        max_threads = self.settings.getint('REACTOR_THREADPOOL_MAXSIZE')
        pool.adjustPoolsize(maxthreads=max_threads)

        reactor.addSystemEventTrigger('before', 'shutdown', self.stop)
        # Signal handlers are managed above, so the reactor must not install its own.
        reactor.run(installSignalHandlers=False)  # blocking call
Example #4
0
 def _signal_kill(self, signum, _):
     """Handle a repeated shutdown signal by forcing the reactor to stop.

     :param signum: signal number, used to look up the name in ``signal_names``
     :param _: unused second handler argument
     """
     # Ignore any further shutdown signals from here on.
     install_shutdown_handlers(signal.SIG_IGN)
     received = signal_names[signum]
     log.msg(
         format='Received %(signame)s twice, forcing unclean shutdown',
         level=log.INFO,
         signame=received
     )
     self._stop_logging()
     # Hand the stop over to the reactor thread.
     reactor.callFromThread(self._stop_reactor)
Example #5
0
 def _signal_shutdown(self, signum, _):
     """Handle the first shutdown signal: log it and schedule ``self.stop``.

     ``self._signal_kill`` is installed as the handler for the next signal.

     :param signum: signal number, used to look up the name in ``signal_names``
     :param _: unused second handler argument
     """
     install_shutdown_handlers(self._signal_kill)
     notice = (
         "Received %s, shutting down gracefully. Send again to force "
         "unclean shutdown"
     ) % signal_names[signum]
     log.msg(notice, level=log.INFO)
     reactor.callFromThread(self.stop)
Example #6
0
 def _signal_shutdown(self, signum, _):
     """Handle the first shutdown signal: log it and schedule a graceful stop.

     ``self._signal_kill`` is installed as the handler for the next signal,
     and ``self._graceful_stop_reactor`` is scheduled on the reactor thread.

     :param signum: signal number, used to look up the name in ``signal_names``
     :param _: unused second handler argument
     """
     install_shutdown_handlers(self._signal_kill)
     signame = signal_names[signum]
     # Complete sentence, so the reader knows what sending the signal again forces.
     logger.info(
         "Received %(signame)s, shutting down gracefully. "
         "Send again to force unclean shutdown",
         {'signame': signame})
     reactor.callFromThread(self._graceful_stop_reactor)
Example #7
0
 def _signal_kill(self, signum, _):
     """Handle a repeated shutdown signal by scheduling ``self.engine.kill``.

     :param signum: signal number, used to look up the name in ``signal_names``
     :param _: unused second handler argument
     """
     message = 'Received %s twice, forcing unclean shutdown' % signal_names[signum]
     log.msg(message, level=log.INFO)
     log.log_level = log.SILENT # disable logging of confusing tracebacks
     reactor.callFromThread(self.engine.kill)
     # Further shutdown signals are ignored once the kill has been scheduled.
     install_shutdown_handlers(signal.SIG_IGN)
Example #8
0
 def _signal_kill(self, signum, _):
     """Handle a repeated shutdown signal by forcing the reactor to stop.

     :param signum: signal number, used to look up the name in ``signal_names``
     :param _: unused second handler argument
     """
     # Ignore any further shutdown signals from here on.
     install_shutdown_handlers(signal.SIG_IGN)
     received = signal_names[signum]
     log.msg(format='Received %(signame)s twice, forcing unclean shutdown',
             level=log.INFO, signame=received)
     reactor.callFromThread(self._stop_reactor)
Example #9
0
 def _signal_kill(self, signum, _):
     """Handle a repeated shutdown signal by forcing the reactor to stop.

     :param signum: signal number, used to look up the name in ``signal_names``
     :param _: unused second handler argument
     """
     from twisted.internet import reactor

     # Ignore any further shutdown signals from here on.
     install_shutdown_handlers(signal.SIG_IGN)
     log_args = {'signame': signal_names[signum]}
     logger.info('Received %(signame)s twice, forcing unclean shutdown', log_args)
     reactor.callFromThread(self._stop_reactor)
Example #10
0
 def __init__(self, settings=None, install_root_handler=True):
     """Initialise the parent class, then shutdown handling and logging.

     :param settings: forwarded unchanged to the parent ``__init__``
     :param install_root_handler: forwarded to ``configure_logging``
     """
     ## Initialise the parent class.
     super(CrawlerProcess, self).__init__(settings)
     ## Handle shutdown signals.
     handler = self._signal_shutdown
     install_shutdown_handlers(handler)
     ## Configure the default logging for Scrapy.
     current = self.settings
     configure_logging(current, install_root_handler)
     ## Emit the Scrapy info log (original author's note: startup state, version, ...).
     log_scrapy_info(current)
Example #11
0
 def _signal_shutdown(self, signum, _):
     """Handle the first shutdown signal: log it and schedule ``self.stop``.

     ``self._signal_kill`` is installed as the handler for the next signal.

     :param signum: signal number, used to look up the name in ``signal_names``
     :param _: unused second handler argument
     """
     install_shutdown_handlers(self._signal_kill)
     signame = signal_names[signum]
     # Complete sentence, so the reader knows what sending the signal again forces.
     log.msg(
         format=
         "Received %(signame)s, shutting down gracefully. "
         "Send again to force unclean shutdown",
         level=log.INFO,
         signame=signame)
     reactor.callFromThread(self.stop)
Example #12
0
    def _init(self):
        """Load project settings, open the database session and hook shutdown.

        Sets ``self.settings``, ``self.engine`` and ``self.session``; creates
        ``self.logger`` only when the instance has no truthy ``logger`` yet.
        """
        self.settings = get_project_settings()
        conn_string = create_mysql_conn_string()
        self.engine = create_engine(conn_string)
        self.session = Session(self.engine)

        # Keep an already-present logger; otherwise name one after the class.
        existing_logger = getattr(self, "logger", None)
        if not existing_logger:
            logger_name = self.__class__.__name__
            self.logger = logging.getLogger(name=logger_name)

        # NOTE(review): the meaning of the second argument (True) is defined by
        # install_shutdown_handlers, which is not visible here.
        install_shutdown_handlers(self.signal_shutdown_handler, True)
Example #13
0
    def _signal_shutdown(self, signum, _):
        """Handle the first shutdown signal: log it and schedule a graceful stop.

        ``self._signal_kill`` is installed as the handler for the next signal,
        and ``self._graceful_stop_reactor`` is scheduled on the reactor thread.

        :param signum: signal number, used to look up the name in ``signal_names``
        :param _: unused second handler argument
        """
        from twisted.internet import reactor

        install_shutdown_handlers(self._signal_kill)
        signame = signal_names[signum]
        # Complete sentence, so the reader knows what sending the signal again forces.
        logger.info(
            "Received %(signame)s, shutting down gracefully. "
            "Send again to force unclean shutdown",
            {"signame": signame},
        )
        reactor.callFromThread(self._graceful_stop_reactor)
Example #14
0
 def __init__(self, settings=None, install_root_handler=True):
     """Initialise the process, tracing each construction step to the log.

     Trace messages go through a named logger rather than the module-level
     ``logging.info()``. The module-level function calls
     ``logging.basicConfig()`` when the root logger has no handlers, which
     would attach a root handler before ``configure_logging`` runs.
     NOTE(review): messages emitted before ``configure_logging`` may
     therefore not be displayed; confirm that is acceptable for this tracing.

     :param settings: forwarded unchanged to the parent ``__init__``
     :param install_root_handler: forwarded to ``configure_logging``
     """
     trace = logging.getLogger(__name__)
     trace.info("CrawlerProcess __init__")
     super().__init__(settings)
     trace.info("super().__init__(settings)")
     install_shutdown_handlers(self._signal_shutdown)
     trace.info("install_shutdown_handlers(self._signal_shutdown)")
     configure_logging(self.settings, install_root_handler)
     trace.info("configure_logging(self.settings, install_root_handler)")
     log_scrapy_info(self.settings)
     trace.info("log_scrapy_info(self.settings)")
Example #15
0
 def __init__(self):
     """Create the PhantomJS driver and database handle, then hook shutdown.

     Sets ``self.driver``, ``self.db`` and ``self.force_quit`` and registers
     ``self._quit_func`` through ``install_shutdown_handlers``.
     """
     super(TranslateSpider, self).__init__()
     # Alternative (disabled): webdriver.Firefox with a custom FirefoxProfile.
     self.driver = browser = webdriver.PhantomJS()
     browser.set_window_size(1120, 550)
     browser.implicitly_wait(10)
     self.db = RemoteDb()
     self.force_quit = False
     install_shutdown_handlers(self._quit_func)
Example #16
0
    def configure(self, control_reactor=True):
        """Prepare logging, extensions, spiders and the engine for a run.

        Registers ``scrapyengine.stop`` as a ``before shutdown`` reactor
        trigger, starts/loads ``log``, ``extensions`` and ``spiders`` if they
        have not been already, configures the engine and finally sets
        ``self.configured`` to True.

        :param control_reactor: stored on the instance; when true the
            shutdown handler ``self._signal_shutdown`` is installed
        """
        self.control_reactor = control_reactor
        if control_reactor:
            install_shutdown_handlers(self._signal_shutdown)
        reactor.addSystemEventTrigger('before', 'shutdown', scrapyengine.stop)

        if not log.started:
            log.start()
        if not extensions.loaded:
            extensions.load()
        if not spiders.loaded:
            spiders.load()
        # Iterate the mapping directly: .iterkeys() exists only on Python 2,
        # while plain iteration yields the keys on both Python 2 and 3.
        # (assumes extensions.enabled is dict-like -- confirm)
        log.msg("Enabled extensions: %s" % ", ".join(extensions.enabled),
            level=log.DEBUG)

        scrapyengine.configure()
        self.configured = True
Example #17
0
    def configure(self, control_reactor=True, queue=None):
        """Prepare logging, extensions, spiders, the queue and the engine.

        Starts/loads ``log``, ``extensions`` and ``spiders`` if they have not
        been already, stores the execution queue, configures the engine with
        ``self._spider_closed`` and finally sets ``self.configured`` to True.

        :param control_reactor: stored on the instance; when true the
            shutdown handler ``self._signal_shutdown`` is installed
        :param queue: queue to use; a new ``ExecutionQueue`` is created when
            this is None or otherwise falsy
        """
        self.control_reactor = control_reactor
        if control_reactor:
            install_shutdown_handlers(self._signal_shutdown)

        if not log.started:
            log.start()
        if not extensions.loaded:
            extensions.load()
        if not spiders.loaded:
            spiders.load()
        # Iterate the mapping directly: .iterkeys() exists only on Python 2,
        # while plain iteration yields the keys on both Python 2 and 3.
        # (assumes extensions.enabled is dict-like -- confirm)
        log.msg("Enabled extensions: %s" % ", ".join(extensions.enabled),
            level=log.DEBUG)

        self.queue = queue or ExecutionQueue()
        self.engine.configure(self._spider_closed)
        self.configured = True
Example #18
0
 def _signal_kill(self, signum, _):
     """Ignore further shutdown signals and schedule ``self._stop_reactor``.

     :param signum: signal number (not used by this handler)
     :param _: unused second handler argument
     """
     # The signal name was looked up but never used, so the lookup is gone.
     install_shutdown_handlers(signal.SIG_IGN)
     reactor.callFromThread(self._stop_reactor)
Example #19
0
 def __init__(self, settings=None):
     """Run the parent initializer and register the shutdown handler.

     :param settings: forwarded unchanged to the parent ``__init__``
     """
     super(ScrapyCrawlerProcess, self).__init__(settings)
     shutdown_handler = self._signal_shutdown
     install_shutdown_handlers(shutdown_handler)
Example #20
0
 def __init__(self, settings=None):
     """Delegate to the parent ``__init__``, then hook the shutdown handler.

     :param settings: passed through to the parent initializer as-is
     """
     super(ScrapyCrawlerProcess, self).__init__(settings)
     on_shutdown = self._signal_shutdown
     install_shutdown_handlers(on_shutdown)
Example #21
0
 def _signal_shutdown(self, signum, _):
     """Install the kill handler and schedule a graceful reactor stop.

     ``self._signal_kill`` becomes the handler for the next signal, and
     ``self._graceful_stop_reactor`` is scheduled on the reactor thread.

     :param signum: signal number (not used by this handler)
     :param _: unused second handler argument
     """
     # The signal name was looked up but never used, so the lookup is gone.
     install_shutdown_handlers(self._signal_kill)
     reactor.callFromThread(self._graceful_stop_reactor)
Example #22
0
 def __init__(self, *a, **kw):
     """Initialise the parent, connect ``stop`` and hook the shutdown handler.

     All positional and keyword arguments are forwarded to the parent
     ``__init__`` unchanged.
     """
     super(CrawlerProcess, self).__init__(*a, **kw)
     # Call self.stop when the engine_stopped signal is sent.
     self.signals.connect(self.stop, signals.engine_stopped)
     shutdown_handler = self._signal_shutdown
     install_shutdown_handlers(shutdown_handler)
Example #23
0
 def _signal_kill(self, signum, _):
     """Handle a repeated shutdown signal by forcing the reactor to stop.

     :param signum: signal number, used to look up the name in ``signal_names``
     :param _: unused second handler argument
     """
     # Ignore any further shutdown signals from here on.
     install_shutdown_handlers(signal.SIG_IGN)
     log_args = {'signame': signal_names[signum]}
     logger.info('Received %(signame)s twice, forcing unclean shutdown', log_args)
     reactor.callFromThread(self._stop_reactor)
Example #24
0
 def _signal_kill(self, signum, _):
     """Ignore further shutdown signals and schedule ``self._stop_reactor``.

     :param signum: signal number (not used by this handler)
     :param _: unused second handler argument
     """
     # The signal name was looked up but never used, so the lookup is gone.
     install_shutdown_handlers(signal.SIG_IGN)
     reactor.callFromThread(self._stop_reactor)
Example #25
0
 def __init__(self, *a, **kw):
     """Register ``self._signal_shutdown`` as the shutdown handler.

     The positional and keyword arguments are accepted but not used here.
     """
     shutdown_handler = self._signal_shutdown
     install_shutdown_handlers(shutdown_handler)
Example #26
0
 def __init__(self, settings=None, install_root_handler=True):
     """Run the parent initializer, then set up shutdown handling and logging.

     The constructor writes nothing to stdout; its only output goes through
     ``log_scrapy_info`` after logging has been configured.

     :param settings: forwarded unchanged to the parent ``__init__``
     :param install_root_handler: forwarded to ``configure_logging``
     """
     super(CrawlerProcess, self).__init__(settings)
     install_shutdown_handlers(self._signal_shutdown)
     configure_logging(self.settings, install_root_handler)
     log_scrapy_info(self.settings)
Example #27
0
 def __init__(self, settings):
     """Register the shutdown handler and initialise the instance state.

     :param settings: stored on the instance as ``self.settings``
     """
     install_shutdown_handlers(self._signal_shutdown)
     self.settings = settings
     # No crawlers registered yet.
     self.crawlers = dict()
     # Not stopping yet; ``_started`` has no value until it is set elsewhere.
     self.stopping, self._started = False, None
Example #28
0
 def __init__(self, settings=None):
     """Run the parent initializer, then set up shutdown handling and logging.

     :param settings: forwarded unchanged to the parent ``__init__``
     """
     super(CrawlerProcess, self).__init__(settings)
     shutdown_callback = self._signal_shutdown
     install_shutdown_handlers(shutdown_callback)
     # Logging is configured from the instance settings before the info log.
     active_settings = self.settings
     configure_logging(active_settings)
     log_scrapy_info(active_settings)
Example #29
0
 def _signal_shutdown(self, signum, _):
     """Handle the first shutdown signal: log it and schedule a graceful stop.

     ``self._signal_kill`` is installed as the handler for the next signal,
     and ``self._graceful_stop_reactor`` is scheduled on the reactor thread.

     :param signum: signal number, used to look up the name in ``signal_names``
     :param _: unused second handler argument
     """
     install_shutdown_handlers(self._signal_kill)
     signame = signal_names[signum]
     # Complete sentence, so the reader knows what sending the signal again forces.
     logger.info("Received %(signame)s, shutting down gracefully. "
                 "Send again to force unclean shutdown",
                 {'signame': signame})
     reactor.callFromThread(self._graceful_stop_reactor)
Example #30
0
    def __init__(self, db, chnls):
        """Store the collaborators and register the shutdown handler.

        :param db: stored on the instance as ``self.db``
        :param chnls: stored on the instance as ``self.chnls``
        """
        self.db, self.chnls = db, chnls
        # The instance starts out not running.
        self.running = False

        shutdown_handler = self._signal_shutdown
        install_shutdown_handlers(shutdown_handler)
Example #31
0
 def __init__(self, settings=None, install_root_handler=False):
     """Initialise via ``CrawlerRunner``, then set up shutdown and logging.

     :param settings: forwarded unchanged to ``crawler.CrawlerRunner.__init__``
     :param install_root_handler: forwarded by keyword to
         ``configure_logging`` (defaults to False here)
     """
     crawler.CrawlerRunner.__init__(self, settings)
     shutdown_callback = self._signal_shutdown
     install_shutdown_handlers(shutdown_callback)
     active_settings = self.settings
     configure_logging(active_settings, install_root_handler=install_root_handler)
     log_scrapy_info(active_settings)
Example #32
0
 def __init__(self, settings):
     """Run the parent initializer, hook shutdown and clear ``stopping``.

     :param settings: forwarded unchanged to the parent ``__init__``
     """
     super(CrawlerProcess, self).__init__(settings)
     shutdown_handler = self._signal_shutdown
     install_shutdown_handlers(shutdown_handler)
     # The process starts out not stopping.
     self.stopping = False
Example #33
0
 def __init__(self, settings):
     """Run the parent initializer, hook shutdown and start logging.

     :param settings: forwarded to the parent ``__init__`` and to
         ``log.scrapy_info``
     """
     super(CrawlerProcess, self).__init__(settings)
     shutdown_handler = self._signal_shutdown
     install_shutdown_handlers(shutdown_handler)
     self.stopping = False
     # Keep whatever log.start_from_settings returns on the instance.
     observer = log.start_from_settings(self.settings)
     self.log_observer = observer
     log.scrapy_info(settings)
Example #34
0
 def __init__(self, settings=None, install_root_handler=True):
     """Run the parent initializer, then set up shutdown handling and logging.

     :param settings: forwarded unchanged to the parent ``__init__``
     :param install_root_handler: forwarded to ``configure_logging``
     """
     super().__init__(settings)
     shutdown_callback = self._signal_shutdown
     install_shutdown_handlers(shutdown_callback)
     # Logging is configured from the instance settings before the info log.
     active_settings = self.settings
     configure_logging(active_settings, install_root_handler)
     log_scrapy_info(active_settings)
Example #35
0
 def __init__(self, settings=None):
     """Initialise the parent, then hook shutdown and configure logging.

     :param settings: passed through to the parent initializer as-is
     """
     super(CrawlerProcess, self).__init__(settings)
     on_shutdown = self._signal_shutdown
     install_shutdown_handlers(on_shutdown)
     current = self.settings
     configure_logging(current)
     log_scrapy_info(current)
Example #36
0
 def _signal_shutdown(self, signum, _):
     """Handle the first shutdown signal: log it and schedule ``self.stop``.

     ``self._signal_kill`` is installed as the handler for the next signal.

     :param signum: signal number, used to look up the name in ``signal_names``
     :param _: unused second handler argument
     """
     install_shutdown_handlers(self._signal_kill)
     signame = signal_names[signum]
     # Complete sentence, so the reader knows what sending the signal again forces.
     log.msg(format="Received %(signame)s, shutting down gracefully. "
                    "Send again to force unclean shutdown",
             level=log.INFO, signame=signame)
     reactor.callFromThread(self.stop)
Example #37
0
 def __init__(self, *a, **kw):
     """Hook ``self._signal_shutdown`` up as the shutdown handler.

     Neither the positional nor the keyword arguments are used here.
     """
     on_shutdown = self._signal_shutdown
     install_shutdown_handlers(on_shutdown)
Example #38
0
 def __init__(self, settings):
     """Initialise the parent, register the shutdown handler, reset state.

     :param settings: passed through to the parent initializer as-is
     """
     super(CrawlerProcess, self).__init__(settings)
     on_shutdown = self._signal_shutdown
     install_shutdown_handlers(on_shutdown)
     # ``stopping`` is False until something else changes it.
     self.stopping = False
Example #39
0
 def __init__(self, settings=None, install_root_handler=True):
     """Run the parent initializer, then set up shutdown handling and logging.

     :param settings: forwarded unchanged to the parent ``__init__``
     :param install_root_handler: forwarded to ``configure_logging``
     """
     super(CrawlerProcess, self).__init__(settings)
     # Original author's note: watches for keyboard input and then controls
     # the process through signal.signal -- TODO confirm against
     # install_shutdown_handlers.
     shutdown_callback = self._signal_shutdown
     install_shutdown_handlers(shutdown_callback)
     active_settings = self.settings
     configure_logging(active_settings, install_root_handler)
     log_scrapy_info(active_settings)
Example #40
0
 def _signal_shutdown(self, signum, _):
     """Handle the first shutdown signal: log it and schedule ``self.stop``.

     ``self._signal_kill`` is installed as the handler for the next signal.

     :param signum: signal number, used to look up the name in ``signal_names``
     :param _: unused second handler argument
     """
     install_shutdown_handlers(self._signal_kill)
     template = ("Received %s, shutting down gracefully. Send again to force "
                 "unclean shutdown")
     log.msg(template % signal_names[signum], level=log.INFO)
     reactor.callFromThread(self.stop)
Example #41
0
 def __init__(self, settings):
     """Initialise the parent, register shutdown handling and start logging.

     :param settings: passed to the parent initializer and to
         ``log.scrapy_info``
     """
     super(CrawlerProcess, self).__init__(settings)
     on_shutdown = self._signal_shutdown
     install_shutdown_handlers(on_shutdown)
     self.stopping = False
     # The value returned by log.start_from_settings is kept for later use.
     started_observer = log.start_from_settings(self.settings)
     self.log_observer = started_observer
     log.scrapy_info(settings)
Example #42
0
 def _signal_shutdown(self, signum, _):
     """Install the kill handler and schedule a graceful reactor stop.

     ``self._signal_kill`` becomes the handler for the next signal, and
     ``self._graceful_stop_reactor`` is scheduled on the reactor thread.

     :param signum: signal number (not used by this handler)
     :param _: unused second handler argument
     """
     # The signal name was looked up but never used, so the lookup is gone.
     install_shutdown_handlers(self._signal_kill)
     reactor.callFromThread(self._graceful_stop_reactor)
Example #43
0
 def _signal_kill(self, signum, _):
     """Handle a repeated shutdown signal by forcing the reactor to stop.

     :param signum: signal number, used to look up the name in ``signal_names``
     :param _: unused second handler argument
     """
     # Ignore any further shutdown signals from here on.
     install_shutdown_handlers(signal.SIG_IGN)
     signame = signal_names[signum]
     # Pass the level by keyword, as the other log.msg calls do, so it does
     # not depend on the positional parameter order of log.msg.
     log.msg('Received %s twice, forcing unclean shutdown' % signame,
             level=log.INFO)
     reactor.callFromThread(self._stop_reactor)
Example #44
0
    def __init__(self, db, chnls):
        """Keep references to ``db`` and ``chnls`` and hook the shutdown handler.

        :param db: kept as ``self.db``
        :param chnls: kept as ``self.chnls``
        """
        self.db, self.chnls = db, chnls
        # ``running`` is False until something else changes it.
        self.running = False

        on_shutdown = self._signal_shutdown
        install_shutdown_handlers(on_shutdown)