def start(self, stop_after_crawl=True):
    """
    Run the Twisted `reactor`_ and block until it is stopped.

    Before the reactor is started, a caching DNS resolver is installed
    and the reactor thread pool is resized. The resolver is built with a
    cache size of :setting:`DNSCACHE_SIZE` when :setting:`DNSCACHE_ENABLED`
    is true (0 otherwise) and a timeout of :setting:`DNS_TIMEOUT`. The
    thread pool maximum is taken from :setting:`REACTOR_THREADPOOL_MAXSIZE`.

    If `stop_after_crawl` is True, the reactor is stopped once the deferred
    returned by :meth:`join` fires. If that deferred has already fired,
    the reactor is not started at all and this method returns immediately.

    :param boolean stop_after_crawl: stop or not the reactor when all
        crawlers have finished
    """
    if stop_after_crawl:
        finished = self.join()
        if finished.called:
            # Every crawler is already done: starting the reactor now
            # would leave nothing around to stop it again.
            return
        finished.addBoth(lambda _: self._stop_reactor())

    settings = self.settings
    if settings.getbool('DNSCACHE_ENABLED'):
        dns_cache_size = settings.getint('DNSCACHE_SIZE')
    else:
        dns_cache_size = 0
    dns_timeout = settings.getfloat('DNS_TIMEOUT')
    reactor.installResolver(
        CachingThreadedResolver(reactor, dns_cache_size, dns_timeout))

    reactor.getThreadPool().adjustPoolsize(
        maxthreads=settings.getint('REACTOR_THREADPOOL_MAXSIZE'))

    # Make sure self.stop runs before the reactor shuts down.
    reactor.addSystemEventTrigger('before', 'shutdown', self.stop)
    reactor.run(installSignalHandlers=False)  # blocks until reactor stops
def _get_dns_resolver(self):
    """
    Build the DNS resolver described by the current settings.

    The resolver is created with a cache size of ``DNSCACHE_SIZE`` when
    ``DNSCACHE_ENABLED`` is true and 0 otherwise, and with ``DNS_TIMEOUT``
    as its timeout.

    :return: a new ``CachingThreadedResolver`` bound to the reactor
    """
    settings = self.settings
    dns_cache_on = settings.getbool('DNSCACHE_ENABLED')
    size = settings.getint('DNSCACHE_SIZE') if dns_cache_on else 0
    return CachingThreadedResolver(
        reactor=reactor,
        cache_size=size,
        timeout=settings.getfloat('DNS_TIMEOUT'),
    )
def _start_reactor(self, stop_after_crawl=True):
    """
    Run the Twisted reactor and block until it is stopped.

    When `stop_after_crawl` is True, the reactor is stopped once every
    deferred in ``self.crawl_deferreds`` has fired. If they have all fired
    already, the reactor is not started and the method returns immediately.

    A ``CachingThreadedResolver`` is installed only when the
    ``DNSCACHE_ENABLED`` setting is true.

    :param boolean stop_after_crawl: stop or not the reactor when all
        crawl deferreds have fired
    """
    if stop_after_crawl:
        all_crawls = defer.DeferredList(self.crawl_deferreds)
        if all_crawls.called:
            # Nothing is pending, so do not start the reactor at all.
            return
        all_crawls.addBoth(lambda _: self._stop_reactor())

    dns_cache_on = self.settings.getbool('DNSCACHE_ENABLED')
    if dns_cache_on:
        reactor.installResolver(CachingThreadedResolver(reactor))

    # Make sure self.stop runs before the reactor shuts down.
    reactor.addSystemEventTrigger('before', 'shutdown', self.stop)
    reactor.run(installSignalHandlers=False)  # blocks until reactor stops
def start(self, stop_after_crawl=True):
    """
    Run the Twisted reactor and block until it is stopped.

    When `stop_after_crawl` is True, the reactor is stopped once the
    deferred returned by ``self.join()`` fires. If that deferred has
    already fired, the reactor is not started and the method returns
    immediately.

    Before running, a ``CachingThreadedResolver`` is installed if the
    ``DNSCACHE_ENABLED`` setting is true, and the reactor thread pool
    maximum is set from ``REACTOR_THREADPOOL_MAXSIZE``.

    :param boolean stop_after_crawl: stop or not the reactor when all
        crawlers have finished
    """
    if stop_after_crawl:
        finished = self.join()
        if finished.called:
            # Every crawler is already done: starting the reactor now
            # would leave nothing around to stop it again.
            return
        finished.addBoth(lambda _: self._stop_reactor())

    settings = self.settings
    if settings.getbool('DNSCACHE_ENABLED'):
        reactor.installResolver(CachingThreadedResolver(reactor))

    reactor.getThreadPool().adjustPoolsize(
        maxthreads=settings.getint('REACTOR_THREADPOOL_MAXSIZE'))

    # Make sure self.stop runs before the reactor shuts down.
    reactor.addSystemEventTrigger('before', 'shutdown', self.stop)
    reactor.run(installSignalHandlers=False)  # blocks until reactor stops
def start(self):
    """
    Run the parent class ``start()``, then run the Twisted reactor.

    A ``CachingThreadedResolver`` is installed first when the
    ``DNSCACHE_ENABLED`` setting is true. The call blocks until the
    reactor stops.
    """
    super(CrawlerProcess, self).start()

    dns_cache_on = self.settings.getbool('DNSCACHE_ENABLED')
    if dns_cache_on:
        reactor.installResolver(CachingThreadedResolver(reactor))

    # Make sure self.stop runs before the reactor shuts down.
    reactor.addSystemEventTrigger('before', 'shutdown', self.stop)
    reactor.run(installSignalHandlers=False)  # blocks until reactor stops