Exemple #1
0
    def start(self, stop_after_crawl=True):
        """
        Configure the Twisted `reactor`_ and run it (blocking).

        Before the reactor is run, this method installs a
        ``CachingThreadedResolver`` whose cache size comes from
        :setting:`DNSCACHE_SIZE` (or ``0`` when :setting:`DNSCACHE_ENABLED`
        is false) and whose timeout comes from :setting:`DNS_TIMEOUT`, caps
        the reactor thread pool at :setting:`REACTOR_THREADPOOL_MAXSIZE`,
        and registers ``self.stop`` as a ``before``/``shutdown`` trigger.

        When `stop_after_crawl` is true, the deferred returned by
        :meth:`join` is used to stop the reactor once it fires. If that
        deferred has already fired, the method returns immediately and the
        reactor is never started.

        :param boolean stop_after_crawl: stop or not the reactor when all
            crawlers have finished
        """
        if stop_after_crawl:
            finished = self.join()
            if finished.called:
                # Nothing left to wait for, so there is no reason to spin
                # up the reactor at all.
                return
            finished.addBoth(lambda _: self._stop_reactor())

        settings = self.settings

        # A cache size of 0 is what gets passed when DNS caching is disabled.
        if settings.getbool('DNSCACHE_ENABLED'):
            dns_cache_size = settings.getint('DNSCACHE_SIZE')
        else:
            dns_cache_size = 0
        resolver = CachingThreadedResolver(
            reactor, dns_cache_size, settings.getfloat('DNS_TIMEOUT'))
        reactor.installResolver(resolver)

        max_threads = settings.getint('REACTOR_THREADPOOL_MAXSIZE')
        reactor.getThreadPool().adjustPoolsize(maxthreads=max_threads)

        reactor.addSystemEventTrigger('before', 'shutdown', self.stop)
        reactor.run(installSignalHandlers=False)  # blocking call
Exemple #2
0
 def _get_dns_resolver(self):
     """Build the DNS resolver described by the settings.

     The cache size is DNSCACHE_SIZE when DNSCACHE_ENABLED is true and 0
     otherwise; the timeout is read from DNS_TIMEOUT.

     :return: a new ``CachingThreadedResolver`` bound to the reactor
     """
     settings = self.settings
     enabled = settings.getbool('DNSCACHE_ENABLED')
     size = settings.getint('DNSCACHE_SIZE') if enabled else 0
     dns_timeout = settings.getfloat('DNS_TIMEOUT')
     return CachingThreadedResolver(
         reactor=reactor, cache_size=size, timeout=dns_timeout)
Exemple #3
0
 def _start_reactor(self, stop_after_crawl=True):
     """Run the reactor (blocking), optionally stopping it after the crawl.

     When `stop_after_crawl` is true, the crawl deferreds are gathered in a
     ``DeferredList`` and ``self._stop_reactor`` is invoked once that list
     fires. If it has already fired, the reactor is not started at all.

     A ``CachingThreadedResolver`` is installed when DNSCACHE_ENABLED is
     true, and ``self.stop`` is registered as a before-shutdown trigger.

     :param boolean stop_after_crawl: stop or not the reactor once the
         crawl deferreds have fired
     """
     if stop_after_crawl:
         pending = defer.DeferredList(self.crawl_deferreds)
         if pending.called:
             # Don't start the reactor if the deferreds are already fired
             return

         def _halt(_result):
             # The incoming result (success or failure) is ignored.
             return self._stop_reactor()

         pending.addBoth(_halt)

     use_dns_cache = self.settings.getbool('DNSCACHE_ENABLED')
     if use_dns_cache:
         resolver = CachingThreadedResolver(reactor)
         reactor.installResolver(resolver)

     reactor.addSystemEventTrigger('before', 'shutdown', self.stop)
     reactor.run(installSignalHandlers=False)  # blocking call
Exemple #4
0
    def start(self, stop_after_crawl=True):
        """Configure the reactor and run it (blocking).

        When `stop_after_crawl` is true, the deferred returned by
        ``self.join()`` is used to call ``self._stop_reactor`` once it
        fires; if it has already fired, the method returns without starting
        the reactor.

        Before running, a ``CachingThreadedResolver`` is installed when
        DNSCACHE_ENABLED is true, the reactor thread pool maximum is set
        from REACTOR_THREADPOOL_MAXSIZE, and ``self.stop`` is registered as
        a before-shutdown trigger.

        :param boolean stop_after_crawl: stop or not the reactor once the
            deferred from ``join()`` has fired
        """
        if stop_after_crawl:
            joined = self.join()
            if joined.called:
                # Already fired: there is nothing to wait for, so the
                # reactor is not started.
                return
            joined.addBoth(lambda _: self._stop_reactor())

        settings = self.settings

        dns_cache_enabled = settings.getbool('DNSCACHE_ENABLED')
        if dns_cache_enabled:
            resolver = CachingThreadedResolver(reactor)
            reactor.installResolver(resolver)

        pool = reactor.getThreadPool()
        pool_limit = settings.getint('REACTOR_THREADPOOL_MAXSIZE')
        pool.adjustPoolsize(maxthreads=pool_limit)

        reactor.addSystemEventTrigger('before', 'shutdown', self.stop)
        reactor.run(installSignalHandlers=False)  # blocking call
Exemple #5
0
 def start(self):
     """Start via the parent class, then run the reactor (blocking).

     After delegating to the parent ``start()``, a
     ``CachingThreadedResolver`` is installed when DNSCACHE_ENABLED is
     true, and ``self.stop`` is registered as a before-shutdown trigger.
     """
     super(CrawlerProcess, self).start()

     dns_cache_enabled = self.settings.getbool('DNSCACHE_ENABLED')
     if dns_cache_enabled:
         caching_resolver = CachingThreadedResolver(reactor)
         reactor.installResolver(caching_resolver)

     # Ensure self.stop is invoked before the reactor shuts down.
     reactor.addSystemEventTrigger('before', 'shutdown', self.stop)
     reactor.run(installSignalHandlers=False)  # blocking call