Example #1
0
 def from_settings(cls, settings):
     """Build an instance from ``settings`` using a time-based key.

     The server object comes from ``connection.from_settings(settings)``.
     The key has the form ``"dupefilter:<unix seconds>"``.

     A one-time key is generated here so that this class can be used as
     a standalone dupefilter together with scrapy's default scheduler.
     It would be unnecessary if scrapy passed the spider to ``open()``.
     """
     backend = connection.from_settings(settings)
     created_at = int(time.time())
     return cls(backend, "dupefilter:%s" % created_at)
Example #2
0
 def from_settings(cls, settings):
     """Create an instance backed by a server built from ``settings``.

     The second constructor argument is a one-time key,
     ``"dupefilter:<current unix time in whole seconds>"``.
     """
     srv = connection.from_settings(settings)
     # The one-time key lets this class work as a standalone dupefilter
     # with scrapy's default scheduler; it would not be needed if scrapy
     # handed the spider to the open() method.
     one_time_key = "dupefilter:%s" % (int(time.time()),)
     return cls(srv, one_time_key)
Example #3
0
	def from_crawler(cls, crawler):
		"""Create an instance bound to ``crawler``.

		The instance is constructed with the server returned by
		``connection.from_settings(crawler.settings)``. The crawler's
		settings and the crawler itself are then stored on it as
		``settings`` and ``crawler``.
		"""
		crawler_settings = crawler.settings
		instance = cls(connection.from_settings(crawler_settings))
		instance.settings = crawler_settings
		instance.crawler = crawler
		return instance
Example #4
0
 def from_settings(cls, settings):
     """Build an instance from ``settings``.

     Each option is read with ``settings.get`` and falls back to the
     matching default constant. The value found under
     ``SCHEDULER_QUEUE_CLASS`` is resolved through ``load_object``.
     The options are looked up before the server is created.
     """
     lookup = settings.get
     # Tuple order matches the positional arguments that follow the
     # server in the constructor call.
     options = (
         lookup('SCHEDULER_PERSIST', SCHEDULER_PERSIST),
         lookup('SCHEDULER_QUEUE_KEY', QUEUE_KEY),
         load_object(lookup('SCHEDULER_QUEUE_CLASS', QUEUE_CLASS)),
         lookup('DUPEFILTER_KEY', DUPEFILTER_KEY),
         lookup('SCHEDULER_IDLE_BEFORE_CLOSE', IDLE_BEFORE_CLOSE),
     )
     backend = connection.from_settings(settings)
     return cls(backend, *options)
Example #5
0
    def setup_redis(self):
        """Set up the redis connection and hook the crawler signals.

        Call this only after the spider has been given its crawler
        object, since both the settings and the signal manager are
        read from ``self.crawler``.
        """
        # NOTE: defaulting ``self.redis_key`` to '%s:start_urls' % self.name
        # is currently disabled:
        # if not self.redis_key:
        #     self.redis_key = '%s:start_urls' % self.name
        crawler = self.crawler
        self.server = connection.from_settings(crawler.settings)

        subscribe = crawler.signals.connect
        # spider_idle fires once the spider has no requests left; that
        # is the moment to schedule new requests from the redis queue.
        subscribe(self.spider_idle, signal=signals.spider_idle)
        subscribe(self.item_scraped, signal=signals.item_scraped)
Example #6
0
 def from_crawler(cls, crawler):
     """Build an instance from ``crawler``'s settings and stats.

     Options are read from ``crawler.settings`` with the matching
     default constants as fallbacks. The ``SCHEDULER_QUEUE_CLASS``
     value is resolved through ``load_object``. ``crawler.stats`` is
     passed as the last constructor argument.
     """
     cfg = crawler.settings
     persist_flag = cfg.get('SCHEDULER_PERSIST', SCHEDULER_PERSIST)
     q_key = cfg.get('SCHEDULER_QUEUE_KEY', QUEUE_KEY)
     q_class = load_object(cfg.get('SCHEDULER_QUEUE_CLASS', QUEUE_CLASS))
     df_key = cfg.get('DUPEFILTER_KEY', DUPEFILTER_KEY)
     idle_wait = cfg.get('SCHEDULER_IDLE_BEFORE_CLOSE', IDLE_BEFORE_CLOSE)
     # The server is created only after every option has been read.
     return cls(
         connection.from_settings(cfg),
         persist_flag,
         q_key,
         q_class,
         df_key,
         idle_wait,
         crawler.stats,
     )
Example #7
0
    def setup_redis(self):
        """Create the redis connection and register signal handlers.

        Must run after the spider's crawler object has been set: the
        connection is built from ``self.crawler.settings`` and the
        handlers are attached to ``self.crawler.signals``.
        """
        # NOTE: the fallback below for ``self.redis_key`` is disabled:
        # if not self.redis_key:
        #     self.redis_key = '%s:start_urls' % self.name
        self.server = connection.from_settings(self.crawler.settings)

        # The idle signal is sent when the spider has run out of
        # requests, which is when new requests get scheduled from the
        # redis queue.
        signal_manager = self.crawler.signals
        signal_manager.connect(self.spider_idle, signal=signals.spider_idle)
        signal_manager.connect(self.item_scraped, signal=signals.item_scraped)
Example #8
0
 def from_settings(cls, settings):
     """Return an instance built around a server made from ``settings``.

     The server object is obtained from
     ``connection.from_settings(settings)`` and is the only argument
     passed to the constructor.
     """
     return cls(connection.from_settings(settings))