def from_settings(cls, settings):
    """Build an instance backed by a connection created from ``settings``.

    The instance is given a one-time key of the form
    ``dupefilter:<unix timestamp>``.
    """
    redis_conn = connection.from_settings(settings)
    # A one-time key is generated so that this class can be used as a
    # standalone dupefilter with scrapy's default scheduler. It would not
    # be needed if scrapy passed the spider to the open() method.
    one_time_key = "dupefilter:%s" % int(time.time())
    return cls(redis_conn, one_time_key)
def from_settings(cls, settings):
    """Return ``cls(server, key)`` using a connection built from ``settings``.

    ``key`` is a throwaway ``dupefilter:<seconds since epoch>`` string.
    """
    srv = connection.from_settings(settings)
    # The throwaway key exists only to support using this class as a
    # standalone dupefilter with scrapy's default scheduler; if scrapy
    # passed the spider to open(), it would be unnecessary.
    stamp = int(time.time())
    return cls(srv, "dupefilter:%s" % stamp)
def from_crawler(cls, crawler):
    """Create an instance from ``crawler`` and attach crawler state to it.

    The connection is built from ``crawler.settings``; the resulting
    instance gets ``settings`` and ``crawler`` attributes before being
    returned.
    """
    cfg = crawler.settings
    instance = cls(connection.from_settings(cfg))
    # Keep references to the settings and the crawler on the instance.
    instance.settings = cfg
    instance.crawler = crawler
    return instance
def from_settings(cls, settings):
    """Build an instance from scheduler-related entries in ``settings``.

    Every option is read with ``settings.get`` and falls back to the
    matching module-level default when the key is missing. The queue class
    setting is resolved to an object with ``load_object``.
    """
    lookup = settings.get
    # Options are gathered in the positional order expected by ``cls``
    # (after the server argument).
    options = (
        lookup('SCHEDULER_PERSIST', SCHEDULER_PERSIST),
        lookup('SCHEDULER_QUEUE_KEY', QUEUE_KEY),
        load_object(lookup('SCHEDULER_QUEUE_CLASS', QUEUE_CLASS)),
        lookup('DUPEFILTER_KEY', DUPEFILTER_KEY),
        lookup('SCHEDULER_IDLE_BEFORE_CLOSE', IDLE_BEFORE_CLOSE),
    )
    # The connection is created last, once all options have been resolved.
    return cls(connection.from_settings(settings), *options)
def setup_redis(self):
    """Create the redis connection and subscribe to crawler signals.

    Call this only after the spider has been given its crawler object:
    both the settings and the signal manager are read from
    ``self.crawler``.
    """
    # NOTE: a default ``redis_key`` of '<spider name>:start_urls' used to be
    # assigned here when none was set; that fallback is currently disabled.
    crawler = self.crawler
    self.server = connection.from_settings(crawler.settings)

    # The idle signal is sent when the spider has no requests left, which is
    # the moment to schedule new requests from the redis queue.
    crawler.signals.connect(self.spider_idle, signal=signals.spider_idle)
    crawler.signals.connect(self.item_scraped, signal=signals.item_scraped)
def from_crawler(cls, crawler):
    """Build an instance from ``crawler.settings`` and ``crawler.stats``.

    Each option is read with ``settings.get`` and defaults to the matching
    module-level constant. The queue class setting is resolved with
    ``load_object``. ``crawler.stats`` is passed as the final argument.
    """
    cfg = crawler.settings
    keep_on_close = cfg.get('SCHEDULER_PERSIST', SCHEDULER_PERSIST)
    requests_key = cfg.get('SCHEDULER_QUEUE_KEY', QUEUE_KEY)
    queue_class_path = cfg.get('SCHEDULER_QUEUE_CLASS', QUEUE_CLASS)
    queue_class = load_object(queue_class_path)
    seen_key = cfg.get('DUPEFILTER_KEY', DUPEFILTER_KEY)
    idle_timeout = cfg.get('SCHEDULER_IDLE_BEFORE_CLOSE', IDLE_BEFORE_CLOSE)
    # The connection is created only after every option has been resolved.
    return cls(
        connection.from_settings(cfg),
        keep_on_close,
        requests_key,
        queue_class,
        seen_key,
        idle_timeout,
        crawler.stats,
    )
def setup_redis(self):
    """Open the redis connection and hook up the crawler signals.

    Must be invoked once the spider's crawler object has been set, because
    the connection settings and the signal manager both come from
    ``self.crawler``.
    """
    # NOTE: defaulting ``redis_key`` to '<spider name>:start_urls' when it is
    # unset is disabled here.
    self.server = connection.from_settings(self.crawler.settings)

    # spider_idle is sent when the spider has no requests left; that is when
    # new requests get scheduled from the redis queue.
    subscribe = self.crawler.signals.connect
    subscribe(self.spider_idle, signal=signals.spider_idle)
    subscribe(self.item_scraped, signal=signals.item_scraped)
def from_settings(cls, settings):
    """Return ``cls`` constructed with a connection built from ``settings``."""
    return cls(connection.from_settings(settings))