def configure_request_sharing(self):
    """Ensure the shared request attributes exist on ``self._baseclass``.

    Each attribute below is created on ``self._baseclass`` only when
    ``hasattr`` reports it missing, so a value that is already present
    (including one ``_baseclass`` inherits) is left untouched:

    * ``_queue_size`` -- initialised to ``0``.
    * ``shared_dupefilter`` -- built with
      ``RFPDupeFilter.from_settings(self.settings)``.
    * ``_request_queue`` -- a new ``PriorityQueue()``.

    NOTE(review): presumably ``_baseclass`` is common to several instances
    so that they share one queue and one dupefilter -- confirm against the
    place where ``_baseclass`` is assigned.
    """
    shared_holder = self._baseclass
    # Defaults are wrapped in factories and only called when needed, so
    # nothing is constructed for an attribute that already exists.
    lazy_defaults = (
        ('_queue_size', lambda: 0),
        ('shared_dupefilter',
         lambda: RFPDupeFilter.from_settings(self.settings)),
        ('_request_queue', lambda: PriorityQueue()),
    )
    for attr_name, make_default in lazy_defaults:
        if not hasattr(shared_holder, attr_name):
            setattr(shared_holder, attr_name, make_default())
def from_settings(cls, settings):
    """Build a dupefilter instance from the given settings.

    When the ``SCRAPY_REDIS_ENABLED`` setting is false, the result of
    ``RFPDupeFilter.from_settings(settings)`` is returned instead and no
    Redis connection is requested.

    Otherwise a Redis client is obtained from the settings and ``cls`` is
    instantiated with it. This uses by default the key
    ``dupefilter:<timestamp>``. When using the
    ``scrapy_redis.scheduler.Scheduler`` class, this method is not used as
    it needs to pass the spider name in the key.

    If ``BLOOMFILTER_ENABLED`` is true, a ``BloomFilter`` bound to the same
    client and key is attached as ``bloomfilter`` and the instance's
    ``request_seen`` is replaced by its ``bloom_request_seen``.
    """
    if not settings.getbool('SCRAPY_REDIS_ENABLED'):
        return RFPDupeFilter.from_settings(settings)

    redis_client = get_redis_from_settings(settings)
    # The key is stamped with the current time in whole seconds.
    filter_key = defaults.DUPEFILTER_KEY % {'timestamp': int(time.time())}
    dupefilter = cls(
        redis_client,
        key=filter_key,
        debug=settings.getbool('DUPEFILTER_DEBUG'),
    )

    if not settings.getbool('BLOOMFILTER_ENABLED'):
        return dupefilter

    # Swap the per-instance duplicate check to the bloom-filter variant.
    dupefilter.bloomfilter = BloomFilter(redis_client, filter_key)
    dupefilter.request_seen = dupefilter.bloom_request_seen
    return dupefilter