def configure_request_sharing(self):
        """Install the shared request-handling state on ``self._baseclass``.

        Each of ``_queue_size``, ``shared_dupefilter`` and ``_request_queue``
        is created only when ``self._baseclass`` does not already expose it,
        so values that are already present are left untouched.
        """
        shared_state = self._baseclass

        # Every default is wrapped in a callable so that it is only built
        # (and the names it needs are only looked up) when the attribute is
        # actually missing.
        lazy_defaults = (
            ('_queue_size', lambda: 0),
            ('shared_dupefilter',
             lambda: RFPDupeFilter.from_settings(self.settings)),
            ('_request_queue', lambda: PriorityQueue()),
        )

        for attr_name, make_default in lazy_defaults:
            if not hasattr(shared_state, attr_name):
                setattr(shared_state, attr_name, make_default())
# Example #2
# 0
    def from_settings(cls, settings):
        """Create a dupefilter configured from ``settings``.

        When ``SCRAPY_REDIS_ENABLED`` is not a true value, the call is
        delegated to ``RFPDupeFilter.from_settings`` and its result is
        returned as-is.  Otherwise a ``cls`` instance is built on the server
        returned by ``get_redis_from_settings``, using a key made from
        ``defaults.DUPEFILTER_KEY`` and the current Unix timestamp (by
        default ``dupefilter:<timestamp>``).  If ``BLOOMFILTER_ENABLED`` is
        on, the instance also receives a ``BloomFilter`` bound to the same
        server and key, and its ``request_seen`` is rebound to
        ``bloom_request_seen``.

        This method is not used with the
        ``scrapy_redis.scheduler.Scheduler`` class, which needs to pass the
        spider name in the key.

        """
        redis_backed = settings.getbool('SCRAPY_REDIS_ENABLED')
        if not redis_backed:
            return RFPDupeFilter.from_settings(settings)

        redis_server = get_redis_from_settings(settings)
        dupefilter_key = defaults.DUPEFILTER_KEY % {
            'timestamp': int(time.time()),
        }
        dupefilter = cls(
            redis_server,
            key=dupefilter_key,
            debug=settings.getbool('DUPEFILTER_DEBUG'),
        )

        use_bloom = settings.getbool('BLOOMFILTER_ENABLED')
        if use_bloom:
            dupefilter.bloomfilter = BloomFilter(redis_server, dupefilter_key)
            # Shadow the method on this instance only, so lookups go through
            # the bloom-filter variant.
            dupefilter.request_seen = dupefilter.bloom_request_seen
        return dupefilter