def open(self, spider):
    """Bind *spider* to this object and prepare its supporting state.

    Steps, in order: remember the spider, pass this object's logger to
    it via ``set_logger``, call ``create_queues`` and
    ``setup_zookeeper``, then build the duplicate filter.

    NOTE(review): presumably invoked by the crawler when the spider is
    opened -- confirm against the caller.
    """
    self.spider = spider
    self.spider.set_logger(self.logger)

    self.create_queues()
    self.setup_zookeeper()

    # The filter key is the spider's name followed by ':dupefilter'.
    dupefilter_key = self.spider.name + ':dupefilter'
    self.dupefilter = RFPDupeFilter(
        self.redis_conn, dupefilter_key, self.rfp_timeout)
def open(self, spider):
    """Bind *spider* to this object and prepare its supporting state.

    Steps, in order: remember the spider, pass this object's logger to
    it via ``set_logger``, call ``create_queues`` and
    ``setup_zookeeper``, then build three filters that all share
    ``self.redis_conn`` and use keys prefixed with the spider's name:

    * ``self.dupefilter`` (``RFPDupeFilter``)
    * ``self.global_page_per_domain_filter``
      (``RFGlobalPagePerDomainFilter``)
    * ``self.domain_max_page_filter`` (``RFDomainMaxPageFilter``)

    NOTE(review): presumably invoked by the crawler when the spider is
    opened -- confirm against the caller.
    """
    self.spider = spider
    self.spider.set_logger(self.logger)

    self.create_queues()
    self.setup_zookeeper()

    # Each key is derived right before its filter is constructed.
    dupefilter_key = self.spider.name + ':dupefilter'
    self.dupefilter = RFPDupeFilter(
        self.redis_conn, dupefilter_key, self.rfp_timeout)

    page_count_key = self.spider.name + ':global_page_count_filter'
    self.global_page_per_domain_filter = RFGlobalPagePerDomainFilter(
        self.redis_conn,
        page_count_key,
        self.global_page_per_domain_limit,
        self.global_page_per_domain_limit_timeout)

    max_page_key = self.spider.name + ':domain_max_page_filter'
    self.domain_max_page_filter = RFDomainMaxPageFilter(
        self.redis_conn, max_page_key, self.domain_max_page_timeout)
def setUp(self):
    """Create the ``RFPDupeFilter`` fixture used by the tests.

    The filter is built from a ``MagicMock`` stand-in, the literal key
    ``'key'`` and the value ``1``, and stored on ``self.dupe``.
    """
    mock_backend = MagicMock()
    self.dupe = RFPDupeFilter(mock_backend, 'key', 1)