def open(self, spider):
    """Bind ``spider`` to this object and prepare its supporting resources.

    The spider is handed this object's logger and Redis connection and is
    asked to set up its stats; afterwards the queues and the Zookeeper
    integration are initialised and a duplicate filter is created.

    Args:
        spider: The spider being opened. It must provide ``set_logger``,
            ``set_redis``, ``setup_stats`` and a ``name`` attribute.
    """
    self.spider = spider

    # Give the spider the shared logger and Redis connection before it
    # sets up its stats.
    spider.set_logger(self.logger)
    spider.set_redis(self.redis_conn)
    spider.setup_stats()

    self.create_queues()
    self.setup_zookeeper()

    # The duplicate-filter key is derived from the spider's name.
    dupefilter_key = self.spider.name + ':dupefilter'
    self.dupefilter = RFPDupeFilter(
        self.redis_conn,
        dupefilter_key,
        self.rfp_timeout,
    )
def open(self, spider):
    """Bind ``spider`` to this object and create its queue and dupefilter.

    Builds the request queue from ``self.queue_cls`` and a duplicate filter
    whose key is ``self.dupefilter_key`` formatted with the spider's name.
    A negative ``idle_before_close`` is clamped to 0. If the queue already
    holds requests, a "Resuming crawl" message is logged through the spider.

    Args:
        spider: The spider being opened. It must provide ``name`` and
            ``log``.
    """
    self.spider = spider
    self.queue = self.queue_cls(self.server, spider, self.queue_key)
    self.df = RFPDupeFilter(
        self.server,
        self.dupefilter_key % {'spider': spider.name},
        10,
    )

    # Negative idle times are treated as "no idle time".
    if self.idle_before_close < 0:
        self.idle_before_close = 0

    # notice if there are requests already in the queue to resume the crawl
    # The length is read once: the queue is built on ``self.server``, so
    # each ``len()`` is presumably a backend query, and reusing the value
    # keeps the logged count equal to the one that was checked.
    pending = len(self.queue)
    if pending:
        spider.log("Resuming crawl (%d requests scheduled)" % pending)
def open(self, spider):
    """Bind ``spider`` to this object, run setup, and create the dupefilter.

    Args:
        spider: The spider being opened. It must provide a ``name``
            attribute, which is used to build the duplicate-filter key.
    """
    self.spider = spider
    self.setup()

    # The duplicate-filter key is derived from the spider's name.
    dupefilter_key = self.spider.name + ':dupefilter'
    self.dupefilter = RFPDupeFilter(
        self.redis_conn,
        dupefilter_key,
        self.rfp_timeout,
    )