class SchedulerTest(TestCase):
    """Integration tests for the redis-backed Scheduler.

    Requires a live redis server reachable at REDIS_HOST:REDIS_PORT.
    All keys are created under the 'scrapy_redis:tests:' namespace and
    removed again in tearDown.
    """

    def setUp(self):
        # Fresh connection and a dedicated key namespace for each test.
        self.server = redis.Redis(REDIS_HOST, REDIS_PORT)
        self.key_prefix = 'scrapy_redis:tests:'
        # '%(spider)s' placeholders are filled in by the scheduler with
        # the spider's name when it opens.
        self.queue_key = self.key_prefix + '%(spider)s:requests'
        self.dupefilter_key = self.key_prefix + '%(spider)s:dupefilter'
        self.idle_before_close = 0
        # persist=False by default: close() should flush pending requests.
        self.scheduler = Scheduler(self.server, False, self.queue_key,
                                   SpiderQueue, self.dupefilter_key,
                                   self.idle_before_close)

    def tearDown(self):
        # BUGFIX: redis KEYS does glob-style matching, so the pattern
        # needs a trailing '*'. The bare prefix would only match a key
        # literally named 'scrapy_redis:tests:' and leave every test key
        # (queue, dupefilter) behind between runs.
        for key in self.server.keys(self.key_prefix + '*'):
            self.server.delete(key)

    def test_scheduler(self):
        """Enqueue/dequeue round-trip with duplicate filtering."""
        # default no persist
        self.assertFalse(self.scheduler.persist)
        spider = Spider('myspider')
        self.scheduler.open(spider)
        self.assertEqual(len(self.scheduler), 0)
        req = Request('http://example.com')
        self.scheduler.enqueue_request(req)
        self.assertTrue(self.scheduler.has_pending_requests())
        self.assertEqual(len(self.scheduler), 1)
        # dupefilter in action: the same request must not be queued twice.
        self.scheduler.enqueue_request(req)
        self.assertEqual(len(self.scheduler), 1)
        out = self.scheduler.next_request()
        self.assertEqual(out.url, req.url)
        self.assertFalse(self.scheduler.has_pending_requests())
        self.assertEqual(len(self.scheduler), 0)
        self.scheduler.close('finish')

    def test_scheduler_persistent(self):
        """With persist=True, the queue survives close() and the scheduler
        logs a resume message when reopened."""
        messages = []
        spider = Spider('myspider')
        # Capture the spider's log calls so the resume message can be
        # asserted on.
        spider.log = lambda *args, **kwargs: messages.append([args, kwargs])
        self.scheduler.persist = True
        self.scheduler.open(spider)
        self.assertEqual(messages, [])
        self.scheduler.enqueue_request(Request('http://example.com/page1'))
        self.scheduler.enqueue_request(Request('http://example.com/page2'))
        self.assertTrue(self.scheduler.has_pending_requests())
        self.scheduler.close('finish')
        # Reopening with persisted state should announce the resumed crawl.
        self.scheduler.open(spider)
        self.assertEqual(messages, [
            [('Resuming crawl (2 requests scheduled)',), {}],
        ])
        self.assertEqual(len(self.scheduler), 2)
        # Turning persistence off makes close() drop the pending requests.
        self.scheduler.persist = False
        self.scheduler.close('finish')
        self.assertEqual(len(self.scheduler), 0)