def test_redirect_disabled_enqueue_requests(self):
    """Check enqueue_request returns False when REDIRECT_ENABLED is False.

    The scheduler is built on a FakeCrawler carrying the customised
    settings and is opened before rr1, rr2 and rr3 are offered to it.
    """
    config = Settings()
    config['REDIRECT_ENABLED'] = False
    scheduler = FronteraScheduler(
        FakeCrawler(config), manager=FakeFrontierManager)
    scheduler.open(Spider)
    # Every one of the three requests is expected to be refused.
    for candidate in (rr1, rr2, rr3):
        assert scheduler.enqueue_request(candidate) is False
Beispiel #2
0
 def test_redirect_disabled_enqueue_requests(self):
     """Check rr1, rr2 and rr3 are refused with REDIRECT_ENABLED off.

     Each call to ``enqueue_request`` must return exactly ``False``.
     """
     disabled = Settings()
     disabled['REDIRECT_ENABLED'] = False
     fake_crawler = FakeCrawler(disabled)
     scheduler = FronteraScheduler(fake_crawler, manager=FakeFrontierManager)
     scheduler.open(Spider)
     outcomes = [scheduler.enqueue_request(req) for req in (rr1, rr2, rr3)]
     # Identity check per result, so falsy non-bool values do not pass.
     for outcome in outcomes:
         assert outcome is False
 def test_enqueue_requests(self):
     """Check r1, r2 and r3 are accepted and recorded as frontier seeds.

     After enqueueing, the manager's seeds must carry exactly the three
     request URLs, every seed must be an ``FRequest``, and the
     ``frontera/seeds_count`` stat must equal 3.
     """
     scheduler = FronteraScheduler(FakeCrawler(), manager=FakeFrontierManager)
     scheduler.open(Spider)
     for candidate in (r1, r2, r3):
         assert scheduler.enqueue_request(candidate) is True
     expected_urls = {r1.url, r2.url, r3.url}
     seed_urls = {seed.url for seed in scheduler.frontier.manager.seeds}
     assert seed_urls == expected_urls
     for seed in scheduler.frontier.manager.seeds:
         assert isinstance(seed, FRequest)
     seeds_count = scheduler.stats_manager.stats.get_value('frontera/seeds_count')
     assert seeds_count == 3
 def test_redirect_enabled_enqueue_requests(self):
     """Check rr1, rr2 and rr3 are accepted with REDIRECT_ENABLED on.

     All three requests must afterwards be found, by URL, in the
     scheduler's ``_pending_requests``.
     """
     config = Settings()
     config['REDIRECT_ENABLED'] = True
     scheduler = FronteraScheduler(
         FakeCrawler(config), manager=FakeFrontierManager)
     scheduler.open(Spider)
     for candidate in (rr1, rr2, rr3):
         assert scheduler.enqueue_request(candidate) is True
     pending_urls = {pending.url for pending in scheduler._pending_requests}
     assert pending_urls == {rr1.url, rr2.url, rr3.url}
Beispiel #5
0
 def test_redirect_enabled_enqueue_requests(self):
     """Check enqueue_request returns True when REDIRECT_ENABLED is True.

     The URLs held in ``_pending_requests`` must match those of rr1, rr2
     and rr3 once all three have been enqueued.
     """
     enabled = Settings()
     enabled['REDIRECT_ENABLED'] = True
     fake_crawler = FakeCrawler(enabled)
     scheduler = FronteraScheduler(fake_crawler, manager=FakeFrontierManager)
     scheduler.open(Spider)
     offered = (rr1, rr2, rr3)
     for candidate in offered:
         assert scheduler.enqueue_request(candidate) is True
     queued_urls = set(item.url for item in scheduler._pending_requests)
     offered_urls = set(item.url for item in offered)
     assert queued_urls == offered_urls
 def test_redirect_disabled_enqueue_requests(self):
     """Check nothing is enqueued for rr1-rr3 when REDIRECT_ENABLED is False.

     ``enqueue_request`` must return False for each request, the frontier
     manager must hold no seeds, and ``get_value('frontera/seeds_count')``
     must return None.
     """
     settings = Settings()
     settings['REDIRECT_ENABLED'] = False
     crawler = FakeCrawler(settings)
     fs = FronteraScheduler(crawler, manager=FakeFrontierManager)
     fs.open(Spider)
     for request in (rr1, rr2, rr3):
         assert fs.enqueue_request(request) is False
     assert len(fs.frontier.manager.seeds) == 0
     # Identity comparison: ``== None`` can be satisfied by any object with a
     # permissive __eq__, whereas ``is None`` only passes for None itself.
     assert fs.stats_manager.stats.get_value('frontera/seeds_count') is None
 def test_enqueue_requests(self):
     """Check r1, r2 and r3 are enqueued as seeds of type ``FRequest``.

     Also verifies that the seed URLs equal the request URLs and that the
     ``frontera/seeds_count`` stat is 3.
     """
     fake_crawler = FakeCrawler()
     scheduler = FronteraScheduler(fake_crawler, manager=FakeFrontierManager)
     scheduler.open(Spider)
     offered = (r1, r2, r3)
     for candidate in offered:
         assert scheduler.enqueue_request(candidate) is True
     seed_urls = set(seed.url for seed in scheduler.frontier.manager.seeds)
     assert seed_urls == set(candidate.url for candidate in offered)
     seed_is_frequest = [
         isinstance(seed, FRequest)
         for seed in scheduler.frontier.manager.seeds
     ]
     assert all(seed_is_frequest)
     stats = scheduler.stats_manager.stats
     assert stats.get_value('frontera/seeds_count') == 3
 def test_redirect_disabled_enqueue_requests(self):
     """Check only rr3 is enqueued when REDIRECT_ENABLED is False.

     ``enqueue_request`` must return False for rr1 and rr2 and True for
     rr3. The frontier manager must then hold exactly one seed, an
     ``FRequest`` whose URL equals ``rr3.url``, and the
     ``frontera/seeds_count`` stat must be 1.
     """
     settings = Settings()
     settings['REDIRECT_ENABLED'] = False
     crawler = FakeCrawler(settings)
     fs = FronteraScheduler(crawler, manager=FakeFrontierManager)
     fs.open(Spider)
     assert fs.enqueue_request(rr1) is False
     assert fs.enqueue_request(rr2) is False
     assert fs.enqueue_request(rr3) is True
     # Assert the length before indexing, so an empty seed list fails with a
     # clear assertion instead of an IndexError on seeds[0].
     assert len(fs.frontier.manager.seeds) == 1
     assert isinstance(fs.frontier.manager.seeds[0], FRequest)
     assert fs.frontier.manager.seeds[0].url == rr3.url
     assert fs.stats_manager.stats.get_value('frontera/seeds_count') == 1
 def test_redirect_enabled_enqueue_requests(self):
     """Check how rr1-rr3 are split between pending requests and seeds.

     With REDIRECT_ENABLED set to True all three calls must return True.
     rr3 must end up as the single ``FRequest`` seed, rr1 and rr2 must be
     in ``_pending_requests`` as ``Request`` objects, and the stats must
     report one seed and two redirected requests.
     """
     config = Settings()
     config['REDIRECT_ENABLED'] = True
     scheduler = FronteraScheduler(
         FakeCrawler(config), manager=FakeFrontierManager)
     scheduler.open(Spider)
     for candidate in (rr1, rr2, rr3):
         assert scheduler.enqueue_request(candidate) is True

     # Seed side: exactly one entry, taken from rr3.
     assert len(scheduler.frontier.manager.seeds) == 1
     first_seed = scheduler.frontier.manager.seeds[0]
     assert isinstance(first_seed, FRequest)
     assert scheduler.frontier.manager.seeds[0].url == rr3.url

     # Pending side: rr1 and rr2, kept as Request instances.
     pending_urls = {pending.url for pending in scheduler._pending_requests}
     assert pending_urls == {rr1.url, rr2.url}
     for pending in scheduler._pending_requests:
         assert isinstance(pending, Request)

     assert scheduler.stats_manager.stats.get_value(
         'frontera/seeds_count') == 1
     assert scheduler.stats_manager.stats.get_value(
         'frontera/redirected_requests_count') == 2
 def test_redirect_enabled_enqueue_requests(self):
     """Check rr1 and rr2 become pending requests when REDIRECT_ENABLED is True.

     ``enqueue_request`` must return True for rr1 and rr2 and False for
     rr3. ``_pending_requests`` must then contain exactly the rr1 and rr2
     URLs as ``Request`` objects, ``get_value('frontera/seeds_count')``
     must return None, and ``frontera/redirected_requests_count`` must be 2.
     """
     settings = Settings()
     settings['REDIRECT_ENABLED'] = True
     crawler = FakeCrawler(settings)
     fs = FronteraScheduler(crawler, manager=FakeFrontierManager)
     fs.open(Spider)
     assert fs.enqueue_request(rr1) is True
     assert fs.enqueue_request(rr2) is True
     assert fs.enqueue_request(rr3) is False
     assert set([request.url for request in fs._pending_requests
                 ]) == set([rr1.url, rr2.url])
     assert all(
         [isinstance(request, Request) for request in fs._pending_requests])
     # Identity comparison: ``== None`` can be satisfied by any object with a
     # permissive __eq__, whereas ``is None`` only passes for None itself.
     assert fs.stats_manager.stats.get_value('frontera/seeds_count') is None
     assert fs.stats_manager.stats.get_value(
         'frontera/redirected_requests_count') == 2