def test_queue_get_requests_with_count(self):
    """Queue.get_requests must be called with the downloader's worker count."""

    class WorkerDownloader(DummyDownloader):
        # Advertise a fixed pool of five workers to the engine.
        def get_workers_count(self):
            return 5

    class CountAssertingQueue(BaseQueue):
        def __init__(self):
            self.requests = []

        def get_requests(self, count=None):
            # Downloader can fetch only one request at moment
            assert count == 5
            try:
                return self.requests.pop()
            except IndexError:
                return  # empty queue

        def put_requests(self, request):
            self.requests.append(request)

    pomp = Pomp(
        downloader=WorkerDownloader(),
        middlewares=(url_to_request_middl, ),
    )

    # override internal queue with own
    pomp.queue = CountAssertingQueue()
    pomp.pump(Crawler())
def test_queue_get_requests_with_count(self):
    """Queue.get_requests must be called with the downloader's worker count.

    NOTE(review): this method is a verbatim duplicate of the previous
    ``test_queue_get_requests_with_count`` and shadows it inside the class;
    consider removing one of the two.
    """

    class FiveWorkerDownloader(DummyDownloader):
        def get_workers_count(self):
            # Fixed worker count the queue below asserts against.
            return 5

    class AssertingQueue(BaseQueue):
        def __init__(self):
            self.requests = []

        def get_requests(self, count=None):
            # Downloader can fetch only one request at moment
            assert count == 5
            if not self.requests:
                return  # empty queue
            return self.requests.pop()

        def put_requests(self, request):
            self.requests.append(request)

    pomp = Pomp(
        downloader=FiveWorkerDownloader(),
        middlewares=(url_to_request_middl, ),
    )

    # override internal queue with own
    pomp.queue = AssertingQueue()
    pomp.pump(Crawler())
def test_queue_crawler(self):
    """A user-supplied queue should drive the crawl and feed the pipeline."""
    road = RoadPipeline()

    class ListBackedQueue(BaseQueue):
        def __init__(self):
            self.requests = []

        def get_requests(self, count=None):
            # because downloader without workers
            assert count is None
            if not self.requests:
                return  # empty queue
            return self.requests.pop()

        def put_requests(self, request):
            self.requests.append(request)

    pomp = Pomp(
        downloader=DummyDownloader(),
        middlewares=[url_to_request_middl],
        pipelines=[road],
    )

    # override internal queue with own
    pomp.queue = ListBackedQueue()
    pomp.pump(Crawler())

    collected = {item.url for item in road.collection}
    assert collected == {
        'http://python.org/1',
        'http://python.org/1/trash',
        'http://python.org/2',
    }
def test_queue_crawler(self):
    """A user-supplied queue should drive the crawl and feed the pipeline.

    NOTE(review): this method is a verbatim duplicate of the previous
    ``test_queue_crawler`` and shadows it inside the class; consider
    removing one of the two.
    """
    road = RoadPipeline()

    class PlainQueue(BaseQueue):
        def __init__(self):
            self.requests = []

        def get_requests(self, count=None):
            # because downloader without workers
            assert count is None
            try:
                return self.requests.pop()
            except IndexError:
                return  # empty queue

        def put_requests(self, request):
            self.requests.append(request)

    pomp = Pomp(
        downloader=DummyDownloader(),
        middlewares=[url_to_request_middl],
        pipelines=[
            road,
        ],
    )

    # override internal queue with own
    pomp.queue = PlainQueue()
    pomp.pump(Crawler())

    expected_urls = {
        'http://python.org/1',
        'http://python.org/1/trash',
        'http://python.org/2',
    }
    assert {item.url for item in road.collection} == expected_urls