예제 #1
0
    def test_queue_get_requests_with_count(self):
        """Pump a crawler through a custom queue and check the ``count`` hint.

        The downloader stub reports five workers, and the queue stub asserts
        that every ``get_requests`` call is made with ``count == 5``.
        """

        class SimpleQueue(BaseQueue):
            """Minimal list-backed queue used only by this test."""

            def __init__(self):
                self.requests = []

            def put_requests(self, request):
                self.requests.append(request)

            def get_requests(self, count=None):
                # Presumably the engine forwards the downloader's worker
                # count here; the downloader stub below reports 5.
                assert count == 5
                if not self.requests:
                    return None  # empty queue
                return self.requests.pop()

        class DummyDownloaderWithWorkers(DummyDownloader):
            """Downloader stub reporting a fixed number of workers."""

            def get_workers_count(self):
                return 5

        engine = Pomp(
            downloader=DummyDownloaderWithWorkers(),
            middlewares=(url_to_request_middl,),
        )

        # Swap the engine's own queue for the asserting stub.
        engine.queue = SimpleQueue()

        engine.pump(Crawler())
예제 #2
0
    def test_queue_get_requests_with_count(self):
        """Run the engine with a five-worker downloader and a checking queue.

        The queue stub fails the test if ``get_requests`` is ever called with
        a ``count`` other than 5.
        """
        class SimpleQueue(BaseQueue):
            """List-backed queue stub; newest request is handed out first."""

            def __init__(self):
                self.requests = []

            def put_requests(self, request):
                self.requests.append(request)

            def get_requests(self, count=None):
                # The downloader stub in this test reports 5 workers; the
                # requested count is expected to match that value.
                assert count == 5
                if not self.requests:
                    return None  # empty queue
                return self.requests.pop()

        class DummyDownloaderWithWorkers(DummyDownloader):
            """Downloader stub whose worker count is always 5."""

            def get_workers_count(self):
                return 5

        engine = Pomp(
            downloader=DummyDownloaderWithWorkers(),
            middlewares=(url_to_request_middl,),
        )

        # Replace the internal queue with the stub defined above.
        engine.queue = SimpleQueue()

        engine.pump(Crawler())
예제 #3
0
    def test_queue_crawler(self):
        """Pump a crawler through a custom queue and check collected URLs.

        The downloader has no worker-count override here, so the queue stub
        asserts that ``get_requests`` is always called with ``count=None``.
        """
        collector = RoadPipeline()

        class SimpleQueue(BaseQueue):
            """Minimal list-backed queue used only by this test."""

            def __init__(self):
                self.requests = []

            def put_requests(self, request):
                self.requests.append(request)

            def get_requests(self, count=None):
                # Plain DummyDownloader: no count is expected to be passed.
                assert count is None
                if not self.requests:
                    return None  # empty queue
                return self.requests.pop()

        engine = Pomp(
            downloader=DummyDownloader(),
            middlewares=[url_to_request_middl],
            pipelines=[collector],
        )

        # Swap the engine's own queue for the asserting stub.
        engine.queue = SimpleQueue()

        engine.pump(Crawler())

        # Order of collection is irrelevant; compare as sets.
        collected_urls = set(item.url for item in collector.collection)
        expected_urls = set([
            'http://python.org/1',
            'http://python.org/1/trash',
            'http://python.org/2',
        ])
        assert collected_urls == expected_urls
예제 #4
0
    def test_queue_crawler(self):
        """Crawl with a user-supplied queue and verify the pipeline output.

        The queue stub checks that ``get_requests`` receives ``count=None``
        on every call, and the pipeline must end up with the three expected
        URLs.
        """
        collector = RoadPipeline()

        class SimpleQueue(BaseQueue):
            """List-backed queue stub; newest request is handed out first."""

            def __init__(self):
                self.requests = []

            def put_requests(self, request):
                self.requests.append(request)

            def get_requests(self, count=None):
                # The downloader used below defines no worker count.
                assert count is None
                if not self.requests:
                    return None  # empty queue
                return self.requests.pop()

        engine = Pomp(
            downloader=DummyDownloader(),
            middlewares=[url_to_request_middl],
            pipelines=[collector],
        )

        # Replace the internal queue with the stub defined above.
        engine.queue = SimpleQueue()

        engine.pump(Crawler())

        # Compare as sets so the crawl order does not matter.
        collected_urls = set(item.url for item in collector.collection)
        expected_urls = set([
            'http://python.org/1',
            'http://python.org/1/trash',
            'http://python.org/2',
        ])
        assert collected_urls == expected_urls