Exemple #1
0
    def fetch(self,
              urls: List[str]) -> Generator[List[FetchResult], None, None]:
        """Fetch ``urls`` batch by batch and lazily yield each batch's results.

        The URLs are loaded into a ``URLQueue`` which is drained in chunks of
        at most ``self.fetch_batch_size`` entries. Every chunk is handed to
        ``self._fetch_parallel_job`` and run to completion on the loop
        returned by ``self._get_event_loop()`` before the next chunk is taken
        from the queue.

        Args:
            urls: The URLs to fetch.

        Yields:
            Whatever ``self._fetch_parallel_job`` produces for one batch
            (annotated as a list of ``FetchResult`` objects).

        Raises:
            ValueError: If ``self.fetch_batch_size`` is smaller than 1. As
                this is a generator, the error surfaces on first iteration,
                not when ``fetch`` is called.
        """
        batch_size = self.fetch_batch_size
        # A batch size below 1 would produce empty batches forever, because
        # the queue could never be drained.
        if batch_size < 1:
            raise ValueError(
                f"fetch_batch_size must be at least 1, got {batch_size!r}")

        loop = self._get_event_loop()
        queue = URLQueue(urls=urls)
        while not queue.is_empty():
            url_batch = []
            for _ in range(batch_size):
                # The last batch may be shorter than batch_size.
                if queue.is_empty():
                    break
                url_batch.append(next(queue))
            yield loop.run_until_complete(self._fetch_parallel_job(url_batch))
Exemple #2
0
 def test_url_queue_bool_true(self, url_queue: URLQueue):
     """A queue that has received ``self.test_urls`` must be truthy."""
     urls_to_add = self.test_urls
     url_queue.add(urls_to_add)
     assert url_queue
Exemple #3
0
 def test_process_url_queue(self, url_queue: URLQueue):
     """Iterating the queue yields only known URLs and leaves it empty."""
     url_queue.add(self.test_urls)

     # Every item handed out by the queue must be one of the test URLs.
     for queued_url in url_queue:
         assert queued_url in self.test_urls

     # After the iteration has finished nothing may be left in the queue.
     assert url_queue.is_empty()
Exemple #4
0
 def test_url_queue_is_empty_true(self, url_queue: URLQueue):
     """The queue passed in, with nothing added, reports itself as empty."""
     reports_empty = url_queue.is_empty()
     assert reports_empty
Exemple #5
0
 def test_url_queue_is_empty_false(self, url_queue: URLQueue):
     """After adding ``self.test_urls`` the queue must not report empty."""
     url_queue.add(self.test_urls)
     reports_empty = url_queue.is_empty()
     assert not reports_empty
Exemple #6
0
 def test_url_queue_clear(self, url_queue: URLQueue):
     """``clear()`` leaves ``urls`` equal to an empty list."""
     url_queue.add(self.test_urls)
     url_queue.clear()
     remaining_urls = url_queue.urls
     assert remaining_urls == []
Exemple #7
0
 def test_url_queue_len(self, url_queue: URLQueue):
     """``len()`` is 0 before adding and matches the added URLs afterwards."""
     length_before = len(url_queue)
     assert length_before == 0

     url_queue.add(self.test_urls)
     length_after = len(url_queue)
     assert length_after == len(self.test_urls)
Exemple #8
0
 def test_url_queue_add_fail(self, url_queue: URLQueue):
     """Adding the integer 42 must raise ``ValueError``."""
     not_a_url = 42
     with pytest.raises(ValueError):
         url_queue.add(not_a_url)
Exemple #9
0
 def test_add_url_to_queue(self, url_queue: URLQueue):
     """Adding a single URL grows ``urls`` by one and stores that URL."""
     new_url = 'https://code-specialist.com'
     expected_length = len(url_queue.urls) + 1

     url_queue.add(new_url)

     assert len(url_queue.urls) == expected_length
     assert new_url in url_queue.urls
Exemple #10
0
 def test_fill_url_queue(self, url_queue: URLQueue):
     """After ``add(self.test_urls)``, ``urls`` equals ``self.test_urls``."""
     url_queue.add(self.test_urls)
     stored_urls = url_queue.urls
     assert stored_urls == self.test_urls
Exemple #11
0
 def test_create_filled_url_queue(self):
     """A queue constructed from ``self.test_urls`` exposes them as ``urls``."""
     prefilled_queue = URLQueue(self.test_urls)
     stored_urls = prefilled_queue.urls
     assert stored_urls == self.test_urls
Exemple #12
0
 def url_queue() -> URLQueue:
     """Return a ``URLQueue`` constructed without arguments.

     NOTE(review): presumably registered as a pytest fixture; the decorator
     is not visible in this excerpt -- confirm.
     """
     fresh_queue = URLQueue()
     return fresh_queue