def fetch(self, urls: List[str]) -> Generator[List[FetchResult], None, None]:
    """Fetch the given URLs batch by batch.

    The URLs are loaded into a URLQueue and drained in batches of at most
    ``self.fetch_batch_size`` entries. Each batch is handed to
    ``self._fetch_parallel_job`` and run to completion on the event loop
    before the next batch is taken from the queue.

    Args:
        urls: The URLs to fetch.

    Yields:
        The result of one completed batch job; per the return annotation
        this is a list of FetchResult objects.
    """
    event_loop = self._get_event_loop()
    pending = URLQueue(urls=urls)
    while not pending.is_empty():
        batch = []
        for _ in range(self.fetch_batch_size):
            # The last batch may be shorter than fetch_batch_size: once the
            # queue has drained, the remaining slots are simply left unfilled.
            if not pending.is_empty():
                batch.append(next(pending))
        batch_job = self._fetch_parallel_job(batch)
        yield event_loop.run_until_complete(batch_job)
def test_url_queue_bool_true(self, url_queue: URLQueue):
    """A queue that has been given the test URLs evaluates as truthy."""
    url_queue.add(self.test_urls)
    assert bool(url_queue)
def test_process_url_queue(self, url_queue: URLQueue):
    """Iterating the queue yields only known test URLs and leaves it empty."""
    url_queue.add(self.test_urls)
    for queued_url in url_queue:
        assert queued_url in self.test_urls
    # Iteration is expected to consume the queue, so nothing may remain.
    drained = url_queue.is_empty()
    assert drained
def test_url_queue_is_empty_true(self, url_queue: URLQueue):
    """The queue supplied by ``url_queue`` reports itself as empty."""
    empty = url_queue.is_empty()
    assert empty
def test_url_queue_is_empty_false(self, url_queue: URLQueue):
    """After adding the test URLs the queue no longer reports empty."""
    url_queue.add(self.test_urls)
    empty = url_queue.is_empty()
    assert not empty
def test_url_queue_clear(self, url_queue: URLQueue):
    """``clear()`` resets the stored URLs to an empty list."""
    url_queue.add(self.test_urls)
    url_queue.clear()
    remaining = url_queue.urls
    assert remaining == []
def test_url_queue_len(self, url_queue: URLQueue):
    """``len()`` is 0 before adding and matches the URL count afterwards."""
    initial_size = len(url_queue)
    assert initial_size == 0

    url_queue.add(self.test_urls)
    filled_size = len(url_queue)
    assert filled_size == len(self.test_urls)
def test_url_queue_add_fail(self, url_queue: URLQueue):
    """Adding an integer instead of URL data raises ``ValueError``."""
    invalid_entry = 42
    with pytest.raises(ValueError):
        url_queue.add(invalid_entry)
def test_add_url_to_queue(self, url_queue: URLQueue):
    """Adding one URL string grows ``urls`` by one and stores that URL."""
    new_url = 'https://code-specialist.com'
    size_before = len(url_queue.urls)

    url_queue.add(new_url)

    size_after = len(url_queue.urls)
    assert size_after == size_before + 1
    assert new_url in url_queue.urls
def test_fill_url_queue(self, url_queue: URLQueue):
    """Adding the test URL list makes ``urls`` equal to that list."""
    url_queue.add(self.test_urls)
    stored_urls = url_queue.urls
    assert stored_urls == self.test_urls
def test_create_filled_url_queue(self):
    """Passing the test URLs to the constructor stores them in ``urls``."""
    prefilled_queue = URLQueue(self.test_urls)
    stored_urls = prefilled_queue.urls
    assert stored_urls == self.test_urls
def url_queue() -> URLQueue:
    """Create a new URLQueue constructed without any arguments."""
    fresh_queue = URLQueue()
    return fresh_queue