Beispiel #1
0
    def test_download_all(self):
        """Download every stub URL twice and check the second pass adds no requests."""
        stub_client = HttpClientStub()
        downloader = Downloader(http_client_factory=lambda: stub_client)
        downloader.prepare(ArgsStub())
        first_pass = ConsumerStub()
        second_pass = ConsumerStub()
        urls = list(stub_client.get_data().keys())

        # Feed the same URL list through the downloader twice, one consumer
        # per pass; the second pass is presumably answered from a cache.
        for sink in (first_pass, second_pass):
            asyncio.run(downloader.download_all(urls, sink))

        # The first pass must have delivered every page known to the stub,
        # and the stub must have been asked for exactly the requested URLs.
        self.assertEqual(first_pass.get_data(), stub_client.get_data())
        self.assertEqual(stub_client.get_calls(), urls)

        # The second pass must deliver the same pages while the recorded
        # HTTP calls stay unchanged, i.e. nothing was fetched again.
        self.assertEqual(second_pass.get_data(), stub_client.get_data())
        self.assertEqual(stub_client.get_calls(), urls)
Beispiel #2
0
    def test_qps(self):
        """Check that ``args.qps = 1`` throttles downloads to about one per second.

        All stub URLs are downloaded in a single ``download_all`` call and the
        elapsed time is compared against ``len(urls) - 1`` seconds, with a
        half-second tolerance in either direction.
        """
        # Function-scope import so this test does not depend on the
        # file-level import list.
        import time

        http_client = HttpClientStub()
        downloader = Downloader(http_client_factory=lambda: http_client)
        args = ArgsStub()
        args.qps = 1
        downloader.prepare(args)
        consumer = ConsumerStub()
        urls = list(http_client.get_data().keys())

        # Use a monotonic clock: datetime.now() is naive local wall-clock
        # time and may jump (NTP adjustment, DST switch, manual change),
        # which would make this timing assertion flaky.
        start = time.monotonic()
        asyncio.run(downloader.download_all(urls, consumer))
        elapsed = time.monotonic() - start

        # Verify that at qps = 1, we spent ~1 sec per download. Presumably
        # the first request is not delayed, hence len(urls) - 1 seconds.
        tolerance = 0.5
        expected = len(urls) - 1
        self.assertLess(elapsed, expected + tolerance)
        self.assertGreater(elapsed, expected - tolerance)
Beispiel #3
0
 def update_all(self):
     """Import every dataset yielded by the downloader and print the total."""
     # Importer.from_lines' return values are added up per dataset;
     # presumably each is the number of rows inserted (see printed label).
     inserted = sum(
         Importer.from_lines(lines) for lines in Downloader.download_all()
     )
     print("Inserted ", inserted)
def test_download_all(mock_get_sources, mock_download):
    """Exhaust ``Downloader.download_all()`` and check the download calls.

    Both mocks are presumably injected by patch decorators that are not
    visible here; ``mock_get_sources`` is not used directly in the body.
    """
    # Iterate download_all() to the end so that all of its work is
    # performed; the produced items themselves are irrelevant.
    list(Downloader.download_all())

    expected_calls = [call("source1"), call("source2")]
    mock_download.assert_has_calls(expected_calls)