def test_download_unique(httpserver, tmpdir):
    """With ``overwrite='unique'`` existing files are kept and new names chosen."""
    httpserver.serve_content('SIMPLE = T')
    fname = "testing123"
    base = str(tmpdir.join(fname))
    targets = [base, base + '.fits', base + '.fits.gz']
    downloader = Downloader(overwrite='unique')
    # Pre-create every target file so the downloader is forced to uniquify.
    for target in targets:
        with open(target, "w") as handle:
            handle.write("Hello world")
        downloader.enqueue_file(httpserver.url, filename=target, chunksize=200)
    results = downloader.download()
    assert isinstance(results, Results)
    assert len(results) == len(targets)
    for written in results:
        # Each downloaded path is a fresh name carrying the ".1" suffix.
        assert written not in targets
        assert f"{fname}.1" in written
class ParallelDownloader:
    """Queues many remote files on one parfive Downloader and fetches them together."""

    def __init__(self):
        self.downloader = Downloader(
            progress=False,
            file_progress=False,
            notebook=False,
            overwrite=True,
        )
        # How many files were enqueued; compared against the results count below.
        self.total_files_added = 0

    async def append_file(self, link: str, download_path: Path):
        # Ensure the destination directory exists before queueing the file.
        await makedirs(download_path.parent, exist_ok=True)
        self.downloader.enqueue_file(
            url=link,
            path=download_path.parent,
            filename=download_path.name,
        )
        self.total_files_added += 1

    async def download_files(self, app: Application):
        """starts the download and waits for all files to finish"""
        # run this async, parfive will support aiofiles in the future as stated above
        run_download_async = aiofiles_os.wrap(self.downloader.download)
        settings = get_settings(app)
        outcome = await run_download_async(
            timeouts={
                "total": settings.downloader_max_timeout_seconds,
                "sock_read": 90,  # default as in parfive code
            }
        )
        log.debug("Download results %s", outcome)
        if len(outcome) != self.total_files_added:
            raise ExporterException(
                "Not all files were downloaded. Please check the logs above."
            )
class ParallelDownloader:
    """Aggregates downloads on one shared parfive Downloader instance."""

    def __init__(self) -> None:
        self.downloader = Downloader(progress=False, file_progress=False, notebook=False, overwrite=True)
        # number of files enqueued so far; checked against the results below
        self.total_files_added = 0

    async def append_file(self, link: str, download_path: Path) -> None:
        """Queue *link* to be saved at *download_path*, creating parent dirs first."""
        await makedirs(download_path.parent, exist_ok=True)
        self.downloader.enqueue_file(url=link, path=download_path.parent, filename=download_path.name)
        self.total_files_added += 1

    async def download_files(self, app: Application) -> None:
        """starts the download and waits for all files to finish"""
        exporter_settings = get_plugin_settings(app)
        # guard against being called while the exporter plugin is disabled
        assert (  # nosec
            exporter_settings is not None
        ), "this call was not expected with a disabled plugin"  # nosec
        results = await self.downloader.run_download(
            timeouts={
                "total": exporter_settings.EXPORTER_DOWNLOADER_MAX_TIMEOUT_SECONDS,
                "sock_read": 90,  # default as in parfive code
            })
        log.debug("Download %s using %s", f"{results=}", f"{self.downloader=}")
        # a count mismatch or any recorded error means the export failed
        if len(results) != self.total_files_added or len(results.errors) > 0:
            message = f"Not all files were downloaded: {results.errors=}"
            log.error(message)
            raise ExporterException(message)
def test_raises_other_exception(httpserver, tmpdir):
    """Non-download exceptions are captured in the Results error list."""
    dest = str(tmpdir)
    httpserver.serve_content('SIMPLE = T')
    downloader = Downloader()
    downloader.enqueue_file(httpserver.url, path=dest)
    results = downloader.download()
    assert isinstance(results.errors[0].exception, ValueError)
def test_setup():
    """A fresh Downloader starts with empty queues and five tokens."""
    downloader = Downloader()
    assert isinstance(downloader, Downloader)
    assert not len(downloader.http_queue)
    assert not len(downloader.ftp_queue)
    # Token pool is generated on demand and holds five slots.
    assert downloader._generate_tokens().qsize() == 5
def test_ftp_pasv_command(tmpdir):
    """An explicit passive-mode command list is honoured for FTP downloads."""
    dest = str(tmpdir)
    downloader = Downloader()
    downloader.enqueue_file(
        "ftp://ftp.ngdc.noaa.gov/STP/swpc_products/daily_reports/solar_region_summaries/2002/04/20020414SRS.txt",
        path=dest,
        passive_commands=["pasv"],
    )
    assert downloader.queued_downloads == 1
    results = downloader.download()
    assert len(results) == 1
    assert not results.errors
def test_notaurl(tmpdir):
    """An unresolvable host surfaces as an aiohttp ClientConnectionError."""
    dest = str(tmpdir)
    downloader = Downloader(progress=False)
    downloader.enqueue_file("http://notaurl.wibble/file", path=dest)
    results = downloader.download()
    assert len(results.errors) == 1
    failure = results.errors[0].exception
    assert isinstance(failure, aiohttp.ClientConnectionError)
def test_download(event_loop, httpserver, tmpdir):
    """A single queued URL downloads into the target directory."""
    dest = str(tmpdir)
    httpserver.serve_content('SIMPLE = T')
    downloader = Downloader(loop=event_loop)
    downloader.enqueue_file(httpserver.url, path=Path(dest))
    results = downloader.download()
    assert len(results) == 1
    # The saved path embeds the server address with the scheme stripped.
    assert httpserver.url[len("http://"):] in results[0]
def test_download_partial(event_loop, httpserver, tmpdir):
    """A callable ``filename`` argument controls where the file is written."""
    dest = str(tmpdir)
    httpserver.serve_content('SIMPLE = T')
    downloader = Downloader(loop=event_loop)

    def choose_name(resp, url):
        # Ignore response/url and always write to a fixed path.
        return Path(dest) / "filename"

    downloader.enqueue_file(httpserver.url, filename=choose_name)
    results = downloader.download()
    assert len(results) == 1
    assert "filename" in results[0]
async def test_async_download(httpserver, tmpdir):
    """``run_download`` can be awaited directly from async test code."""
    httpserver.serve_content(
        'SIMPLE = T',
        headers={'Content-Disposition': "attachment; filename=testfile.fits"})
    downloader = Downloader()
    downloader.enqueue_file(httpserver.url, path=Path(tmpdir), max_splits=None)
    assert downloader.queued_downloads == 1
    results = await downloader.run_download()
    validate_test_file(results)
def test_download_ranged_http(httpserver, tmpdir):
    """A ranged HTTP download produces the expected test file."""
    dest = str(tmpdir)
    httpserver.serve_content(
        'SIMPLE = T',
        headers={'Content-Disposition': "attachment; filename=testfile.fits"})
    downloader = Downloader()
    downloader.enqueue_file(httpserver.url, path=Path(dest))
    assert downloader.queued_downloads == 1
    validate_test_file(downloader.download())
def test_retrieve_some_content(testserver, tmpdir):
    """
    Test that the downloader handles errors properly.
    """
    dest = str(tmpdir)
    downloader = Downloader()
    total = 5
    for _ in range(total):
        downloader.enqueue_file(testserver.url, path=dest)
    results = downloader.download()
    # The flaky test server fails exactly one of the requests.
    assert len(results) == total - 1
    assert len(results.errors) == 1
def test_download_filename(event_loop, httpserver, tmpdir):
    """An explicit ``filename`` is used verbatim for the saved file."""
    httpserver.serve_content('SIMPLE = T')
    target = str(tmpdir.join("testing123"))
    downloader = Downloader(loop=event_loop)
    downloader.enqueue_file(httpserver.url, filename=target, chunksize=200)
    results = downloader.download()
    assert isinstance(results, Results)
    assert len(results) == 1
    assert results[0] == target
def test_download_ranged_http(event_loop, httpserver, tmpdir):
    """Ranged download honours Content-Disposition and the checksum matches."""
    dest = str(tmpdir)
    httpserver.serve_content(
        'SIMPLE = T',
        headers={'Content-Disposition': "attachment; filename=testfile.fits"})
    downloader = Downloader(loop=event_loop)
    downloader.enqueue_file(httpserver.url, path=Path(dest))
    assert downloader.queued_downloads == 1
    results = downloader.download()
    assert len(results) == 1
    saved = Path(results[0])
    assert saved.name == "testfile.fits"
    assert sha256sum(results[0]) == "a1c58cd340e3bd33f94524076f1fa5cf9a7f13c59d5272a9d4bc0b5bc436d9b3"
def test_ftp(tmpdir):
    """Only the one real FTP file downloads; the other three URLs error out."""
    dest = str(tmpdir)
    downloader = Downloader()
    sources = (
        "ftp://ftp.swpc.noaa.gov/pub/warehouse/2011/2011_SRS.tar.gz",
        "ftp://ftp.swpc.noaa.gov/pub/warehouse/2011/2013_SRS.tar.gz",
        "ftp://ftp.swpc.noaa.gov/pub/_SRS.tar.gz",
        "ftp://notaserver/notafile.fileL",
    )
    for source in sources:
        downloader.enqueue_file(source, path=dest)
    results = downloader.download()
    assert len(results) == 1
    assert len(results.errors) == 3
def test_ssl_context():
    """Regression test: an unpicklable SSL context object must not anger the
    dataclass gods (SessionConfig stores the generator without pickling it)."""
    # Named function instead of an assigned lambda (PEP 8 / flake8 E731).
    def session_generator(config):
        return aiohttp.ClientSession(context=ssl.create_default_context())

    session_config = SessionConfig(aiohttp_session_generator=session_generator)
    # Constructing the Downloader is the assertion: it must not raise.
    Downloader(config=session_config)
def test_regression_download_ranged_http(httpserver, tmpdir):
    """A one-byte ranged download must complete without any errors."""
    dest = str(tmpdir)
    httpserver.serve_content(
        'S',
        headers={
            'Content-Disposition': "attachment; filename=testfile.fits",
            'Accept-Ranges': "bytes",
        })
    downloader = Downloader()
    downloader.enqueue_file(httpserver.url, path=Path(dest))
    assert downloader.queued_downloads == 1
    results = downloader.download()
    # Include the errors in the failure message for easier debugging.
    assert len(results.errors) == 0, results.errors
def test_simple_download(httpserver, tmpdir):
    """``Downloader.simple_download`` fetches a list of URLs in one call."""
    dest = str(tmpdir)
    httpserver.serve_content(
        'SIMPLE = T',
        headers={'Content-Disposition': "attachment; filename=testfile.fits"})
    results = Downloader.simple_download([httpserver.url], path=Path(dest))
    validate_test_file(results)
def test_download_filename(httpserver, tmpdir):
    """Downloading over an existing file still reports the requested filename."""
    httpserver.serve_content('SIMPLE = T')
    target = str(tmpdir.join("testing123"))
    # Pre-create the file with contents identical to the served payload.
    with open(target, "w") as handle:
        handle.write("SIMPLE = T")
    downloader = Downloader()
    downloader.enqueue_file(httpserver.url, filename=target, chunksize=200)
    results = downloader.download()
    assert isinstance(results, Results)
    assert len(results) == 1
    assert results[0] == target
def test_setup(event_loop):
    """A fresh Downloader has empty queues and five tokens per protocol."""
    downloader = Downloader(loop=event_loop)
    assert isinstance(downloader, Downloader)
    # HTTP and FTP get symmetric queue/token pools.
    for queue, tokens in ((downloader.http_queue, downloader.http_tokens),
                          (downloader.ftp_queue, downloader.ftp_tokens)):
        assert queue.qsize() == 0
        assert tokens.qsize() == 5
def test_download_overwrite(event_loop, httpserver, tmpdir):
    """``overwrite=True`` replaces an existing file's contents."""
    httpserver.serve_content('SIMPLE = T')
    target = str(tmpdir.join("testing123"))
    with open(target, "w") as handle:
        handle.write("Hello world")
    downloader = Downloader(loop=event_loop, overwrite=True)
    downloader.enqueue_file(httpserver.url, filename=target, chunksize=200)
    results = downloader.download()
    assert isinstance(results, Results)
    assert len(results) == 1
    assert results[0] == target
    # The served payload must now have replaced the original contents.
    with open(target) as handle:
        assert handle.read() == "SIMPLE = T"
def test_download_no_overwrite(httpserver, tmpdir):
    """By default an existing file is left untouched."""
    httpserver.serve_content('SIMPLE = T')
    target = str(tmpdir.join("testing123"))
    with open(target, "w") as handle:
        handle.write("Hello world")
    downloader = Downloader()
    downloader.enqueue_file(httpserver.url, filename=target, chunksize=200)
    results = downloader.download()
    assert isinstance(results, Results)
    assert len(results) == 1
    assert results[0] == target
    # Unchanged contents prove the file was not overwritten.
    with open(target) as handle:
        assert handle.read() == "Hello world"
def test_raises_other_exception(httpserver, tmpdir):
    """Unexpected exceptions propagate out of ``download()``."""
    dest = str(tmpdir)
    httpserver.serve_content('SIMPLE = T')
    downloader = Downloader()
    downloader.enqueue_file(httpserver.url, path=dest)
    with pytest.raises(ValueError):
        downloader.download()
def test_changed_max_conn(httpserver, tmpdir):
    """``max_conn`` can be lowered after the Downloader is constructed."""
    dest = str(tmpdir)
    httpserver.serve_content(
        'SIMPLE = T',
        headers={'Content-Disposition': "attachment; filename=testfile.fits"})
    downloader = Downloader(max_conn=4)
    downloader.enqueue_file(httpserver.url, path=Path(dest), max_splits=None)
    # Change the connection limit between enqueue and download.
    downloader.max_conn = 3
    validate_test_file(downloader.download())
def test_no_progress(httpserver, tmpdir, capsys):
    """``progress=False`` keeps stdout completely silent."""
    dest = str(tmpdir)
    httpserver.serve_content('SIMPLE = T')
    downloader = Downloader(progress=False)
    downloader.enqueue_file(httpserver.url, path=dest)
    downloader.download()
    # Nothing at all may have been printed.
    assert capsys.readouterr().out == ""
def test_default_user_agent(event_loop, httpserver, tmpdir):
    """Requests carry parfive's default composite User-Agent header."""
    tmpdir = str(tmpdir)
    httpserver.serve_content('SIMPLE = T', headers={'Content-Disposition': "attachment; filename=testfile.fits"})
    dl = Downloader(loop=event_loop)
    dl.enqueue_file(httpserver.url, path=Path(tmpdir), max_splits=None)
    assert dl.queued_downloads == 1
    dl.download()
    assert 'User-Agent' in httpserver.requests[0].headers
    # NOTE(review): the expected value must match what parfive itself builds;
    # the sys.version[:5] slice assumes an "X.Y.Z" version string and looks
    # fragile for versions like "3.10.12" — confirm against parfive's header code.
    assert httpserver.requests[0].headers['User-Agent'] == f"parfive/{parfive.__version__} aiohttp/{aiohttp.__version__} python/{sys.version[:5]}"
def test_custom_user_agent(event_loop, httpserver, tmpdir):
    """A User-Agent supplied via ``headers`` overrides parfive's default."""
    dest = str(tmpdir)
    httpserver.serve_content(
        'SIMPLE = T',
        headers={'Content-Disposition': "attachment; filename=testfile.fits"})
    downloader = Downloader(loop=event_loop,
                            headers={'User-Agent': 'test value 299792458'})
    downloader.enqueue_file(httpserver.url, path=Path(dest), max_splits=None)
    assert downloader.queued_downloads == 1
    downloader.download()
    seen_headers = httpserver.requests[0].headers
    assert 'User-Agent' in seen_headers
    assert seen_headers['User-Agent'] == "test value 299792458"
def test_retry(tmpdir, testserver):
    """``retry()`` re-queues the failed downloads from a Results object."""
    dest = str(tmpdir)
    downloader = Downloader()
    total = 5
    for _ in range(total):
        downloader.enqueue_file(testserver.url, path=dest)
    first_pass = downloader.download()
    # The flaky test server fails exactly one request on the first pass.
    assert len(first_pass) == total - 1
    assert len(first_pass.errors) == 1
    second_pass = downloader.retry(first_pass)
    assert len(second_pass) == total
    assert len(second_pass.errors) == 0
def test_proxy_passed_as_kwargs_to_get(event_loop, tmpdir, url, proxy):
    """The configured proxy must be forwarded to ``ClientSession._request``."""
    # Patch the session's request method so no real network traffic happens.
    with mock.patch(
        "aiohttp.client.ClientSession._request", new_callable=mock.MagicMock
    ) as patched:
        dl = Downloader(loop=event_loop)
        dl.enqueue_file(url, path=Path(tmpdir), max_splits=None)
        assert dl.queued_downloads == 1
        dl.download()
        assert patched.called, "`ClientSession._request` not called"
        # Pin the exact call shape: method, url, and the kwargs parfive passes
        # through — redirects, timeout defaults, and the proxy fixture value.
        assert list(patched.call_args) == [('GET', url), {'allow_redirects': True,
                                                          'timeout': ClientTimeout(total=300, connect=None, sock_read=90, sock_connect=None),
                                                          'proxy': proxy
                                                          }]
def test_ftp_http(tmpdir, httpserver):
    """A mixed FTP/HTTP queue yields two successes and four failures."""
    dest = str(tmpdir)
    httpserver.serve_content('SIMPLE = T')
    downloader = Downloader()
    for source in (
        "ftp://ftp.swpc.noaa.gov/pub/warehouse/2011/2011_SRS.tar.gz",
        "ftp://ftp.swpc.noaa.gov/pub/warehouse/2011/2013_SRS.tar.gz",
        "ftp://ftp.swpc.noaa.gov/pub/_SRS.tar.gz",
        "ftp://notaserver/notafile.fileL",
        httpserver.url,
        "http://noaurl.notadomain/noafile",
    ):
        downloader.enqueue_file(source, path=dest)
    assert downloader.queued_downloads == 6
    results = downloader.download()
    assert len(results) == 2
    assert len(results.errors) == 4