예제 #1
0
def test_download_unique(httpserver, tmpdir):
    """
    Check that ``overwrite='unique'`` does not reuse an existing target name.

    Every requested target file is created on disk before downloading, so
    each returned path is expected to differ from the requested ones and to
    contain ``testing123.1``.
    """
    httpserver.serve_content('SIMPLE  = T')

    fname = "testing123"
    base_path = str(tmpdir.join(fname))
    targets = [base_path + suffix for suffix in ('', '.fits', '.fits.gz')]

    dl = Downloader(overwrite='unique')

    # Pre-populate each of the three target paths, then queue a download
    # aimed at that same path.
    for target in targets:
        with open(target, "w") as existing:
            existing.write("Hello world")
        dl.enqueue_file(httpserver.url, filename=target, chunksize=200)

    results = dl.download()

    assert isinstance(results, Results)
    assert len(results) == len(targets)

    for downloaded in results:
        assert downloaded not in targets
        assert f"{fname}.1" in downloaded
예제 #2
0
class ParallelDownloader:
    """Collects files on a ``Downloader`` queue and fetches them in one go."""

    def __init__(self):
        # All progress reporting is switched off and overwrite is enabled.
        downloader_options = {
            "progress": False,
            "file_progress": False,
            "notebook": False,
            "overwrite": True,
        }
        self.downloader = Downloader(**downloader_options)
        # Count of enqueued files; compared with the result count afterwards.
        self.total_files_added = 0

    async def append_file(self, link: str, download_path: Path):
        """Create the parent directory of ``download_path`` and enqueue ``link``."""
        target_dir = download_path.parent
        await makedirs(target_dir, exist_ok=True)
        self.downloader.enqueue_file(
            url=link, path=target_dir, filename=download_path.name
        )
        self.total_files_added += 1

    async def download_files(self, app: Application):
        """
        Start the download and wait for all files to finish.

        Raises ``ExporterException`` when the number of results differs from
        the number of files that were enqueued.
        """
        # The downloader's ``download`` is wrapped so it can be awaited here;
        # parfive is expected to support aiofiles in the future.
        wrapped_function = aiofiles_os.wrap(self.downloader.download)
        exporter_settings = get_settings(app)
        timeouts = {
            "total": exporter_settings.downloader_max_timeout_seconds,
            "sock_read": 90,  # default as in parfive code
        }
        results = await wrapped_function(timeouts=timeouts)
        log.debug("Download results %s", results)

        if len(results) == self.total_files_added:
            return
        raise ExporterException(
            "Not all files were downloaded. Please check the logs above."
        )
예제 #3
0
class ParallelDownloader:
    """Collects files on a ``Downloader`` queue and fetches them in one go."""

    def __init__(self):
        # All progress reporting is switched off and overwrite is enabled.
        downloader_options = {
            "progress": False,
            "file_progress": False,
            "notebook": False,
            "overwrite": True,
        }
        self.downloader = Downloader(**downloader_options)
        # Count of enqueued files; compared with the result count afterwards.
        self.total_files_added = 0

    async def append_file(self, link: str, download_path: Path) -> None:
        """Create the parent directory of ``download_path`` and enqueue ``link``."""
        target_dir = download_path.parent
        await makedirs(target_dir, exist_ok=True)
        self.downloader.enqueue_file(
            url=link, path=target_dir, filename=download_path.name
        )
        self.total_files_added += 1

    async def download_files(self, app: Application) -> None:
        """
        Start the download and wait for all files to finish.

        Raises ``ExporterException`` when the number of results differs from
        the number of enqueued files or when any error was recorded.
        """
        exporter_settings = get_plugin_settings(app)
        assert exporter_settings is not None, (  # nosec
            "this call was not expected with a disabled plugin"
        )

        timeouts = {
            "total": exporter_settings.EXPORTER_DOWNLOADER_MAX_TIMEOUT_SECONDS,
            "sock_read": 90,  # default as in parfive code
        }
        results = await self.downloader.run_download(timeouts=timeouts)

        log.debug("Download %s using %s", f"{results=}", f"{self.downloader=}")
        everything_arrived = (
            len(results) == self.total_files_added and len(results.errors) == 0
        )
        if everything_arrived:
            return
        message = f"Not all files were downloaded: {results.errors=}"
        log.error(message)
        raise ExporterException(message)
예제 #4
0
def test_raises_other_exception(httpserver, tmpdir):
    """
    Expect the first recorded download error to wrap a ``ValueError``.

    NOTE(review): nothing visible in this test triggers the ``ValueError``;
    presumably a patch or decorator outside this snippet does - confirm.
    """
    target_dir = str(tmpdir)
    httpserver.serve_content('SIMPLE  = T')
    dl = Downloader()

    dl.enqueue_file(httpserver.url, path=target_dir)
    outcome = dl.download()
    first_error = outcome.errors[0]
    assert isinstance(first_error.exception, ValueError)
예제 #5
0
def test_setup():
    """Check the initial state of a freshly constructed ``Downloader``."""
    dl = Downloader()

    assert isinstance(dl, Downloader)

    # Both download queues start out empty.
    for queue in (dl.http_queue, dl.ftp_queue):
        assert len(queue) == 0
    # The generated token queue is expected to hold 5 entries.
    assert dl._generate_tokens().qsize() == 5
예제 #6
0
def test_ftp_pasv_command(tmpdir):
    """Fetch one FTP file with ``passive_commands=["pasv"]`` and expect no errors."""
    target_dir = str(tmpdir)
    url = "ftp://ftp.ngdc.noaa.gov/STP/swpc_products/daily_reports/solar_region_summaries/2002/04/20020414SRS.txt"

    dl = Downloader()
    dl.enqueue_file(url, path=target_dir, passive_commands=["pasv"])
    assert dl.queued_downloads == 1

    results = dl.download()
    assert len(results) == 1
    assert len(results.errors) == 0
예제 #7
0
def test_notaurl(tmpdir):
    """
    Downloading ``http://notaurl.wibble/file`` is expected to record exactly
    one error wrapping an ``aiohttp.ClientConnectionError``.
    """
    target_dir = str(tmpdir)
    dl = Downloader(progress=False)
    dl.enqueue_file("http://notaurl.wibble/file", path=target_dir)

    results = dl.download()
    failures = results.errors

    assert len(failures) == 1
    assert isinstance(failures[0].exception, aiohttp.ClientConnectionError)
예제 #8
0
def test_download(event_loop, httpserver, tmpdir):
    """Download a single file into a directory and check the resulting path."""
    target_dir = Path(str(tmpdir))
    httpserver.serve_content('SIMPLE  = T')
    dl = Downloader(loop=event_loop)

    dl.enqueue_file(httpserver.url, path=target_dir)
    results = dl.download()
    assert len(results) == 1

    # The server URL minus its leading "http://" has to appear in the result.
    url_without_scheme = httpserver.url[len("http://"):]
    assert url_without_scheme in results[0]
예제 #9
0
def test_download_partial(event_loop, httpserver, tmpdir):
    """Download with a callable ``filename`` and check the name it produced."""
    target_dir = str(tmpdir)
    httpserver.serve_content('SIMPLE  = T')
    dl = Downloader(loop=event_loop)

    def name_from_response(resp, url):
        # Both arguments are ignored; the target is always "<tmpdir>/filename".
        return Path(target_dir) / "filename"

    dl.enqueue_file(httpserver.url, filename=name_from_response)
    results = dl.download()
    assert len(results) == 1

    # The name chosen by the callable has to show up in the result.
    assert "filename" in results[0]
예제 #10
0
async def test_async_download(httpserver, tmpdir):
    """Await ``run_download`` directly and validate the downloaded file."""
    response_headers = {
        'Content-Disposition': "attachment; filename=testfile.fits",
    }
    httpserver.serve_content('SIMPLE  = T', headers=response_headers)

    dl = Downloader()
    dl.enqueue_file(httpserver.url, path=Path(tmpdir), max_splits=None)
    assert dl.queued_downloads == 1

    results = await dl.run_download()
    validate_test_file(results)
예제 #11
0
def test_download_ranged_http(httpserver, tmpdir):
    """Download one file served with a Content-Disposition header and validate it."""
    target_dir = Path(str(tmpdir))
    response_headers = {
        'Content-Disposition': "attachment; filename=testfile.fits",
    }
    httpserver.serve_content('SIMPLE  = T', headers=response_headers)

    dl = Downloader()
    dl.enqueue_file(httpserver.url, path=target_dir)
    assert dl.queued_downloads == 1

    results = dl.download()
    validate_test_file(results)
예제 #12
0
def test_retrieve_some_content(testserver, tmpdir):
    """
    Test that the downloader handles errors properly.

    The same URL is queued five times; four results and one error are
    expected.
    """
    target_dir = str(tmpdir)
    dl = Downloader()

    request_count = 5
    for _ in range(request_count):
        dl.enqueue_file(testserver.url, path=target_dir)

    results = dl.download()

    assert len(results) == request_count - 1
    assert len(results.errors) == 1
예제 #13
0
def test_download_filename(event_loop, httpserver, tmpdir):
    """Download to an explicit filename and expect exactly that path back."""
    httpserver.serve_content('SIMPLE  = T')

    target = str(tmpdir.join("testing123"))

    dl = Downloader(loop=event_loop)
    dl.enqueue_file(httpserver.url, filename=target, chunksize=200)
    results = dl.download()

    assert isinstance(results, Results)
    assert len(results) == 1
    assert results[0] == target
예제 #14
0
def test_download_ranged_http(event_loop, httpserver, tmpdir):
    """Download one file and check its name and SHA-256 checksum."""
    target_dir = Path(str(tmpdir))
    response_headers = {
        'Content-Disposition': "attachment; filename=testfile.fits",
    }
    httpserver.serve_content('SIMPLE  = T', headers=response_headers)

    dl = Downloader(loop=event_loop)
    dl.enqueue_file(httpserver.url, path=target_dir)
    assert dl.queued_downloads == 1

    results = dl.download()
    assert len(results) == 1

    downloaded = results[0]
    # The file name is expected to match the Content-Disposition header.
    assert Path(downloaded).name == "testfile.fits"
    assert sha256sum(downloaded) == "a1c58cd340e3bd33f94524076f1fa5cf9a7f13c59d5272a9d4bc0b5bc436d9b3"
예제 #15
0
def test_ftp(tmpdir):
    """Queue four FTP URLs and expect one result and three errors."""
    target_dir = str(tmpdir)
    urls = (
        "ftp://ftp.swpc.noaa.gov/pub/warehouse/2011/2011_SRS.tar.gz",
        "ftp://ftp.swpc.noaa.gov/pub/warehouse/2011/2013_SRS.tar.gz",
        "ftp://ftp.swpc.noaa.gov/pub/_SRS.tar.gz",
        "ftp://notaserver/notafile.fileL",
    )

    dl = Downloader()
    for url in urls:
        dl.enqueue_file(url, path=target_dir)

    results = dl.download()
    assert len(results) == 1
    assert len(results.errors) == 3
예제 #16
0
def test_ssl_context():
    """Build a ``Downloader`` from a config whose session generator uses an SSL context."""
    # Assert that the unpicklable SSL context object doesn't anger the
    # dataclass gods
    def make_session(config):
        return aiohttp.ClientSession(context=ssl.create_default_context())

    session_config = SessionConfig(aiohttp_session_generator=make_session)
    # Construction alone is the check; the instance itself is not used.
    _downloader = Downloader(config=session_config)
예제 #17
0
def test_regression_download_ranged_http(httpserver, tmpdir):
    """Download a one-byte body served with ``Accept-Ranges: bytes`` without errors."""
    target_dir = Path(str(tmpdir))
    response_headers = {
        'Content-Disposition': "attachment; filename=testfile.fits",
        'Accept-Ranges': "bytes",
    }
    httpserver.serve_content('S', headers=response_headers)

    dl = Downloader()
    dl.enqueue_file(httpserver.url, path=target_dir)
    assert dl.queued_downloads == 1

    results = dl.download()
    # Show the recorded errors in the failure message.
    assert len(results.errors) == 0, results.errors
예제 #18
0
def test_simple_download(httpserver, tmpdir):
    """Use ``Downloader.simple_download`` for a single URL and validate the file."""
    target_dir = Path(str(tmpdir))
    response_headers = {
        'Content-Disposition': "attachment; filename=testfile.fits",
    }
    httpserver.serve_content('SIMPLE  = T', headers=response_headers)

    results = Downloader.simple_download([httpserver.url], path=target_dir)
    validate_test_file(results)
예제 #19
0
def test_download_filename(httpserver, tmpdir):
    """Download to an explicit filename that already exists on disk."""
    httpserver.serve_content('SIMPLE  = T')

    target = str(tmpdir.join("testing123"))
    # Create the target file before the download is queued.
    with open(target, "w") as existing:
        existing.write("SIMPLE = T")

    dl = Downloader()
    dl.enqueue_file(httpserver.url, filename=target, chunksize=200)
    results = dl.download()

    assert isinstance(results, Results)
    assert len(results) == 1
    assert results[0] == target
예제 #20
0
def test_setup(event_loop):
    """Check queue and token sizes of a ``Downloader`` bound to ``event_loop``."""
    dl = Downloader(loop=event_loop)

    assert isinstance(dl, Downloader)

    # For each protocol: the work queue is empty and 5 tokens are available.
    for protocol in ("http", "ftp"):
        assert getattr(dl, f"{protocol}_queue").qsize() == 0
        assert getattr(dl, f"{protocol}_tokens").qsize() == 5
예제 #21
0
def test_download_overwrite(event_loop, httpserver, tmpdir):
    """With ``overwrite=True`` an existing target file gets the served content."""
    httpserver.serve_content('SIMPLE  = T')

    target = str(tmpdir.join("testing123"))
    with open(target, "w") as existing:
        existing.write("Hello world")

    dl = Downloader(loop=event_loop, overwrite=True)
    dl.enqueue_file(httpserver.url, filename=target, chunksize=200)
    results = dl.download()

    assert isinstance(results, Results)
    assert len(results) == 1
    assert results[0] == target

    # The placeholder text must have been replaced by the served content.
    with open(target) as written:
        content = written.read()
    assert content == "SIMPLE  = T"
예제 #22
0
def test_download_no_overwrite(httpserver, tmpdir):
    """With a default ``Downloader`` an existing target file keeps its content."""
    httpserver.serve_content('SIMPLE  = T')

    target = str(tmpdir.join("testing123"))
    with open(target, "w") as existing:
        existing.write("Hello world")

    dl = Downloader()
    dl.enqueue_file(httpserver.url, filename=target, chunksize=200)
    results = dl.download()

    assert isinstance(results, Results)
    assert len(results) == 1
    assert results[0] == target

    # Unchanged content means the file has not been overwritten.
    with open(target) as untouched:
        content = untouched.read()
    assert content == "Hello world"
예제 #23
0
def test_raises_other_exception(httpserver, tmpdir):
    """
    Expect ``download`` to raise a ``ValueError``.

    NOTE(review): nothing visible in this test triggers the ``ValueError``;
    presumably a patch or decorator outside this snippet does - confirm.
    """
    target_dir = str(tmpdir)
    httpserver.serve_content('SIMPLE  = T')

    dl = Downloader()
    dl.enqueue_file(httpserver.url, path=target_dir)

    with pytest.raises(ValueError):
        dl.download()
예제 #24
0
def test_changed_max_conn(httpserver, tmpdir):
    """Check that changing ``max_conn`` works after creating the ``Downloader``."""
    target_dir = Path(str(tmpdir))
    response_headers = {
        'Content-Disposition': "attachment; filename=testfile.fits",
    }
    httpserver.serve_content('SIMPLE  = T', headers=response_headers)

    dl = Downloader(max_conn=4)
    dl.enqueue_file(httpserver.url, path=target_dir, max_splits=None)
    # Lower the limit after a file has already been queued.
    dl.max_conn = 3

    results = dl.download()
    validate_test_file(results)
예제 #25
0
def test_no_progress(httpserver, tmpdir, capsys):
    """With ``progress=False`` a download must not write anything to stdout."""
    target_dir = str(tmpdir)
    httpserver.serve_content('SIMPLE  = T')

    dl = Downloader(progress=False)
    dl.enqueue_file(httpserver.url, path=target_dir)
    dl.download()

    # Nothing may have been captured on stdout.
    assert not capsys.readouterr().out
예제 #26
0
def test_default_user_agent(event_loop, httpserver, tmpdir):
    """
    Check the ``User-Agent`` header sent when no custom headers are given.

    NOTE(review): ``sys.version[:5]`` yields e.g. ``3.10.`` for two-digit
    minor versions; the expectation only holds if the library builds its
    user agent the same way - confirm.
    """
    target_dir = Path(str(tmpdir))
    response_headers = {
        'Content-Disposition': "attachment; filename=testfile.fits",
    }
    httpserver.serve_content('SIMPLE  = T', headers=response_headers)

    dl = Downloader(loop=event_loop)
    dl.enqueue_file(httpserver.url, path=target_dir, max_splits=None)
    assert dl.queued_downloads == 1

    dl.download()

    sent_headers = httpserver.requests[0].headers
    expected_agent = f"parfive/{parfive.__version__} aiohttp/{aiohttp.__version__} python/{sys.version[:5]}"
    assert 'User-Agent' in sent_headers
    assert sent_headers['User-Agent'] == expected_agent
예제 #27
0
def test_custom_user_agent(event_loop, httpserver, tmpdir):
    """Check that a ``User-Agent`` passed via ``headers`` reaches the server."""
    target_dir = Path(str(tmpdir))
    response_headers = {
        'Content-Disposition': "attachment; filename=testfile.fits",
    }
    httpserver.serve_content('SIMPLE  = T', headers=response_headers)

    custom_headers = {'User-Agent': 'test value 299792458'}
    dl = Downloader(loop=event_loop, headers=custom_headers)
    dl.enqueue_file(httpserver.url, path=target_dir, max_splits=None)
    assert dl.queued_downloads == 1

    dl.download()

    sent_headers = httpserver.requests[0].headers
    assert 'User-Agent' in sent_headers
    assert sent_headers['User-Agent'] == "test value 299792458"
예제 #28
0
def test_retry(tmpdir, testserver):
    """
    Retry a partially failed batch and expect every file afterwards.

    The first ``download`` is expected to leave one error; passing its
    results to ``retry`` is expected to return all five files with no errors.
    """
    target_dir = str(tmpdir)
    dl = Downloader()

    request_count = 5
    for _ in range(request_count):
        dl.enqueue_file(testserver.url, path=target_dir)

    first_pass = dl.download()

    assert len(first_pass) == request_count - 1
    assert len(first_pass.errors) == 1

    second_pass = dl.retry(first_pass)

    assert len(second_pass) == request_count
    assert len(second_pass.errors) == 0
예제 #29
0
def test_proxy_passed_as_kwargs_to_get(event_loop, tmpdir, url, proxy):
    """
    Check the arguments that reach ``ClientSession._request``.

    ``_request`` is replaced by a ``MagicMock``; after one download its last
    call must be a ``GET`` for ``url`` carrying ``proxy`` as a keyword.
    """
    request_patch = mock.patch(
        "aiohttp.client.ClientSession._request",
        new_callable=mock.MagicMock,
    )
    with request_patch as patched:
        dl = Downloader(loop=event_loop)
        dl.enqueue_file(url, path=Path(tmpdir), max_splits=None)

        assert dl.queued_downloads == 1

        dl.download()

    assert patched.called, "`ClientSession._request` not called"

    expected_args = ('GET', url)
    expected_kwargs = {
        'allow_redirects': True,
        'timeout': ClientTimeout(total=300, connect=None, sock_read=90, sock_connect=None),
        'proxy': proxy,
    }
    assert list(patched.call_args) == [expected_args, expected_kwargs]
예제 #30
0
def test_ftp_http(tmpdir, httpserver):
    """Queue a mix of FTP and HTTP URLs and expect two results and four errors."""
    target_dir = str(tmpdir)
    httpserver.serve_content('SIMPLE  = T')

    urls = (
        "ftp://ftp.swpc.noaa.gov/pub/warehouse/2011/2011_SRS.tar.gz",
        "ftp://ftp.swpc.noaa.gov/pub/warehouse/2011/2013_SRS.tar.gz",
        "ftp://ftp.swpc.noaa.gov/pub/_SRS.tar.gz",
        "ftp://notaserver/notafile.fileL",
        httpserver.url,
        "http://noaurl.notadomain/noafile",
    )

    dl = Downloader()
    for url in urls:
        dl.enqueue_file(url, path=target_dir)

    assert dl.queued_downloads == 6

    results = dl.download()
    assert len(results) == 2
    assert len(results.errors) == 4