Example #1
import logging
from pathlib import Path
from typing import Tuple

import aiofiles
import httpx
import pandas as pd

# BASE_URL (the NREL data root) is assumed to be defined at module level,
# alongside _dense_samples().

async def download_file(client: httpx.AsyncClient, out_dir: Path,
                        url_parts: Tuple[str, str]):
    """Download a single file from NREL, identified by url_parts.

    Parameters
    ----------
    client : httpx.AsyncClient
        async web client
    out_dir : Path
        destination directory
    url_parts : Tuple[str, str]
        tuple of (date_url, filename.mat), like ("2019/04/17/", "04_17_2019_03_50_00_000.mat")
        Use _dense_samples() to produce a series of them.
    """
    url = "".join([BASE_URL, *url_parts])
    filepath = out_dir / url_parts[1]
    try:
        async with client.stream("GET", url) as resp:
            try:
                resp.raise_for_status()
                async with aiofiles.open(filepath, "wb") as f:
                    async for data in resp.aiter_bytes():
                        if data:
                            await f.write(data)
                print(
                    f"Downloaded {url_parts[1]} at {pd.Timestamp('now').strftime('%H:%M:%S')}"
                )
            # httpx.ReadTimeout subclasses httpx.HTTPError, so the more
            # specific exception must be caught first or its handler
            # is unreachable
            except httpx.ReadTimeout:
                logging.info(f"ReadTimeout for {url_parts[1]}")
            except httpx.HTTPError:
                logging.info(f"HTTPError for {url_parts[1]}")
    except httpx.ConnectTimeout:
        # httpx._exceptions.ConnectTimeout is the same class re-exported,
        # so catching both was redundant
        logging.warning(f"Timeout for {url_parts[1]}; needs re-download.")
Example #2
import logging
import pathlib
import time
from typing import Optional

import httpx
from tqdm import tqdm

# sanitize_filename is assumed to be imported elsewhere in the module,
# e.g. from pathvalidate.

def standard_download(session: httpx.Client,
                      url: str,
                      content_dir: pathlib.Path,
                      outfile_name: str,
                      extension: str,
                      content_size: int,
                      headers: Optional[dict] = None,
                      ranges=True,
                      **opts):
    headers = headers or {}  # avoid the mutable default-argument pitfall
    file = "{}.{}".format(outfile_name, extension)

    logger = logging.getLogger("downloader/standard[{}]".format(file))
    out_path = content_dir / pathlib.Path(sanitize_filename(file))

    if not ranges:
        logger.critical(
            "Stream does not support ranged downloading; failed downloads cannot be continued."
        )

    with open(out_path, "ab") as outstream:
        downloaded = outstream.tell() if ranges else 0
        progress_bar = tqdm(
            desc="GET / {}".format(file),
            total=content_size,
            disable=opts.get("log_level", 20) > 20,
            initial=downloaded,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
        )
        while content_size > downloaded:
            temporary_headers = headers.copy()
            if ranges:
                # the standard HTTP header is "Range", not "Ranges"
                temporary_headers.update(
                    {"Range": "bytes={}-".format(downloaded)})
            try:
                # send temporary_headers, otherwise the Range header built
                # above is silently dropped
                with session.stream("GET", url,
                                    headers=temporary_headers) as http_stream:
                    http_stream.raise_for_status()

                    for chunk in http_stream.iter_bytes():
                        size = len(chunk)
                        outstream.write(chunk)
                        progress_bar.update(size)
                        downloaded += size
            except httpx.RequestError as e:
                if not ranges:
                    # the file is opened in "ab" mode, so every write appends;
                    # the partial data must be truncated, not just seeked past
                    downloaded = 0
                    outstream.seek(0)
                    outstream.truncate()
                    # reset() zeroes the progress counter; clear() only
                    # erased the display
                    progress_bar.reset()
                else:
                    outstream.flush()
                logger.error(
                    "Downloading error due to {!r}, retrying.".format(e))
                time.sleep(opts.get("retry_timeout") or 5.0)

    progress_bar.close()
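A hypothetical call-site sketch; the URL is a placeholder, content_size comes from a preliminary HEAD request, and ranged mode is enabled only when the server advertises Accept-Ranges:

import logging
import pathlib

import httpx

logging.basicConfig(level=logging.INFO)

with httpx.Client(follow_redirects=True) as session:
    url = "https://example.com/file.bin"  # placeholder
    head = session.head(url)
    standard_download(
        session,
        url,
        content_dir=pathlib.Path("."),
        outfile_name="file",
        extension="bin",
        content_size=int(head.headers.get("Content-Length", 0)),
        ranges="accept-ranges" in head.headers,  # httpx headers are case-insensitive
        retry_timeout=5.0,
    )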