async def download_file(client: httpx.AsyncClient, out_dir: Path, url_parts: Tuple[str, str]):
    """Download a single file from NREL, identified by url_parts.

    Parameters
    ----------
    client : httpx.AsyncClient
        Async web client. Must be an AsyncClient: ``client.stream`` is used
        as an async context manager and the body is read with
        ``aiter_bytes()``, neither of which the sync ``httpx.Client`` offers.
    out_dir : Path
        Destination directory.
    url_parts : Tuple[str, str]
        Tuple of (date_url, filename.mat), like
        ("2019/04/17/", "04_17_2019_03_50_00_000.mat").
        Use _dense_samples() to produce a series of them.
    """
    url = "".join([BASE_URL, *url_parts])
    filepath = out_dir / url_parts[1]
    try:
        async with client.stream("GET", url) as resp:
            resp.raise_for_status()
            async with aiofiles.open(filepath, "wb") as f:
                async for data in resp.aiter_bytes():
                    if data:
                        await f.write(data)
        print(
            f"Downloaded {url_parts[1]} at {pd.Timestamp('now').strftime('%H:%M:%S')}"
        )
    # httpx.ReadTimeout / ConnectTimeout are SUBCLASSES of httpx.HTTPError,
    # so they must be caught before the base class or they would be
    # misreported as generic HTTP errors (the original code had the base
    # catch nested inside, which swallowed body-read timeouts). The public
    # httpx.ConnectTimeout is used instead of the private
    # httpx._exceptions.ConnectTimeout, which names the same class.
    except httpx.ReadTimeout:
        logging.info(f"ReadTimeout for {url_parts[1]}")
    except httpx.ConnectTimeout:
        logging.warning(f"Timeout for {url_parts[1]} Needs re-download.")
    except httpx.HTTPError:
        logging.info(f"HTTPError for {url_parts[1]}")
        # Drop the empty/partial file so a failed response does not
        # masquerade as a completed download (missing_ok: nothing was
        # written when raise_for_status() failed before the file opened).
        filepath.unlink(missing_ok=True)
def standard_download(
    session: httpx.Client,
    url: str,
    content_dir: pathlib.Path,
    outfile_name: str,
    extension: str,
    content_size: int,
    headers: dict = None,
    ranges=True,
    **opts,
):
    """Download ``url`` into ``content_dir``, resuming a partial file when possible.

    Parameters
    ----------
    session : httpx.Client
        Sync HTTP client used for the streamed GET requests.
    url : str
        Resource to download.
    content_dir : pathlib.Path
        Directory the output file is written into.
    outfile_name : str
        Base name of the output file (without extension).
    extension : str
        File extension appended to ``outfile_name``.
    content_size : int
        Expected total size in bytes; the download loop retries until this
        many bytes have been written.
    headers : dict, optional
        Extra request headers. Defaults to none (``None`` replaces the
        original mutable-default ``{}``; behavior is identical since the
        dict was always copied per attempt).
    ranges : bool
        Whether the server supports HTTP Range requests; when False a
        failed attempt restarts the download from byte zero.
    **opts
        ``log_level`` (int, default 20) silences the progress bar above
        INFO; ``retry_timeout`` (float, default 5.0) is the sleep between
        retries.
    """
    if headers is None:
        headers = {}
    file = "{}.{}".format(outfile_name, extension)
    logger = logging.getLogger("downloader/standard[{}]".format(file))
    out_path = content_dir / pathlib.Path(sanitize_filename(file))
    if not ranges:
        logger.critical(
            "Stream does not support ranged downloading; failed downloads cannot be continued."
        )
    with open(out_path, "ab") as outstream:
        downloaded = outstream.tell() if ranges else 0
        progress_bar = tqdm(
            desc="GET / {}".format(file),
            total=content_size,
            disable=opts.get("log_level", 20) > 20,
            initial=downloaded,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
        )
        while content_size > downloaded:
            temporary_headers = headers.copy()
            if ranges:
                # The standard header is "Range" (RFC 9110 §14.2); the
                # original sent a nonexistent "Ranges" header, which
                # servers ignore.
                temporary_headers.update(
                    {"Range": "bytes={}-".format(downloaded)})
            try:
                # BUG FIX: the per-attempt headers carrying the resume
                # offset were built but never sent (the original passed
                # ``headers``), so every "resume" appended a full
                # re-download after the partial bytes, corrupting the file.
                with session.stream(
                    "GET", url, headers=temporary_headers
                ) as http_stream:
                    http_stream.raise_for_status()
                    for chunk in http_stream.iter_bytes():
                        size = len(chunk)
                        outstream.write(chunk)
                        progress_bar.update(size)
                        downloaded += size
            except httpx.RequestError as e:
                if not ranges:
                    # Cannot resume: restart from scratch. The file is open
                    # in append mode, where seek() does not move the write
                    # position — writes always go to EOF — so the file must
                    # be truncated to zero for the restart to overwrite it.
                    downloaded = 0
                    outstream.seek(0)
                    outstream.truncate()
                    progress_bar.clear()
                else:
                    outstream.flush()
                logger.error(
                    "Downloading error due to {!r}, retrying.".format(e))
                time.sleep(opts.get("retry_timeout") or 5.0)
        progress_bar.close()