def download_file(url, filename):
    headers = {"User-Agent": "Audible/671 CFNetwork/1240.0.4 Darwin/20.6.0"}
    with httpx.stream("GET", url, headers=headers) as r:
        with open(filename, 'wb') as f:
            for chunk in r.iter_bytes():
                f.write(chunk)
    return filename
def downloadFileSync(url: str, target: Path, apikey: str) -> None:
    try:
        with target.open('wb') as file:
            with stream('GET', url, headers={
                'apikey'.encode('ascii'):
                    apikey.strip().encode('ascii', 'backslashreplace')
            }, timeout=250.0) as download:
                if download.status_code == 429:
                    raise RequestLimitReachedError()
                if download.status_code == 404:
                    raise NotFoundError(f'No file with URL {url} found')
                if download.status_code == 403:
                    raise NoPremiumMembershipException()
                if download.status_code == 401:
                    raise UnauthorizedError()
                if download.status_code != 200:
                    raise ResponseError(
                        f'Unexpected response: Status {download.status_code}')
                for data in download.iter_bytes():
                    file.write(data)
    except HTTPStatusError as e:
        raise RequestError(request=e.request, response=e.response, kind=str(e))
    except HTTPError as e:
        raise RequestError(request=e.request, response=None, kind=str(e))
def _get_antismash_zip_data(self, accession_id, filename, local_path):
    for base_url in [ANTISMASH_DB_DOWNLOAD_URL, ANTISMASH_DBV2_DOWNLOAD_URL]:
        zipfile_url = base_url.format(accession_id, filename)
        with open(local_path, 'wb') as f:
            total_bytes = 0
            try:
                with httpx.stream('GET', zipfile_url) as r:
                    if r.status_code == 404:
                        logger.debug('antiSMASH download URL was a 404')
                        continue
                    logger.info('Downloading from antiSMASH: {}'.format(zipfile_url))
                    filesize = int(r.headers['content-length'])
                    bar = Bar(filename, max=filesize, suffix='%(percent)d%%')
                    for data in r.iter_bytes():
                        f.write(data)
                        total_bytes += len(data)
                        bar.next(len(data))
                    bar.finish()
            except Exception as e:
                logger.warning('antiSMASH zip download failed: {}'.format(e))
                continue
        return True
    return False
def pull_github(source: dict, path: Optional[str] = None, secrets: dict = {},
                **kwargs) -> Files:
    """
    Pull a GitHub repo/subpath.

    If a user token is provided in `secrets` it will be used to authenticate
    as that user.
    """
    assert source.get("repo"), "GitHub source must have a repo"
    subpath = source.get("subpath") or ""
    if subpath.endswith("/"):
        subpath = subpath[:-1]
    path = path or "."

    # Get the possibly token-protected link for the repo archive.
    # See https://developer.github.com/v3/repos/contents/#download-a-repository-archive
    client = github_client(secrets.get("token"))
    repo_resource = client.get_repo(source["repo"])
    archive_link = repo_resource.get_archive_link("zipball")

    # Get the archive. To avoid filling up memory, stream directly to file.
    # Increase timeout over the default of 5s.
    zip_file = tempfile.NamedTemporaryFile(suffix=".zip", delete=False)
    with httpx.stream("GET", archive_link, timeout=60) as response:
        for data in response.iter_bytes():
            zip_file.write(data)
    zip_file.close()

    return pull_zip(zip_file.name, subpath=subpath, path=path)
def download(path, *args, **kwargs):
    with httpx.stream('GET', *args, **kwargs) as response:
        response.raise_for_status()
        with path.open(mode='wb', buffering=0) as f:
            for chunk in response.iter_bytes(16384):
                f.write(chunk)
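# A minimal usage sketch for download() above: the first argument is a
# pathlib.Path, and everything after it is forwarded verbatim to httpx.stream
# following the "GET" method. The URL and timeout below are hypothetical.
from pathlib import Path

download(Path("archive.tar.gz"), "https://example.com/archive.tar.gz", timeout=30.0)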
def __init__(self, name: str, icon: str, id: str, author: str, version: str,
             config: str, description: str, tmpdir: str, parent=None):
    super(item, self).__init__(parent)
    self.tmpdir = tmpdir
    self.setText(name)
    icon_name = icon.split("/")[-1]
    with httpx.stream("GET", icon) as response:
        with open(self.tmpdir + icon_name, "wb+") as f:
            for chunk in response.iter_bytes():
                f.write(chunk)
    self.setIcon(QIcon(self.tmpdir + icon_name))
    self.setToolTip(f"Title: {name}\nID: {id}\nAuthor: {author}\nVersion: {version}")
    self.config = config
    self.description = description
    self.name = name
    self.version = version
    self.author = author
def links(timeout: int = 10, delay: float = 0.1):
    dynamically_generated_links_via_injection = {
        "https://www.sante.fr/cf/centres-vaccination-covid/departement-22-cotes-d'armor.html",
        "https://www.sante.fr/cf/centres-vaccination-covid/departement-20A-corse-du-sud.html",
        "https://www.sante.fr/cf/centres-vaccination-covid/departement-01-ain.html",
        "https://www.sante.fr/cf/centres-vaccination-covid.html",
        "https://www.sante.fr/cf/centres-depistage-covid/departement-01.html",
        "https://www.sante.fr/cf/centres-depistage-covid/departement-2A.html",
        "https://www.sante.fr/cf/centres-depistage-covid.html",
    }
    parser = LinkExtractor()
    content = (SRC_DIR / "index.html").read_text()
    parser.feed(content)
    links = parser.links.union(dynamically_generated_links_via_injection)
    for link in sorted(links):
        print(link)
        with httpx.stream(
            "GET",
            link,
            timeout=timeout,
            verify=False,  # ignore SSL certificate validation errors
        ) as response:
            if response.status_code == HTTPStatus.TOO_MANY_REQUESTS:
                print("Warning: we’re being throttled, skipping link (429)")
                continue
            if response.status_code != HTTPStatus.OK:
                raise Exception(f"{link} is broken! ({response.status_code})")
        time.sleep(delay)  # avoid being throttled
async def test_stream(server):
    # From an httpx test suite at a version whose top-level stream() supported
    # async usage; in current httpx the top-level API is sync-only and the
    # async equivalent is AsyncClient.stream().
    async with httpx.stream("GET", server.url) as response:
        await response.aread()
        assert response.status_code == 200
        assert response.reason_phrase == "OK"
        assert response.text == "Hello, world!"
        assert response.http_version == "HTTP/1.1"
def fetch_tornado_reports():
    if os.path.isfile(PATH):
        print(f'Using cached {PATH}')
        return
    print(f'Fetching {PATH}')
    with open(PATH, 'w') as file:
        with httpx.stream('GET', URL) as r:
            for data in r.iter_text():
                file.write(data)
async def download_file(url: str, filename: Optional[str] = None) -> str:
    filename = filename or url.split("/")[-1]
    # httpx.stream() is synchronous; for async iteration, open the stream on
    # an AsyncClient instead.
    async with httpx.AsyncClient() as client:
        async with client.stream("GET", url) as resp:
            resp.raise_for_status()
            async with aiofiles.open(filename, "wb") as f:
                async for data in resp.aiter_bytes():
                    if data:
                        await f.write(data)
    return filename
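# A minimal sketch of driving the coroutine above; the URL is hypothetical.
import asyncio

asyncio.run(download_file("https://example.com/data.bin"))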
def _download_file(url, local_path, timeout):
    with httpx.stream(
        "GET",
        url,
        timeout=timeout,
        verify=False,  # ignore SSL certificate validation errors
    ) as response:
        if response.status_code != HTTPStatus.OK:
            raise Exception(f"{url} is broken! ({response.status_code})")
        _save_binary_response(local_path, response)
def sync_detailed(
    *,
    client: AuthenticatedClient,
    name: str,
    snapshot_id: str,
    limit: Optional[int] = None,
) -> Response[Union[None, HTTPValidationError]]:
    kwargs = _get_kwargs(client=client, name=name, snapshot_id=snapshot_id, limit=limit)
    with httpx.stream("GET", **kwargs) as response:
        return build_stream_response(response=response)
def DownloadImage(self: Any, url: str) -> Image.Image:
    """Download the specified image file and return the image object."""
    with httpx.stream("GET", url, timeout=30.0) as res:
        if res.status_code == 200:
            # Image.open() needs a seekable file-like object, and its mode
            # argument must be "r", so buffer the body in io.BytesIO and
            # convert to RGBA afterwards.
            return Image.open(io.BytesIO(res.read())).convert("RGBA")
        log.error(f"Failed to download image (HTTP {res.status_code})")
def __enter__(self):
    self.archive_dir = Path(tempfile.mkdtemp())
    self.archive_file = self.archive_dir / self.url.split('/')[-1]
    logger.debug(f'Downloading fontawesome to {self.archive_file}')
    with httpx.stream('GET', self.url) as r:
        with self.archive_file.open('wb') as fd:
            for chunk in r.iter_bytes():
                fd.write(chunk)
    self.root = Path(tempfile.mkdtemp())
    logger.debug(f'Unpacking to {self.root}')
    shutil.unpack_archive(self.archive_file, self.root)
    return self.root / self.archive_file.stem
def sign_remote_file(url: str, **signer_kwargs):
    with tempfile.SpooledTemporaryFile(10 * 1024 * 1024) as f:
        with httpx.stream("GET", url) as r:
            pbar = tqdm(total=int(r.headers["content-length"]), unit="B",
                        unit_scale=True, desc=str(r.url))
            for data in r.iter_bytes():
                f.write(data)
                pbar.update(len(data))
            pbar.close()
        return sign_file(f, **signer_kwargs)
def sync_detailed(
    *,
    client: AuthenticatedClient,
    name: str,
    request: Optional[TextClassificationQuery] = None,
    limit: Optional[int] = None,
) -> Response[Union[None, HTTPValidationError]]:
    kwargs = _get_kwargs(client=client, name=name, limit=limit, json_body=request)
    with httpx.stream("POST", **kwargs) as response:
        return build_stream_response(response=response)
def download_and_unpack(url: str, out_path: Path) -> None:
    with NamedTemporaryFile() as download_file:
        with httpx.stream("GET", url) as response:
            total = int(response.headers["Content-Length"])
            with wk.utils.get_rich_progress() as progress:
                download_task = progress.add_task("Download Image Data", total=total)
                for chunk in response.iter_bytes():
                    download_file.write(chunk)
                    progress.update(download_task,
                                    completed=response.num_bytes_downloaded)
        with ZipFile(download_file, "r") as zip_file:
            zip_file.extractall(out_path)
def get_sbml(self, model_id: str) -> bytes:
    """
    Attempt to download an SBML document from the repository.

    Parameters
    ----------
    model_id : str
        The identifier of the desired metabolic model. This is typically
        repository specific.

    Returns
    -------
    bytes
        A gzip-compressed, UTF-8 encoded SBML document.

    Raises
    ------
    httpx.HTTPError
        In case there are any connection problems.

    """
    data = BytesIO()
    response = httpx.get(
        url=self._url.join(f"files/{model_id}"),
        headers={"Accept": "application/json"},
    )
    response.raise_for_status()
    files = BioModelsFilesResponse.parse_obj(response.json())
    for model in files.main:
        if model.name.endswith("xml"):
            break
    else:
        raise RuntimeError(f"Could not find an SBML document for '{model_id}'.")
    with self._progress, httpx.stream(
        method="GET",
        url=self._url.join(f"download/{model_id}"),
        params={"filename": model.name},
    ) as response:
        response.raise_for_status()
        task_id = self._progress.add_task(
            description="download",
            total=model.size,
            model_id=model_id,
        )
        for chunk in response.iter_bytes():
            data.write(chunk)
            self._progress.update(task_id=task_id, advance=len(chunk))
    data.seek(0)
    return gzip.compress(data.read())
def _download(self, url, dirname, filename):
    if not dirname.exists():
        dirname.mkdir()
    file = dirname / filename
    with open(file.with_suffix('.url'), 'wt', encoding='utf8') as f:
        f.write(url)
    with open(file, 'wb') as f, httpx.stream('GET', url) as r:
        for chunk in r.iter_bytes():
            f.write(chunk)
    return file
def html_to_bytes(self, html):
    kwargs = {
        "auth": ("api", settings.PDFSHIFT_API_KEY),
        "json": {"source": html, "sandbox": settings.PDFSHIFT_SANDBOX_MODE},
    }
    with httpx.stream("POST", self.url, timeout=10.0, **kwargs) as response:
        response.raise_for_status()
        result = io.BytesIO()
        for chunk in response.iter_bytes(1024):
            result.write(chunk)
        return result.getvalue()
def _download_url(url: str, dest: Path) -> None:
    """Download archive from URL to file `dest`.

    Raise `WorkbenchModuleImportError` on HTTP error.
    """
    try:
        with dest.open("wb") as w:
            with httpx.stream("GET", url) as r:
                for chunk in r.iter_bytes():
                    w.write(chunk)
    except httpx.HTTPError as err:
        raise WorkbenchModuleImportError(
            "HTTP error downloading %(url)s: %(message)s"
            % {"url": url, "message": str(err)}
        )
def cli(url, output, etags, verbose):
    """
    Fetch data using HTTP conditional get
    """
    headers = {}
    try:
        existing_etags = json.load(open(etags))
    except IOError:
        existing_etags = {}
    if url in existing_etags:
        headers["If-None-Match"] = existing_etags[url]
        if verbose:
            click.echo("Existing ETag: {}".format(existing_etags[url]), err=True)
    with httpx.stream("GET", url, headers=headers) as response:
        if verbose:
            click.echo("Response status code: {}".format(response.status_code), err=True)
        if not output:
            # Detect output from URL and content_type
            bits = urlparse(url)
            output = bits.path.split("/")[-1]
            if not output:
                # Use index.filetype
                content_type = response.headers["content-type"].split(";")[0]
                ext = CONTENT_TYPE_TO_EXT.get(content_type, content_type.split("/")[-1])
                output = "index.{}".format(ext)
        if response.status_code == 304:
            return
        elif response.status_code == 200:
            etag = response.headers.get("etag")
            if etag:
                existing_etags[url] = etag
            bar = None
            if verbose and response.headers.get("content-length"):
                bar = click.progressbar(length=int(response.headers["content-length"]))
            with open(output, "wb") as fp:
                for b in response.iter_bytes():
                    fp.write(b)
                    if bar:
                        bar.update(len(b))
    with open(etags, "w") as fp:
        fp.write(json.dumps(existing_etags, indent=4))
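# For reference, the etags file read and written by cli() above is plain JSON
# mapping each URL to the last ETag seen for it; the entry below is a
# hypothetical example of the shape.
#
# {
#     "https://example.com/data.csv": "\"33a64df551425fcc\""
# }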
def get_test_file() -> Path:
    """Download a FITS file from e-Callisto to use during the tests.

    :returns: Path object of the downloaded FITS file.
    """
    callisto_archives = ("http://soleil80.cs.technik.fhnw.ch/"
                         "solarradio/data/2002-20yy_Callisto/")
    date_xpath = "2011/08/09/"
    fitsfile = "BLEN7M_20110809_080004_25.fit.gz"
    fitsurl = callisto_archives + date_xpath + fitsfile
    with open(fitsfile, "wb") as fin:
        with httpx.stream("GET", fitsurl) as r:
            for chunk in r.iter_raw():
                fin.write(chunk)
    return Path(fitsfile)
def download(url, local_destination, recursing=0):
    '''Download the 'url' to the file 'local_destination'.'''
    def addurl(text):
        return f'{text} for {url}'

    timeout = httpx.Timeout(15, connect=15, read=15, write=15)
    with httpx.stream('get', url, verify=False, timeout=timeout,
                      follow_redirects=True) as resp:
        code = resp.status_code
        if code == 202:
            # Code 202 = Accepted, "received but not yet acted upon."
            wait(2)  # Sleep a short time and try again.
            raise_for_interrupts()
            recursing += 1
            if recursing <= _MAX_RECURSIVE_CALLS:
                if __debug__: log('calling download(url) recursively for code 202')
                download(url, local_destination, recursing)
            else:
                raise ServiceFailure(addurl('Exceeded max retries for code 202'))
        elif 200 <= code < 400:
            with open(local_destination, 'wb') as f:
                for chunk in resp.iter_bytes():
                    raise_for_interrupts()
                    f.write(chunk)
            resp.close()
            size = stat(local_destination).st_size
            if __debug__: log(f'wrote {size} bytes to file {local_destination}')
        elif code in [401, 402, 403, 407, 451, 511]:
            raise AuthenticationFailure(addurl('Access is forbidden'))
        elif code in [404, 410]:
            raise NoContent(addurl('No content found'))
        elif code in [405, 406, 409, 411, 412, 414, 417, 428, 431, 505, 510]:
            raise InternalError(addurl(f'Server returned code {code}'))
        elif code in [415, 416]:
            raise ServiceFailure(addurl('Server rejected the request'))
        elif code == 429:
            raise RateLimitExceeded('Server blocking further requests due to rate limits')
        elif code == 503:
            raise ServiceFailure('Server is unavailable -- try again later')
        elif code in [500, 501, 502, 506, 507, 508]:
            raise ServiceFailure(addurl(f'Internal server error (HTTP code {code})'))
        else:
            raise NetworkFailure(f'Unable to resolve {url}')
def download_and_extract_mibig_json(download_path, output_path, version='1.4'):
    archive_path = os.path.join(download_path, 'mibig_json_{}.tar.gz'.format(version))
    logger.debug('Checking for existing MiBIG archive at {}'.format(archive_path))
    cached = False
    if os.path.exists(archive_path):
        logger.info('Found cached file at {}'.format(archive_path))
        try:
            _ = tarfile.open(archive_path)
            cached = True
        except tarfile.TarError:
            logger.info('Invalid MiBIG archive found, will download again')
            os.unlink(archive_path)

    if not cached:
        url = MIBIG_JSON_URL.format(version)
        with open(archive_path, 'wb') as f:
            total_bytes = 0
            with httpx.stream('GET', url) as r:
                filesize = int(r.headers['content-length'])
                bar = Bar(url, max=filesize, suffix='%(percent)d%%')
                for data in r.iter_bytes():
                    f.write(data)
                    total_bytes += len(data)
                    bar.next(len(data))
                bar.finish()

    logger.debug('Extracting MiBIG JSON data')
    if os.path.exists(os.path.join(output_path, 'completed')):
        return True

    mibig_gz = tarfile.open(archive_path, 'r:gz')
    # extract and rename to "mibig_json"
    # TODO annoyingly the 2.0 version has been archived with a subdirectory, while
    # 1.4 just dumps all the files into the current directory, so if/when 2.0 support
    # is required this will need to handle both cases
    mibig_gz.extractall(path=os.path.join(output_path))
    # os.rename(os.path.join(self.project_file_cache, 'mibig_json_{}'.format(version)), os.path.join(self.project_file_cache, 'mibig_json'))

    open(os.path.join(output_path, 'completed'), 'w').close()
    return True
def event_hook_tqdm():
    """Combine a download loop with tqdm to display file download progress."""
    with tempfile.NamedTemporaryFile() as download_file:
        url = "https://speed.hetzner.de/100MB.bin"
        with httpx.stream("GET", url) as response:
            total = int(response.headers["Content-Length"])
            with tqdm(total=total, unit_scale=True, unit_divisor=1024, unit="B") as progress:
                num_bytes_downloaded = response.num_bytes_downloaded
                for chunk in response.iter_bytes():
                    download_file.write(chunk)
                    progress.update(response.num_bytes_downloaded - num_bytes_downloaded)
                    num_bytes_downloaded = response.num_bytes_downloaded
async def get_container_file(  # pylint: disable=too-many-arguments
    cluster: str,
    environ: str,
    container: str,
    path: str,
    topology_name: str = Query(..., alias="topology"),
    role: Optional[str] = None,
):
    """Return a given raw file."""
    topology = state.tracker.get_topology(cluster, role, environ, topology_name)
    stmgr = topology.info.physical_plan.stmgrs[f"stmgr-{container}"]
    url = f"http://{stmgr.host}:{stmgr.shell_port}/download/{path}"
    _, _, filename = path.rpartition("/")

    async def stream_upstream():
        # httpx.stream() is synchronous; stream via an AsyncClient instead,
        # and keep the response open for as long as the body is being relayed.
        async with httpx.AsyncClient() as client:
            async with client.stream("GET", url) as response:
                async for chunk in response.aiter_bytes():
                    yield chunk

    # StreamingResponse is not awaitable; return it directly and let the
    # framework consume the generator.
    return StreamingResponse(
        content=stream_upstream(),
        headers={"Content-Disposition": f"attachment; filename={filename}"},
    )
def pull_http(source: dict, path: Optional[str] = None, secrets: dict = {},
              **kwargs) -> Files:
    """
    Pull a file from a HTTP source.
    """
    url = source.get("url")
    assert url, "Source must have a URL"
    with httpx.stream("GET", url) as response:
        if response.status_code != 200:
            raise RuntimeError(f"Error when fetching {url}: {response.status_code}")
        size = int(response.headers.get("Content-Length", 0)) / GIGABYTE
        if size > MAX_SIZE:
            raise RuntimeError(
                f"Size of file is greater than {MAX_SIZE}GB maximum: {size}GB")
        if not path:
            path = str(os.path.basename(url))
            if not file_ext(path):
                content_type = response.headers.get("Content-Type", "text/html").split(";")[0]
                ext = mimetypes.guess_extension(content_type, strict=False)
                path += ext or ".txt"
        ensure_parent(path)
        remove_if_dir(path)
        with open(path, "wb") as file:
            for data in response.iter_bytes():
                file.write(data)
        return {path: file_info(path)}
def test_httpcore_exception_mapping(server) -> None:
    """
    HTTPCore exception mapping works as expected.
    """
    # Make sure we don't just map to `NetworkError`.
    with pytest.raises(httpx.ConnectError):
        httpx.get("http://doesnotexist")

    # Make sure streaming methods also map exceptions.
    url = server.url.copy_with(path="/slow_stream_response")
    timeout = httpx.Timeout(None, read=0.1)
    with httpx.stream("GET", url, timeout=timeout) as stream:
        with pytest.raises(httpx.ReadTimeout):
            stream.read()

    # Make sure it also works with custom transports.
    class MockTransport(httpcore.SyncHTTPTransport):
        def request(self, *args: Any, **kwargs: Any) -> Any:
            raise httpcore.ProtocolError()

    client = httpx.Client(transport=MockTransport())
    with pytest.raises(httpx.ProtocolError):
        client.get("http://testserver")
def get_sbml(self, model_id: str) -> bytes:
    """
    Attempt to download an SBML document from the repository.

    Parameters
    ----------
    model_id : str
        The identifier of the desired metabolic model. This is typically
        repository specific.

    Returns
    -------
    bytes
        A gzip-compressed, UTF-8 encoded SBML document.

    Raises
    ------
    httpx.HTTPError
        In case there are any connection problems.

    """
    compressed = BytesIO()
    filename = f"{model_id}.xml.gz"
    with self._progress, httpx.stream(
        method="GET", url=self._url.join(filename)
    ) as response:
        response.raise_for_status()
        task_id = self._progress.add_task(
            description="download",
            total=int(response.headers["Content-Length"]),
            model_id=model_id,
        )
        for chunk in response.iter_bytes():
            compressed.write(chunk)
            self._progress.update(task_id=task_id, advance=len(chunk))
    compressed.seek(0)
    return compressed.read()