def download_file(self, embeddings_folder, logger):
    """Download this embedding's file into *embeddings_folder*.

    Skips the download when the target file already exists.  Streams the
    response in 4 MB chunks while updating a progress bar; on any error the
    partially written file is removed before the exception propagates.

    :param embeddings_folder: directory the embeddings file is written into
    :param logger: logger used for status messages
    """
    logger.info('Downloading word embeddings file for "{}"...'.format(
        self.alias))
    out_path = os.path.join(embeddings_folder, self.embeddings_filename)
    if os.path.exists(out_path):
        logger.info('Already downloaded -> skipping!')
        return

    url = '{}/{}'.format(self.base_url, self.embeddings_filename)
    r = requests.get(url, stream=True)
    # BUG FIX: fail early on HTTP errors (404/500) instead of silently
    # writing the server's error page to disk as "embeddings".
    r.raise_for_status()
    # content-length may be absent; fall back to the known approximation
    total_size = int(
        r.headers.get('content-length', self.approximate_filesize)
    )  # size of the embeddings file (bytes)
    chunk_size = 4 * 1024 * 1024  # 4 MB

    bar = DataTransferBar(max_value=total_size).start()
    completed_bytes = 0
    try:
        with open(out_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=chunk_size):
                if chunk:  # filter out keep-alive chunks
                    f.write(chunk)
                    completed_bytes += len(chunk)
                    # the size estimate may be too small; grow the bar
                    if completed_bytes > bar.max_value:
                        bar.max_value = completed_bytes
                    bar.update(completed_bytes)
    except BaseException:
        # remove the truncated file (also on KeyboardInterrupt), then
        # re-raise with `raise` to preserve the original traceback
        os.unlink(out_path)
        raise
    finally:
        r.close()  # release the connection back to the pool
    bar.finish()
    logger.info('Done!')
def progress_urlretrieve(url, ofile):
    """Download *url* to *ofile* via ``_urlretrieve`` with a progress bar.

    Falls back to a plain ``_urlretrieve`` call when the optional
    ``progressbar`` (progressbar2) package is not installed.

    :returns: whatever ``_urlretrieve`` returns
    """
    print("Downloading %s ..." % url)
    sys.stdout.flush()
    try:
        from progressbar import DataTransferBar, UnknownLength
    except ImportError:
        # BUG FIX: only the import is guarded now; previously the whole
        # download sat inside the try, so an ImportError raised *during*
        # the transfer silently restarted it without a progress hook.
        return _urlretrieve(url, ofile)

    pbar = DataTransferBar()

    def _upd(count, size, total):
        # urlretrieve reporthook: `count` blocks of `size` bytes
        # transferred so far, `total` bytes expected (<= 0 if unknown)
        if pbar.max_value is None:
            pbar.start(total if total > 0 else UnknownLength)
        done = count * size
        # BUG FIX: when total <= 0 the old code computed min(done, total)
        # and fed a negative value to update(); clamp only when known.
        pbar.update(min(done, total) if total > 0 else done)
        sys.stdout.flush()

    res = _urlretrieve(url, ofile, reporthook=_upd)
    try:
        pbar.finish()
    except Exception:
        # narrowed from a bare `except:`; finishing the bar is best-effort
        pass
    return res
def _download_binaries(self, dep):
    """Fetch the prebuilt binary archive for *dep* into the cwd.

    Looks up the download URL for the current platform; when no URL is
    registered, logs a warning and returns without downloading.
    """
    urls_by_platform = self.dependency_dict[dep]["download_binaries"]
    if self.platform not in urls_by_platform:
        logger.warning(
            f"No binaries for download for {dep}, will fake it.")
        return
    url = urls_by_platform[self.platform]
    filename = url.split("/")[-1]
    target = Path(".") / filename
    logger.debug(f"downloading {dep} at {url} to {filename}")
    # suppress the progress bar in quiet mode
    trackers = () if self.quiet else (ProgressTracker(DataTransferBar()),)
    request_download(url, target, trackers=trackers)
    logger.debug(f"downloaded file {target}, size"
                 f" {target.stat().st_size}")
def _download_untar(self, dep):
    """Download the tarball for *dep* into the cwd and untar it.

    Exits the process with status 1 when extraction fails.
    """
    tar = sh.Command("tar")
    download_url = self.dependency_dict[dep]["tarball"]
    dlname = download_url.split("/")[-1]
    download_path = Path(".") / dlname
    logger.debug(f"downloading {dep} at {download_url} to {dlname}")
    if self.quiet:
        trackers = ()
    else:
        trackers = (ProgressTracker(DataTransferBar()), )
    request_download(download_url, download_path, trackers=trackers)
    logger.debug(f"downloaded file {download_path}, size"
                 f" {download_path.stat().st_size}")
    try:
        tar("xvf", download_path, **self.output_kwargs)
    except Exception:
        # BUG FIX: narrowed from a bare `except:` which also swallowed
        # SystemExit/KeyboardInterrupt; log the underlying cause too.
        logger.exception(f"untar of {download_path} failed")
        sys.exit(1)
def download_image_file(self, image):
    '''
    Downloads the @image from the base URL location to the cwd.

    :param image: the filename portion of the URL to download and the
           local name the file will be downloaded to
    :raises Exception: if @image is None, or if every retry fails
    :returns: nothing
    '''
    if image is None:
        raise Exception("No suitable ISO image found at {0}".format(
            self.base))
    url = urljoin(self.base, image)
    if os.path.exists(image):
        print('File already found')
        return
    with DataTransferBar(max_value=UnknownLength) as bar:
        def update(a, r, t):
            # urlretrieve reporthook: a=block count, r=block size, t=total
            bar.max_value = t
            try:
                bar.update(bar.value + r)
            except Exception:
                pass  # progress display is best-effort; never abort
        retries = 20
        while retries > 0:
            try:
                print("Fetching: ", image)
                bar.update(0)  # reset the bar for this attempt
                urlretrieve(url, image, update)
            except HTTPError as e:
                print('HTTP error: ', e.code, url)
                retries = retries - 1
                sleep(1)
            except URLError as e:
                print('URL error: ', e.reason, url)
                retries = retries - 1
                sleep(1)
            else:
                break
        else:
            # BUG FIX: previously exhausting all retries returned
            # silently, so callers assumed the download had succeeded.
            raise Exception(
                "Failed to download {0} after 20 retries".format(url))
        bar.finish()
def _download_untar(self, dep, dep_dict, verbose, progressbar=True):
    """Download the tarball described by *dep_dict* into the cwd and untar it.

    :param dep: dependency name (unused beyond the signature; kept for callers)
    :param dep_dict: mapping with a "tarball" URL entry
    :param verbose: print progress messages when True
    :param progressbar: show a transfer progress bar when True
    """
    from sh import tar  # isort:skip
    download_url = dep_dict["tarball"]
    dlname = download_url.split("/")[-1]
    download_path = Path(".") / dlname
    if verbose:
        print(f"downloading {download_url} to {dlname}")
    # BUG FIX: removed the unused `tmp_path` local, which also wrongly
    # treated the file path `download_path` as a directory to join onto.
    if progressbar:
        trackers = (ProgressTracker(DataTransferBar()), )
    else:
        trackers = None
    request_download(download_url, download_path, trackers=trackers)
    if verbose:
        print(f"downloaded file {download_path}, size"
              f" {download_path.stat().st_size}")
    tar_output = tar("xvf", download_path)
    if verbose:
        print(tar_output)
        print("untar done")
import certifi import hashlib # progressbar is provided by progressbar2 on PYPI. from progressbar import DataTransferBar from requests_download import download, HashTracker, ProgressTracker user_agent = { 'user-agent': 'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0' } http = urllib3.PoolManager(cert_reqs="CERT_REQUIRED", ca_certs=certifi.where(), headers=user_agent) hasher = HashTracker(hashlib.sha256()) progress = ProgressTracker(DataTransferBar()) def downloadEp(ep): src = BeautifulSoup( http.request("GET", "http://anime.megamanonline.org" + ep.p.a["href"]).data, "html.parser") vimeourl = src.find("iframe")["src"] index = vimeourl.rindex("/") + 1 idvid = vimeourl[index:index + 8] vimeourl = "https://player.vimeo.com/video/" + idvid res = "" try:
def test_known_length():
    """A bar started with a fixed max_value accepts updates up to it."""
    bar = DataTransferBar().start(max_value=50)
    for value in range(50):
        bar.update(value)
    bar.finish()
def test_unknown_length():
    """A bar started with UnknownLength still accepts numeric updates."""
    bar = DataTransferBar().start(max_value=progressbar.UnknownLength)
    for value in range(50):
        bar.update(value)
    bar.finish()