def _download(self, req, fname, downloaded):
    """Stream the body of *req* into *fname*, updating a progress bar.

    Appends to the file when *downloaded* is non-zero (resuming a partial
    transfer), otherwise truncates and starts fresh.
    """
    # Resume an interrupted transfer by appending; start over otherwise.
    write_mode = 'wb'
    if downloaded:
        write_mode = 'ab'
    target = os.path.basename(req.url)
    expected = self._get_header(req, 'content-length')
    block_size = 100 * 1024
    progress.update_target(target, downloaded, expected)
    with open(fname, write_mode) as out:
        for block in req.iter_content(chunk_size=block_size):
            # Keep-alive heartbeats arrive as empty chunks; skip them.
            if block:
                out.write(block)
                downloaded += len(block)
                progress.update_target(target, downloaded, expected)
    progress.finish_target(target)
def collect_dir_cache(self, dname):
    """Compute per-file md5s for every file under *dname* and a combined
    directory md5.

    Returns a tuple ``(md5, dir_info)`` where ``md5`` is the directory
    checksum (suffixed with ``self.MD5_DIR_SUFFIX``) and ``dir_info`` is a
    list of ``{PARAM_RELPATH, PARAM_MD5}`` dicts sorted by relative path.
    If the cache for ``md5`` is missing/stale, it is dumped via
    ``self.dump_dir_cache`` as a side effect.
    """
    dir_info = []
    # Whether to show a progress bar; enabled once any walked root holds
    # more files than LARGE_DIR_SIZE.
    bar = False
    for root, dirs, files in os.walk(dname):
        if len(files) > LARGE_DIR_SIZE:
            msg = "Computing md5 for a large directory {}. " \
                  "This is only done once."
            Logger.info(msg.format(os.path.relpath(dname)))
            bar = True
            title = os.path.relpath(dname)
            processed = 0
            total = len(files)
            progress.update_target(title, 0, total)
        # NOTE(review): `bar`/`title`/`processed`/`total` persist across
        # os.walk roots once set, so a later small root reuses the previous
        # root's title and totals — looks unintended; confirm before fixing.
        for fname in files:
            path = os.path.join(root, fname)
            relpath = self.unixpath(os.path.relpath(path, dname))
            if bar:
                # Reports the count *before* incrementing, so the bar shows
                # processed-1 of total during the loop; finish_target below
                # closes it out.
                progress.update_target(title, processed, total)
                processed += 1
            # Presumably returns the file's md5 (possibly via a state cache)
            # — semantics live in self.state.update; verify there.
            md5 = self.state.update(path)
            dir_info.append({self.PARAM_RELPATH: relpath,
                             self.PARAM_MD5: md5})
        if bar:
            progress.finish_target(title)
    # NOTE: sorting the list by path to ensure reproducibility
    dir_info = sorted(dir_info, key=itemgetter(self.PARAM_RELPATH))
    md5 = dict_md5(dir_info) + self.MD5_DIR_SUFFIX
    if self.changed_cache(md5):
        self.dump_dir_cache(md5, dir_info)
    return (md5, dir_info)
def copyfile(src, dest, no_progress_bar=False, name=None):
    """Copy *src* to *dest* in chunks, reporting progress unless disabled.

    *name* labels the progress bar; defaults to dest's basename. If *dest*
    is an existing directory, the file is copied into it under src's
    basename.
    """
    from dvc.progress import progress

    if not name:
        name = os.path.basename(dest)
    total = os.stat(src).st_size
    # Copying into a directory: keep the source file's basename.
    if os.path.isdir(dest):
        dest = os.path.join(dest, os.path.basename(src))
    copied = 0
    with open(src, "rb") as fsrc, open(dest, "wb+") as fdest:
        # Two-argument iter(): read fixed-size chunks until EOF (b"").
        for buf in iter(lambda: fsrc.read(LOCAL_CHUNK_SIZE), b""):
            fdest.write(buf)
            copied += len(buf)
            if not no_progress_bar:
                progress.update_target(name, copied, total)
    if not no_progress_bar:
        progress.finish_target(name)
def upload(self, from_infos, to_infos, names=None, no_progress_bar=False):
    """Upload local files to this remote, one per (from, to) pair.

    Failures on individual files are logged and skipped, not raised.
    Raises RemoteActionNotImplemented if the remote has no ``_upload``.
    """
    if not hasattr(self, "_upload"):
        raise RemoteActionNotImplemented("upload", self.scheme)
    names = self._verify_path_args(to_infos, from_infos, names)
    with self.transfer_context() as ctx:
        for src, dst, title in zip(from_infos, to_infos, names):
            # Only local -> <this remote's scheme> transfers are supported.
            if dst.scheme != self.scheme:
                raise NotImplementedError
            if src.scheme != "local":
                raise NotImplementedError
            logger.debug("Uploading '{}' to '{}'".format(src, dst))
            title = title or src.name
            if not no_progress_bar:
                progress.update_target(title, 0, None)
            try:
                self._upload(
                    src.fspath,
                    dst,
                    name=title,
                    ctx=ctx,
                    no_progress_bar=no_progress_bar,
                )
            except Exception:
                # Best-effort semantics: log and move on to the next file.
                logger.exception(
                    "failed to upload '{}' to '{}'".format(src, dst)
                )
            else:
                if not no_progress_bar:
                    progress.finish_target(title)
def _import(self, bucket_name, key, data_item):
    """Download *key* from GCS bucket *bucket_name* into *data_item*'s path.

    Returns *data_item* on success (or when the local copy already matches
    the remote checksum), ``None`` when the blob is missing or the download
    fails. On success the file is moved into the cache as a side effect.
    """
    bucket = self._get_bucket_gc(bucket_name)
    fname = data_item.data.dvc
    name = os.path.basename(fname)
    # Download to a temp path first so a failed transfer never leaves a
    # partial file at the final destination.
    tmp_file = self.tmp_file(fname)
    blob = bucket.get_blob(key)
    if not blob:
        Logger.error('File "{}" does not exist in the cloud'.format(key))
        return None
    # Local file already matches the remote checksum — nothing to do.
    if self._cmp_checksum(blob, fname):
        Logger.debug('File "{}" matches with "{}".'.format(fname, key))
        return data_item
    Logger.debug('Downloading cache file from gc "{}/{}"'.format(bucket.name, key))
    # percent_cb is not available for download_to_filename, so
    # lets at least update progress at keypoints(start, finish)
    progress.update_target(name, 0, None)
    try:
        blob.download_to_filename(tmp_file)
    except Exception as exc:
        Logger.error('Failed to download "{}": {}'.format(key, exc))
        return None
    # Atomically publish the completed download, then cache it.
    os.rename(tmp_file, fname)
    data_item.move_data_to_cache()
    progress.finish_target(name)
    Logger.debug('Downloading completed')
    return data_item
def percent_cb(name, complete, total):
    """Transfer callback: log byte counts and refresh the progress bar."""
    done = sizeof_fmt(complete)
    size = sizeof_fmt(total)
    Logger.debug('{}: {} transferred out of {}'.format(name, done, size))
    progress.update_target(os.path.basename(name), complete, total)