def test_progress_awareness(self, mocker, capsys, caplog):
    """Check that logging plays nicely with an active progress bar.

    A record that the formatter deems invisible must not emit a newline
    that would break the in-place progress-bar line; a visible record may.
    NOTE(review): `formatter` and `logger` are module-level objects defined
    elsewhere in this file — presumably dvc's custom formatter/logger;
    confirm against the file header.
    """
    from dvc.progress import progress

    # Pretend stdout is a TTY so the progress bar renders in-place.
    with mocker.patch("sys.stdout.isatty", return_value=True):
        progress.set_n_total(100)
        progress.update_target("progress", 1, 10)

        # logging an invisible message should not break
        # the progress bar output
        with caplog.at_level(logging.INFO, logger="dvc"):
            # DEBUG is below the INFO capture level, so formatting this
            # record should produce no visible output.
            debug_record = logging.LogRecord(
                name="dvc",
                level=logging.DEBUG,
                pathname=__name__,
                lineno=1,
                msg="debug",
                args=(),
                exc_info=None,
            )

            formatter.format(debug_record)
            captured = capsys.readouterr()
            # No newline means the progress-bar line was left intact.
            assert "\n" not in captured.out

        # just when the message is actually visible
        with caplog.at_level(logging.INFO, logger="dvc"):
            logger.info("some info")
            captured = capsys.readouterr()
            # A visible message is allowed to move past the bar line.
            assert "\n" in captured.out
def test_progress_awareness(self, mocker, capsys, caplog):
    """Check that logging through dvc's logger never writes to stdout
    while a Tqdm progress bar is active.

    Both an invisible (DEBUG) record and a visible INFO message must
    leave stdout empty — Tqdm owns stdout; log output goes elsewhere
    (presumably stderr via the handler configured at module level —
    TODO confirm against the logging setup in this file).
    """
    from dvc.progress import Tqdm

    # Pretend stdout is a TTY so Tqdm actually draws the bar.
    with mocker.patch("sys.stdout.isatty", return_value=True):
        with Tqdm(total=100, desc="progress") as pbar:
            pbar.update()

            # logging an invisible message should not break
            # the progress bar output
            with caplog.at_level(logging.INFO, logger="dvc"):
                # DEBUG is below the INFO capture level.
                debug_record = logging.LogRecord(
                    name="dvc",
                    level=logging.DEBUG,
                    pathname=__name__,
                    lineno=1,
                    msg="debug",
                    args=(),
                    exc_info=None,
                )

                formatter.format(debug_record)
                captured = capsys.readouterr()
                assert captured.out == ""

            # when the message is actually visible
            with caplog.at_level(logging.INFO, logger="dvc"):
                logger.info("some info")
                captured = capsys.readouterr()
                # Even visible messages must not land on stdout.
                assert captured.out == ""
def save(self):
    """Validate this output's path and record its checksum info.

    Raises:
        DoesNotExistError: path does not exist on disk.
        IsNotFileOrDirError: path is neither a regular file nor a directory.
        OutputAlreadyTrackedError: a local, cached output is already
            tracked by the repo's SCM.
    """
    if not os.path.exists(self.path):
        raise self.DoesNotExistError(self.rel_path)

    if not os.path.isfile(self.path) and not os.path.isdir(self.path):
        raise self.IsNotFileOrDirError(self.rel_path)

    # Empty outputs are suspicious but not fatal — warn and continue.
    if (os.path.isfile(self.path) and os.path.getsize(self.path) == 0) or (
        os.path.isdir(self.path) and len(os.listdir(self.path)) == 0
    ):
        msg = "file/directory '{}' is empty.".format(self.rel_path)
        logger.warning(msg)

    if not self.use_cache:
        # Uncached outputs still get their info recorded (and metrics
        # verified), but nothing is saved to the cache.
        self.info = self.remote.save_info(self.path_info)
        self.verify_metric()
        if not self.IS_DEPENDENCY:
            msg = "Output '{}' doesn't use cache. Skipping saving."
            logger.info(msg.format(self.rel_path))
        return

    # Cached path below is only reachable for outputs, never dependencies.
    assert not self.IS_DEPENDENCY

    if not self.changed():
        msg = "Output '{}' didn't change. Skipping saving."
        logger.info(msg.format(self.rel_path))
        return

    if self.is_local:
        if self.repo.scm.is_tracked(self.path):
            raise OutputAlreadyTrackedError(self.rel_path)

        # Add the output to .gitignore *before* saving so git never
        # sees the cached file as untracked.
        if self.use_cache:
            self.repo.scm.ignore(self.path)

    self.info = self.remote.save_info(self.path_info)
def run(self):
    """Add a remote to the config, refusing to clobber an existing one
    unless ``--force`` was given; returns 0 on success, non-zero otherwise.
    """
    from dvc.remote import _get, RemoteLOCAL

    # Local remotes are stored relative to the config file location.
    url = self.args.url
    if _get({Config.SECTION_REMOTE_URL: url}) == RemoteLOCAL:
        url = self.resolve_path(url, self.configobj.filename)
    self.args.url = url

    section = Config.SECTION_REMOTE_FMT.format(self.args.name)
    already_exists = section in self.configobj.keys()
    if already_exists and not self.args.force:
        logger.error(
            "Remote with name {} already exists. "
            "Use -f (--force) to overwrite remote "
            "with new value".format(self.args.name)
        )
        return 1

    ret = self._set(section, Config.SECTION_REMOTE_URL, self.args.url)
    if ret != 0:
        return ret

    if self.args.default:
        logger.info(
            "Setting '{}' as a default remote.".format(self.args.name)
        )
        ret = self._set(
            Config.SECTION_CORE, Config.SECTION_CORE_REMOTE, self.args.name
        )

    return ret
def run(self):
    """Reproduce the requested targets (or all pipelines).

    Returns 0 on success, 1 if any target failed to reproduce.

    Fixes over the previous version:
    - ``logger.error()`` was called with no message, which raises
      TypeError on a standard ``logging.Logger``; a message naming the
      failed target is now passed.
    - the working directory is restored in a ``finally`` block, so an
      unexpected exception no longer leaves the process chdir'ed into
      ``self.args.cwd``.
    """
    recursive = not self.args.single_item
    saved_dir = os.path.realpath(os.curdir)
    if self.args.cwd:
        os.chdir(self.args.cwd)

    # Dirty hack so the for loop below can at least enter once
    if self.args.all_pipelines:
        self.args.targets = [None]

    ret = 0
    try:
        for target in self.args.targets:
            try:
                stages = self.project.reproduce(
                    target,
                    recursive=recursive,
                    force=self.args.force,
                    dry=self.args.dry,
                    interactive=self.args.interactive,
                    pipeline=self.args.pipeline,
                    all_pipelines=self.args.all_pipelines,
                    ignore_build_cache=self.args.ignore_build_cache,
                )

                if len(stages) == 0:
                    logger.info(CmdDataStatus.UP_TO_DATE_MSG)

                if self.args.metrics:
                    self.project.metrics_show()
            except DvcException:
                logger.error("failed to reproduce '{}'".format(target))
                ret = 1
                break
    finally:
        # Always restore the original working directory.
        os.chdir(saved_dir)

    return ret
def save(self):
    """Validate this local output and save it to the project cache.

    Uncached outputs are delegated to the base class; cached ones are
    validated, git-ignored if local, and stored via the local cache.
    """
    if not self.use_cache:
        super(OutputLOCAL, self).save()
        self._verify_metric()
        logger.info(
            "Output '{}' doesn't use cache. Skipping saving.".format(
                self.rel_path
            )
        )
        return

    path = self.path
    if not os.path.exists(path):
        raise self.DoesNotExistError(self.rel_path)

    is_file = os.path.isfile(path)
    is_dir = os.path.isdir(path)
    if not is_file and not is_dir:
        raise self.IsNotFileOrDirError(self.rel_path)

    # An empty output is allowed, but worth warning about.
    empty_file = is_file and os.path.getsize(path) == 0
    empty_dir = is_dir and not os.listdir(path)
    if empty_file or empty_dir:
        logger.warning(
            "file/directory '{}' is empty.".format(self.rel_path)
        )

    if not self.changed():
        logger.info(
            "Output '{}' didn't change. Skipping saving.".format(
                self.rel_path
            )
        )
        return

    if self.is_local:
        if self.project.scm.is_tracked(path):
            raise OutputAlreadyTrackedError(self.rel_path)

        # Ignore before saving so the SCM never sees the cached file.
        if self.use_cache:
            self.project.scm.ignore(path)

    self.info = self.project.cache.local.save(self.path_info)
def _save_dir(self, path_info):
    """Save a directory to the cache and relink its entries.

    Each file in the directory is either moved into the cache (if the
    cache copy is missing/changed) or removed from the workspace, then
    linked back from the cache. Returns a checksum-info dict for the
    directory.
    """
    path = path_info["path"]
    # md5 of the directory plus the per-file entry list.
    md5, dir_info = self.state.update_info(path)
    dir_relpath = os.path.relpath(path)
    dir_size = len(dir_info)
    # Only show a progress bar for large directories.
    bar = dir_size > LARGE_DIR_SIZE

    logger.info("Linking directory '{}'.".format(dir_relpath))

    for processed, entry in enumerate(dir_info):
        relpath = entry[self.PARAM_RELPATH]
        m = entry[self.PARAM_CHECKSUM]
        p = os.path.join(path, relpath)
        c = self.get(m)

        if self.changed_cache(m):
            # Cache copy missing or corrupted: move the workspace file in.
            self._move(p, c)
        else:
            # Cache already has this content: drop the workspace copy.
            remove(p)

        # Replace the workspace file with a link to the cache.
        self.link(c, p)

        if bar:
            progress.update_target(dir_relpath, processed, dir_size)

    self.state.update_link(path)

    if bar:
        progress.finish_target(dir_relpath)

    return {self.PARAM_CHECKSUM: md5}
def show_metrics(metrics, all_branches=False, all_tags=False):
    """Log a metrics mapping of ``{branch: {fname: metric}}``.

    The branch header is printed only when metrics span multiple
    branches or tags.
    """
    show_header = all_branches or all_tags
    for branch, fname_map in metrics.items():
        if show_header:
            logger.info("{}:".format(branch))
        for fname, metric in fname_map.items():
            logger.info("\t{}: {}".format(fname, metric))
def reproduce(
    self, force=False, dry=False, interactive=False, no_commit=False
):
    """Re-run this stage if it changed (or unconditionally with *force*).

    Returns the stage on success, or ``None`` when nothing needed doing.
    Raises DvcException if the user aborts an interactive confirmation.
    """
    # NOTE: self.changed() must be evaluated first — it may have side
    # effects beyond returning a bool.
    if not self.changed() and not force:
        return None

    if (self.cmd or self.is_import) and not self.locked and not dry:
        # Removing outputs only if we actually have command to reproduce
        self.remove_outs(ignore_remove=False)

    if interactive:
        question = (
            "Going to reproduce '{stage}'. "
            "Are you sure you want to continue?".format(stage=self.relpath)
        )
        if not prompt.confirm(question):
            raise DvcException("reproduction aborted by the user")

    logger.info("Reproducing '{stage}'".format(stage=self.relpath))

    self.run(dry=dry, no_commit=no_commit)

    logger.debug("'{stage}' was reproduced".format(stage=self.relpath))

    return self
def ignore(self, path):
    """Append an entry for *path* to the appropriate .gitignore file.

    Does nothing if an equivalent entry is already present. Registers
    the modified .gitignore with the repo for a later ``git add``.

    Fixes over the previous version: the gitignore file handle was
    leaked (``open(...).readlines()`` with no close) — it is now read
    inside a ``with`` block — and the membership test uses ``any()``
    instead of building a filtered list.
    """
    entry, gitignore = self._get_gitignore(path)

    ignore_list = []
    if os.path.exists(gitignore):
        with open(gitignore, "r") as fobj:
            ignore_list = fobj.readlines()

    # Already ignored — nothing to do.
    stripped_entry = entry.strip()
    if any(line.strip() == stripped_entry for line in ignore_list):
        return

    msg = "Adding '{}' to '{}'.".format(
        os.path.relpath(path), os.path.relpath(gitignore)
    )
    logger.info(msg)

    # Prepend a newline only when the file already has content.
    content = entry
    if ignore_list:
        content = "\n" + content

    with open(gitignore, "a") as fobj:
        fobj.write(content)

    if self.repo is not None:
        self.repo.files_to_git_add.append(os.path.relpath(gitignore))
def status(self, checksum_infos, remote, jobs=None, show_checksums=False):
    """Collect local/remote existence status for the given checksums.

    Groups the checksum infos, queries both the remote and the local
    cache, fills in per-entry statuses and logs any missing caches.
    Returns the grouped status mapping.
    """
    logger.info("Preparing to collect status from {}".format(remote.url))
    title = "Collecting information"

    # The percentages below are rough milestones, not real progress.
    progress.set_n_total(1)
    progress.update_target(title, 0, 100)
    progress.update_target(title, 10, 100)

    status_map = self._group(checksum_infos, show_checksums=show_checksums)
    md5s = list(status_map)

    progress.update_target(title, 30, 100)

    remote_exists = list(remote.cache_exists(md5s))

    progress.update_target(title, 90, 100)

    local_exists = self.cache_exists(md5s)

    progress.finish_target(title)

    self._fill_statuses(status_map, local_exists, remote_exists)
    self._log_missing_caches(status_map)

    return status_map
def status(self, checksum_infos, remote, jobs=None, show_checksums=False):
    """Collect local/remote existence status for the given checksums.

    Returns the grouped mapping with each entry's ``"status"`` filled
    from STATUS_MAP keyed by (exists locally, exists remotely).
    """
    logger.info("Preparing to collect status from {}".format(remote.url))
    title = "Collecting information"

    # The percentages below are rough milestones, not real progress.
    progress.set_n_total(1)
    progress.update_target(title, 0, 100)
    progress.update_target(title, 10, 100)

    status_map = self._group(checksum_infos, show_checksums=show_checksums)
    md5s = list(status_map)

    progress.update_target(title, 30, 100)

    remote_exists = list(remote.cache_exists(md5s))

    progress.update_target(title, 90, 100)

    local_exists = self.cache_exists(md5s)

    progress.finish_target(title)

    for checksum, info in status_map.items():
        info["status"] = STATUS_MAP[
            (checksum in local_exists, checksum in remote_exists)
        ]

    return status_map
def _show(self, metrics):
    """Log a ``{branch: {fname: metric}}`` mapping, with branch headers
    only when several branches/tags are being displayed."""
    show_header = self.args.all_branches or self.args.all_tags
    for branch, fname_map in metrics.items():
        if show_header:
            logger.info("{}:".format(branch))
        for fname, metric in fname_map.items():
            logger.info("\t{}: {}".format(fname, metric))
def do_run(self, target=None):
    """Show data status for *target*; returns 0 when up to date or
    shown, 1 on failure or (with --quiet) when anything changed."""
    # Cloud status output is indented one extra level.
    indent = 1 if self.args.cloud else 0
    try:
        status = self.project.status(
            target=target,
            jobs=self.args.jobs,
            cloud=self.args.cloud,
            show_checksums=self.args.show_checksums,
            remote=self.args.remote,
            all_branches=self.args.all_branches,
            all_tags=self.args.all_tags,
            with_deps=self.args.with_deps,
        )
        if not status:
            logger.info(self.UP_TO_DATE_MSG)
        elif self.args.quiet:
            # Quiet mode: signal "something changed" via the exit code.
            return 1
        else:
            self._show(status, indent)
    except Exception:
        # CLI boundary: report the failure and fail the command.
        logger.error("failed to obtain data status")
        return 1
    return 0
def _remind_to_git_add(self):
    """Log a reminder to ``git add`` the files created during the run.

    Fix: use idiomatic truthiness instead of ``len(...) == 0``, matching
    the newer implementations of this helper in the codebase.
    """
    if not self._files_to_git_add:
        return

    msg = '\nTo track the changes with git run:\n\n'
    msg += '\tgit add ' + " ".join(self._files_to_git_add)

    logger.info(msg)
def create_module_dir(module_name):
    """Return the directory for *module_name* under the modules dir,
    wiping any previous contents so the package can be re-fetched."""
    module_dir = os.path.join(GitPackage.MODULES_DIR, module_name)

    if not os.path.exists(module_dir):
        logger.info("Adding package {}".format(module_name))
        return module_dir

    # Existing install: clear it out before the update.
    logger.info("Updating package {}".format(module_name))
    shutil.rmtree(module_dir)
    return module_dir
def init(root_dir=os.curdir, no_scm=False, force=False):
    """
    Creates an empty project on the given directory -- basically a
    `.dvc` directory with subdirectories for configuration and cache.

    It should be tracked by a SCM or use the `--no-scm` flag.

    If the given directory is not empty, you must use the `--force`
    flag to override it.

    Args:
        root_dir: Path to project's root directory.
        no_scm: Allow initializing without any SCM tracking the directory.
        force: Re-initialize even if a `.dvc` directory already exists.

    Returns:
        Project instance.

    Raises:
        InitError: The directory is not tracked by a supported SCM (and
            `no_scm` is False), or `.dvc` already exists and `force` is
            False.
    """
    import shutil
    from dvc.scm import SCM, Base
    from dvc.config import Config

    root_dir = os.path.abspath(root_dir)
    dvc_dir = os.path.join(root_dir, Project.DVC_DIR)

    scm = SCM(root_dir)
    # NOTE: an exact type check is intentional here — SCM() returns the
    # Base class itself only when no supported SCM is detected, whereas
    # isinstance() would also match real SCM subclasses (e.g. Git).
    if type(scm) == Base and not no_scm:
        raise InitError(
            "{project} is not tracked by any supported scm tool"
            " (e.g. git). Use '--no-scm' if you don't want to use any scm.".format(
                project=root_dir
            )
        )

    if os.path.isdir(dvc_dir):
        if not force:
            raise InitError(
                "'{project}' exists. Use '-f' to force.".format(
                    project=os.path.relpath(dvc_dir)
                )
            )
        # Forced re-init: wipe the old .dvc directory entirely.
        shutil.rmtree(dvc_dir)

    os.mkdir(dvc_dir)

    config = Config.init(dvc_dir)
    proj = Project(root_dir)

    # Stage the new config (and SCM ignore file, if any) for commit.
    scm.add([config.config_file])
    if scm.ignore_file:
        scm.add([os.path.join(dvc_dir, scm.ignore_file)])

    logger.info("\nYou can now commit the changes to git.\n")

    proj._welcome_message()

    return proj
def run(self):
    """List every configured remote as ``name<TAB>url``; returns 0."""
    for section in self.configobj.keys():
        match = re.match(Config.SECTION_REMOTE_REGEX, section)
        if not match:
            # Not a remote section — skip it.
            continue
        name = match.group('name')
        url = self.configobj[section].get(Config.SECTION_REMOTE_URL, '')
        logger.info('{}\t{}'.format(name, url))
    return 0
def _remind_to_git_add(self):
    """Log a reminder to ``git add`` the files created during the run."""
    if not self._files_to_git_add:
        return

    joined = ' '.join(self._files_to_git_add)
    logger.info(
        '\n'
        'To track the changes with git run:\n'
        '\n'
        '\tgit add {files}'.format(files=joined))
def _remind_to_git_add(self):
    """Log a reminder to ``git add`` the files created during the run."""
    files = self.files_to_git_add
    if not files:
        return

    logger.info(
        "\n"
        "To track the changes with git run:\n"
        "\n"
        "\tgit add {files}".format(files=" ".join(files)))
def show(self, config, section, opt):
    """Log the value of ``section.opt`` from *config*.

    Raises:
        ConfigError: the section or the option does not exist.
    """
    if section not in config:
        raise ConfigError("section '{}' doesn't exist".format(section))

    options = config[section]
    if opt not in options:
        raise ConfigError(
            "option '{}.{}' doesn't exist".format(section, opt)
        )

    logger.info(options[opt])
def _show(self, status, indent=0):
    """Recursively print a status tree, indenting one level per depth.

    Dict values are section headers that recurse; anything else is a
    leaf printed with its normalized status marker.
    """
    prefix = indent * self.STATUS_INDENT
    for key, value in status.items():
        if isinstance(value, dict):
            logger.info("{}{}".format(prefix, key))
            self._show(value, indent + 1)
        else:
            logger.info(
                "{}{}{}".format(prefix, self._normalize(value), key)
            )
def dump(self, fname=None):
    """Serialize this stage to *fname* (defaults to its own path) as
    YAML, and register the file for a later ``git add``."""
    fname = fname or self.path
    self._check_dvc_filename(fname)

    rel = os.path.relpath(fname)
    logger.info("Saving information to '{file}'.".format(file=rel))

    with open(fname, "w") as fd:
        yaml.safe_dump(self.dumpd(), fd, default_flow_style=False)

    self.project.files_to_git_add.append(rel)
def dump(self, fname=None):
    """Serialize this stage to *fname* (defaults to its own path) as
    YAML, and register the file for a later ``git add``."""
    fname = fname or self.path
    self._check_dvc_filename(fname)

    relpath = os.path.relpath(fname)
    logger.info("Saving information to '{}'.".format(relpath))

    with open(fname, 'w') as fd:
        yaml.safe_dump(self.dumpd(), fd, default_flow_style=False)

    self.project._files_to_git_add.append(relpath)
def dump(self):
    """Serialize this stage to its own dvcfile as YAML and ask the SCM
    to track the file."""
    fname = self.path
    self._check_dvc_filename(fname)

    logger.info(
        "Saving information to '{file}'.".format(
            file=os.path.relpath(fname)
        )
    )

    with open(fname, "w") as fd:
        yaml.safe_dump(self.dumpd(), fd, default_flow_style=False)

    self.repo.scm.track_file(os.path.relpath(fname))
def run(self):
    """Store the remote's URL in its config section; optionally mark it
    as the default remote. Returns 0 on success."""
    section = Config.SECTION_REMOTE_FMT.format(self.args.name)
    ret = self._set(section, Config.SECTION_REMOTE_URL, self.args.url)
    if ret != 0:
        return ret

    if not self.args.default:
        return ret

    logger.info(
        "Setting '{}' as a default remote.".format(self.args.name)
    )
    return self._set(
        Config.SECTION_CORE, Config.SECTION_CORE_REMOTE, self.args.name
    )
def run(self, dry=False):
    """Execute this stage: verify locked/data-source outputs, download
    imports, or run the command; then save unless *dry*."""
    if self.locked:
        message = u"Verifying outputs in locked stage '{}'"
        logger.info(message.format(self.relpath))
        if not dry:
            self.check_missing_outputs()

    elif self.is_import:
        message = u"Importing '{}' -> '{}'"
        logger.info(message.format(self.deps[0].path, self.outs[0].path))
        if not dry:
            self.deps[0].download(self.outs[0].path_info)

    elif self.is_data_source:
        logger.info(
            u"Verifying data sources in '{}'".format(self.relpath)
        )
        if not dry:
            self.check_missing_outputs()

    else:
        logger.info(u'Running command:\n\t{}'.format(self.cmd))
        if not dry:
            self._run()

    # A dry run must not persist anything.
    if not dry:
        self.save()
def do_checkout(self, path_info, checksum, force=False):
    """Materialize *checksum* from the cache at *path_info*.

    Single files are relinked wholesale; directories are walked
    entry-by-entry, relinking only changed files and discarding
    workspace files that are not part of the cached directory.
    """
    path = path_info["path"]
    md5 = checksum
    cache = self.get(md5)

    if not self.is_dir_cache(cache):
        # Plain file: drop any existing workspace copy and relink.
        if os.path.exists(path):
            self.safe_remove(path_info, force=force)

        self.link(cache, path)
        self.state.update_link(path)
        return

    # Create dir separately so that dir is created
    # even if there are no files in it
    if not os.path.exists(path):
        os.makedirs(path)

    dir_info = self.load_dir_cache(md5)
    dir_relpath = os.path.relpath(path)
    dir_size = len(dir_info)
    # Only show a progress bar for large directories.
    bar = dir_size > LARGE_DIR_SIZE

    logger.info("Linking directory '{}'.".format(dir_relpath))

    for processed, entry in enumerate(dir_info):
        relpath = entry[self.PARAM_RELPATH]
        m = entry[self.PARAM_CHECKSUM]
        p = os.path.join(path, relpath)
        c = self.get(m)

        entry_info = {"scheme": path_info["scheme"], self.PARAM_PATH: p}

        entry_checksum_info = {self.PARAM_CHECKSUM: m}

        # Relink only entries whose workspace copy differs from cache.
        if self.changed(entry_info, entry_checksum_info):
            if os.path.exists(p):
                self.safe_remove(entry_info, force=force)

            self.link(c, p)

        if bar:
            progress.update_target(dir_relpath, processed, dir_size)

    # Remove workspace files that do not belong to the cached dir.
    self._discard_working_directory_changes(path, dir_info, force=force)

    self.state.update_link(path)

    if bar:
        progress.finish_target(dir_relpath)
def run(self, dry=False):
    """Execute this stage: verify locked/data-source outputs, restore
    imports/commands from cache when possible, or run the command; then
    save unless *dry*."""
    if self.locked:
        msg = "Verifying outputs in locked stage '{stage}'".format(
            stage=self.relpath
        )
        logger.info(msg)
        if not dry:
            self.check_missing_outputs()

    elif self.is_import:
        msg = "Importing '{dep}' -> '{out}'".format(
            dep=self.deps[0].path, out=self.outs[0].path
        )
        logger.info(msg)
        if not dry:
            # Prefer restoring from cache over re-downloading.
            if self._already_cached():
                self.outs[0].checkout()
            else:
                self.deps[0].download(self.outs[0].path_info)

    elif self.is_data_source:
        logger.info(
            "Verifying data sources in '{}'".format(self.relpath)
        )
        if not dry:
            self.check_missing_outputs()

    else:
        logger.info("Running command:\n\t{}".format(self.cmd))
        if not dry:
            # Prefer checking out cached results over re-running.
            if self._already_cached():
                self.checkout()
            else:
                self._run()

    # A dry run must not persist anything.
    if not dry:
        self.save()
def save(self, path_info):
    """Save a local path (file or directory) into the cache and return
    its checksum info.

    Raises:
        NotImplementedError: the path scheme is not 'local'.
    """
    if path_info['scheme'] != 'local':
        raise NotImplementedError

    path = path_info['path']
    logger.info(
        "Saving '{}' to cache '{}'.".format(
            os.path.relpath(path), os.path.relpath(self.cache_dir)
        )
    )

    if os.path.isdir(path):
        return self._save_dir(path_info)
    return self._save_file(path_info)