def safe_remove(self, path_info, force=False):
    """Remove ``path_info`` from ``self.tree``, asking first if it is not cached.

    Nothing happens when the path does not exist. Unless ``force`` is
    set, a path for which ``self.already_cached`` is falsy is removed
    only after the user confirms the prompt.

    Raises:
        ConfirmRemoveError: the user declined the confirmation prompt.
    """
    if not self.tree.exists(path_info):
        return

    needs_confirmation = not (force or self.already_cached(path_info))
    if needs_confirmation:
        question = (
            "file '{}' is going to be removed."
            " Are you sure you want to proceed?".format(str(path_info))
        )
        confirmed = prompt.confirm(question)
        if not confirmed:
            raise ConfirmRemoveError(str(path_info))

    self.tree.remove(path_info)
def _collect_used_dir_cache(self, remote=None, force=False, jobs=None):
    """Build a NamedCache holding one entry per file of this directory.

    When the local cache file for ``self.checksum`` is reported as
    changed, a pull of the directory entry is attempted first; a failed
    pull is only logged at debug level.

    Example:

        Given the following commands:

        $ echo "foo" > directory/foo
        $ echo "bar" > directory/bar
        $ dvc add directory

        the result is equivalent to:

        nc = NamedCache()
        nc.add(self.scheme, 'c157a79031e1', 'directory/foo')
        nc.add(self.scheme, 'd3b07384d113', 'directory/bar')

    Returns:
        The populated NamedCache, or an empty one when the directory
        cache is still missing and the user (or ``force``) accepted that.

    Raises:
        CollectCacheError: the directory cache is missing and the user
            declined to continue.
    """
    used = NamedCache()

    if self.cache.changed_cache_file(self.checksum):
        try:
            dir_entry = NamedCache.make("local", self.checksum, str(self))
            self.repo.cloud.pull(
                dir_entry, jobs=jobs, remote=remote, show_checksums=False
            )
        except DvcException:
            logger.debug("failed to pull cache for '{}'".format(self))

    # Check again: the pull above may or may not have restored the entry.
    if self.cache.changed_cache_file(self.checksum):
        if force:
            return used

        template = (
            "Missing cache for directory '{}'. "
            "Cache for files inside will be lost. "
            "Would you like to continue? Use '-f' to force."
        )
        if prompt.confirm(template.format(self.path_info)):
            return used

        raise CollectCacheError(
            "unable to fully collect used cache"
            " without cache for directory '{}'".format(self)
        )

    for item in self.dir_cache:
        item_checksum = item[self.remote.PARAM_CHECKSUM]
        item_path = self.path_info / item[self.remote.PARAM_RELPATH]
        used.add(self.scheme, item_checksum, str(item_path))

    return used
def remove_with_prompt(self, force=False):
    """Remove this object if it exists, confirming with the user first.

    With ``force`` set the confirmation prompt is skipped.

    Raises:
        StageFileAlreadyExistsError: the user declined to overwrite.
    """
    if self.exists():
        question = (
            "'{}' already exists. Do you wish to run the command and "
            "overwrite it?".format(relpath(self.path))
        )
        if not force and not prompt.confirm(question):
            raise StageFileAlreadyExistsError(self.path)

        self.remove()
def run(self):
    """Garbage-collect experiments after warning the user and confirming.

    Returns:
        int: 0 when the collection ran (or the repo has no experiments),
        1 when the user declined the confirmation prompt.
    """
    from dvc.repo.gc import _raise_error_if_all_disabled

    if not self.repo.experiments:
        return 0

    # NOTE(review): presumably raises when none of the scope flags is set;
    # the helper is defined outside this block.
    _raise_error_if_all_disabled(
        all_branches=self.args.all_branches,
        all_tags=self.args.all_tags,
        all_commits=self.args.all_commits,
        workspace=self.args.workspace,
    )

    msg = "This will remove all experiments except those derived from "
    msg += "the workspace"
    if self.args.all_commits:
        msg += " and all git commits"
    elif self.args.all_branches and self.args.all_tags:
        msg += " and all git branches and tags"
    elif self.args.all_branches:
        msg += " and all git branches"
    elif self.args.all_tags:
        msg += " and all git tags"
    msg += " of the current repo."

    # Exactly one of these statements applies. The second one used to be
    # guarded by the same condition as the first, which produced a
    # contradictory warning with the queued flag and none without it.
    if self.args.queued:
        msg += " Run queued experiments will be preserved."
    else:
        msg += " Run queued experiments will be removed."
    logger.warning(msg)

    msg = "Are you sure you want to proceed?"
    if not self.args.force and not prompt.confirm(msg):
        return 1

    removed = self.repo.experiments.gc(
        all_branches=self.args.all_branches,
        all_tags=self.args.all_tags,
        all_commits=self.args.all_commits,
        workspace=self.args.workspace,
        queued=self.args.queued,
    )

    if removed:
        logger.info(
            f"Removed {removed} experiments. To remove unused cache files "
            "use 'dvc gc'."
        )
    else:
        logger.info("No experiments to remove.")
    return 0
def collect_used_dir_cache(self, remote=None, force=False, jobs=None, filter_info=None):
    """Build a NamedCache with the files of this directory.

    The directory cache is fetched through ``self.get_dir_cache`` first;
    a failure there is only logged at debug level. When ``filter_info``
    is given, only the entry equal to it or entries located below it are
    added.

    Example:

        Given the following commands:

        $ echo "foo" > directory/foo
        $ echo "bar" > directory/bar
        $ dvc add directory

        the result is equivalent to:

        nc = NamedCache()
        nc.add(self.scheme, 'c157a79031e1', 'directory/foo')
        nc.add(self.scheme, 'd3b07384d113', 'directory/bar')

    Returns:
        The populated NamedCache, or an empty one when the directory
        object is missing or malformed and the user (or ``force``)
        accepted that.

    Raises:
        CollectCacheError: the directory object is unusable and the user
            declined to continue.
    """
    used = NamedCache()

    try:
        self.get_dir_cache(jobs=jobs, remote=remote)
    except DvcException:
        logger.debug(f"failed to pull cache for '{self}'")

    try:
        dir_obj = self.odb.get(self.hash_info)
        objects.check(self.odb, dir_obj)
    except (FileNotFoundError, objects.ObjectFormatError):
        if not force:
            template = (
                "Missing cache for directory '{}'. "
                "Cache for files inside will be lost. "
                "Would you like to continue? Use '-f' to force."
            )
            if not prompt.confirm(template.format(self.path_info)):
                raise CollectCacheError(
                    "unable to fully collect used cache"
                    " without cache for directory '{}'".format(self)
                )
        return used

    base = str(self.path_info)
    wanted = str(filter_info) if filter_info else None

    for key, hash_info in self.dir_cache.items():
        candidate = os.path.join(base, *key)
        if wanted:
            is_match = candidate == wanted
            if not is_match and not candidate.startswith(wanted + os.sep):
                continue
        used.add(self.scheme, hash_info.value, candidate)

    return used
def run(self):
    """Garbage-collect the cache after warning the user and confirming.

    Returns:
        int: 0 when the collection ran, 1 when the user declined the
        confirmation prompt.
    """
    from dvc.repo.gc import _raise_error_if_all_disabled

    args = self.args

    _raise_error_if_all_disabled(
        all_branches=args.all_branches,
        all_tags=args.all_tags,
        all_commits=args.all_commits,
        workspace=args.workspace,
    )

    # Only the first matching scope is mentioned in the warning.
    if args.all_commits:
        scope = " and all git commits"
    elif args.all_branches and args.all_tags:
        scope = " and all git branches and tags"
    elif args.all_branches:
        scope = " and all git branches"
    elif args.all_tags:
        scope = " and all git tags"
    elif args.all_experiments:
        scope = " and all experiments"
    else:
        scope = ""

    warning = "This will remove all cache except items used in "
    warning += "the workspace"
    warning += scope

    if args.repos:
        warning += " of the current and the following repos:"
        for repo_path in args.repos:
            warning += "\n - %s" % os.path.abspath(repo_path)
    else:
        warning += " of the current repo."

    logger.warning(warning)

    question = "Are you sure you want to proceed?"
    if not (args.force or prompt.confirm(question)):
        return 1

    self.repo.gc(
        all_branches=args.all_branches,
        all_tags=args.all_tags,
        all_commits=args.all_commits,
        all_experiments=args.all_experiments,
        cloud=args.cloud,
        remote=args.remote,
        force=args.force,
        jobs=args.jobs,
        repos=args.repos,
        workspace=args.workspace,
    )
    return 0
def _remove(path_info, tree, cache, force=False): if not tree.exists(path_info): return if not force and not _is_cached(cache, path_info, tree): msg = ( "file '{}' is going to be removed." " Are you sure you want to proceed?".format(str(path_info)) ) if not prompt.confirm(msg): raise ConfirmRemoveError(str(path_info)) tree.remove(path_info)
def safe_remove(self, path_info, force=False):
    """Remove ``path_info``, asking for confirmation if it is not cached.

    Nothing happens when the path does not exist. Unless ``force`` is
    set, a path for which ``self.already_cached`` is falsy is removed
    only after the user confirms the prompt.

    Raises:
        DvcException: the user declined the confirmation prompt.
    """
    if not self.exists(path_info):
        return

    skip_prompt = force or self.already_cached(path_info)
    if not skip_prompt:
        question = (
            "file '{}' is going to be removed."
            " Are you sure you want to proceed?".format(str(path_info))
        )
        if not prompt.confirm(question):
            raise DvcException(
                "unable to remove '{}' without a confirmation"
                " from the user. Use '-f' to force.".format(str(path_info))
            )

    self.remove(path_info)
def _is_outs_only(self, target): if not self.args.purge: return True if self.args.force: return False msg = "Are you sure you want to remove {} with its outputs?".format( target) if prompt.confirm(msg): return False raise DvcException("Cannot purge without a confirmation from the user." " Use `-f` to force.")
def _remove(path_info, fs, in_cache, force=False): if not fs.exists(path_info): return if force: fs.remove(path_info) return if not in_cache: msg = (f"file/directory '{path_info}' is going to be removed. " "Are you sure you want to proceed?") if not prompt.confirm(msg): raise ConfirmRemoveError(str(path_info)) fs.remove(path_info)
def reproduce(self, interactive=False, **kwargs):
    """Re-run this stage when it changed or ``force`` is passed in kwargs.

    Args:
        interactive: ask the user for confirmation before running.
        **kwargs: forwarded unchanged to ``self.run``; ``force`` is also
            read here to bypass the change check.

    Returns:
        ``self`` after running, or None when the stage was skipped.

    Raises:
        DvcException: the user declined the interactive confirmation.
    """
    forced = kwargs.get("force", False)
    if not forced and not self.changed():
        logger.info("Stage '%s' didn't change, skipping", self.addressing)
        return None

    question = (
        "Going to reproduce {stage}. "
        "Are you sure you want to continue?".format(stage=self)
    )
    if interactive:
        if not prompt.confirm(question):
            raise DvcException("reproduction aborted by the user")

    self.run(**kwargs)
    logger.debug(f"{self} was reproduced")
    return self
def reproduce(self, interactive=False, **kwargs):
    """Re-run this stage when it changed or ``force`` is passed in kwargs.

    Args:
        interactive: ask the user for confirmation before running.
        **kwargs: forwarded unchanged to ``self.run``; ``force`` is also
            read here to bypass the change check.

    Returns:
        ``self`` after running, or None when the stage was skipped.

    Raises:
        DvcException: the user declined the interactive confirmation.
    """
    if not (kwargs.get("force", False) or self.changed()):
        return None

    question = (
        "Going to reproduce {stage}. "
        "Are you sure you want to continue?".format(stage=self)
    )
    if interactive:
        if not prompt.confirm(question):
            raise DvcException("reproduction aborted by the user")

    self.run(**kwargs)

    logger.debug("{stage} was reproduced".format(stage=self))
    return self
def check_can_commit(self, force):
    """Save the stage, confirming with the user first if anything changed.

    A confirmation is requested when dependencies, outputs or the stage
    md5 are reported as changed and ``force`` is falsy.

    Args:
        force: skip the confirmation prompt.

    Raises:
        StageCommitError: something changed and the user declined.
    """
    changed_deps = self._changed_entries(self.deps)
    changed_outs = self._changed_entries(self.outs)

    if changed_deps or changed_outs or self.changed_md5():
        if not force:
            msg = "dependencies {}".format(changed_deps) if changed_deps else ""
            msg += " and " if (changed_deps and changed_outs) else ""
            msg += "outputs {}".format(changed_outs) if changed_outs else ""
            # Only the md5 differs when neither deps nor outs changed.
            msg += "md5" if not (changed_deps or changed_outs) else ""
            # The question used to read "Are you sure you commit it?".
            msg += " of '{}' changed. Are you sure you want to commit it?".format(
                self.relpath
            )
            if not prompt.confirm(msg):
                # The stray trailing backtick after "force." was removed.
                raise StageCommitError(
                    "unable to commit changed '{}'. Use `-f|--force` to "
                    "force.".format(self.relpath)
                )

    self.save()
def check_can_commit(self, force):
    """Save the stage, confirming with the user first if anything changed.

    A confirmation is requested when dependencies, outputs or the stage
    itself are reported as changed and ``force`` is falsy.

    Args:
        force: skip the confirmation prompt.

    Raises:
        StageCommitError: something changed and the user declined.
    """
    changed_deps = self._changed_entries(self.deps)
    changed_outs = self._changed_entries(self.outs)

    if changed_deps or changed_outs or self.stage_changed():
        subjects = []
        if changed_deps:
            subjects.append("dependencies {}".format(changed_deps))
        if changed_outs:
            subjects.append("outputs {}".format(changed_outs))

        # Only the md5 differs when neither deps nor outs changed.
        what = " and ".join(subjects) if subjects else "md5"
        question = what + " of {} changed. ".format(self)
        question += "Are you sure you want to commit it?"

        if not (force or prompt.confirm(question)):
            raise StageCommitError(
                "unable to commit changed {}. Use `-f|--force` to "
                "force.".format(self)
            )

    self.save()
def run(self):
    """Destroy the repo after confirming with the user.

    Any exception, including a declined confirmation, is logged with its
    traceback and turned into a non-zero return code.

    Returns:
        int: 0 on success, 1 on any failure.
    """
    try:
        question = (
            "This will destroy all information about your pipelines,"
            " all data files, as well as cache in .dvc/cache."
            "\n"
            "Are you sure you want to continue?"
        )
        if not (self.args.force or prompt.confirm(question)):
            raise DvcException(
                "cannot destroy without a confirmation from the user."
                " Use `-f` to force."
            )

        self.repo.destroy()
    except Exception:
        logger.exception("failed to destroy DVC")
        return 1
    else:
        return 0
def run_cmd(self):
    """Destroy the project after confirming with the user.

    Any exception, including a declined confirmation, is logged and
    turned into a non-zero return code.

    Returns:
        int: 0 on success, 1 on any failure.
    """
    try:
        statement = (
            'This will destroy all information about your pipelines,'
            ' all data files, as well as cache in .dvc/cache.'
            '\n'
            'Are you sure you want to continue?'
        )

        if not self.args.force and not prompt.confirm(statement):
            raise DvcException(
                "cannot destroy without a confirmation from the user."
                " Use '-f' to force."
            )

        self.project.destroy()
    except Exception:
        # logger.exception keeps the caught exception in the log record;
        # a plain logger.error dropped it, hiding the failure cause and
        # the "Use '-f' to force" hint raised just above.
        logger.exception('failed to destroy DVC')
        return 1
    return 0
def _remove(path_info, fs, cache, force=False): if not fs.exists(path_info): return if force: fs.remove(path_info) return current = stage(cache, path_info, fs, fs.PARAM_CHECKSUM).hash_info try: obj = load(cache, current) check(cache, obj) except (FileNotFoundError, ObjectFormatError): msg = (f"file/directory '{path_info}' is going to be removed. " "Are you sure you want to proceed?") if not prompt.confirm(msg): raise ConfirmRemoveError(str(path_info)) fs.remove(path_info)
def reproduce(
    self, force=False, dry=False, interactive=False, no_commit=False
):
    """Re-run this stage when it changed or ``force`` is set.

    Args:
        force: run even if ``self.changed()`` is falsy.
        dry: forwarded to ``self.run``.
        interactive: ask the user for confirmation before running.
        no_commit: forwarded to ``self.run``.

    Returns:
        ``self`` after running, or None when the stage was skipped.

    Raises:
        DvcException: the user declined the interactive confirmation.
    """
    if not (self.changed() or force):
        return None

    question = (
        "Going to reproduce '{stage}'. "
        "Are you sure you want to continue?".format(stage=self.relpath)
    )
    if interactive:
        if not prompt.confirm(question):
            raise DvcException("reproduction aborted by the user")

    logger.info("Reproducing '{stage}'".format(stage=self.relpath))

    self.run(dry=dry, no_commit=no_commit, force=force)

    logger.debug("'{stage}' was reproduced".format(stage=self.relpath))
    return self
def reproduce(self, force=False, dry=False, interactive=False):
    """Re-run this stage when it changed or ``force`` is set.

    Outputs are removed only after the interactive confirmation (if
    any) has been given, so declining leaves them untouched.

    Args:
        force: run even if ``self.changed()`` is falsy.
        dry: forwarded to ``self.run``; also prevents removing outputs.
        interactive: ask the user for confirmation before running.

    Returns:
        ``self`` after running, or None when the stage was skipped.

    Raises:
        DvcException: the user declined the interactive confirmation.
    """
    if not self.changed() and not force:
        return None

    msg = (
        "Going to reproduce '{stage}'. "
        "Are you sure you want to continue?".format(stage=self.relpath)
    )
    if interactive and not prompt.confirm(msg):
        raise DvcException("reproduction aborted by the user")

    # Remove outputs only if we actually have a command to reproduce, and
    # only once the user can no longer abort. This used to run before the
    # prompt, so declining still deleted the outputs.
    if (self.cmd or self.is_import) and not self.locked and not dry:
        self.remove_outs(ignore_remove=False)

    logger.info("Reproducing '{stage}'".format(stage=self.relpath))

    self.run(dry=dry)

    logger.debug("'{stage}' was reproduced".format(stage=self.relpath))
    return self
def _collect_dir_cache(self, out, branch=None, remote=None, force=False, jobs=None): info = out.dumpd() ret = [info] r = out.remote md5 = info[r.PARAM_CHECKSUM] if self.cache.local.changed_cache_file(md5): try: self.cloud.pull(ret, jobs=jobs, remote=remote, show_checksums=False) except DvcException as exc: msg = "Failed to pull cache for '{}': {}" logger.debug(msg.format(out, exc)) if self.cache.local.changed_cache_file(md5): msg = ("Missing cache for directory '{}'. " "Cache for files inside will be lost. " "Would you like to continue? Use '-f' to force. ") if not force and not prompt.confirm(msg): raise DvcException( "unable to fully collect used cache" " without cache for directory '{}'".format(out)) else: return ret for i in self.cache.local.load_dir_cache(md5): i["branch"] = branch i[r.PARAM_PATH] = os.path.join(info[r.PARAM_PATH], i[r.PARAM_RELPATH]) ret.append(i) return ret
def create(
    repo=None,
    cmd=None,
    deps=None,
    outs=None,
    outs_no_cache=None,
    metrics=None,
    metrics_no_cache=None,
    fname=None,
    cwd=None,
    wdir=None,
    locked=False,
    add=False,
    overwrite=True,
    ignore_build_cache=False,
    remove_outs=False,
    validate_state=True,
    outs_persist=None,
    outs_persist_no_cache=None,
    erepo=None,
):
    """Build a Stage from the given command, dependencies and outputs.

    Returns:
        The new Stage, or None when ``validate_state`` is set, the stage
        file already exists and the stage is cached (and the build cache
        is not being ignored).

    Raises:
        StageFileBadNameError: ``fname`` contains a directory part while
            the deprecated ``cwd`` option is used without ``wdir``.
        StageFileAlreadyExistsError: the stage file exists, ``overwrite``
            is falsy and the user declined to overwrite it.
    """
    # Every list-like argument left as None becomes a fresh empty list.
    outs = [] if outs is None else outs
    deps = [] if deps is None else deps
    outs_no_cache = [] if outs_no_cache is None else outs_no_cache
    metrics = [] if metrics is None else metrics
    metrics_no_cache = [] if metrics_no_cache is None else metrics_no_cache
    outs_persist = [] if outs_persist is None else outs_persist
    outs_persist_no_cache = (
        [] if outs_persist_no_cache is None else outs_persist_no_cache
    )

    # Backward compatibility for `cwd` option
    legacy_cwd = wdir is None and cwd is not None
    if legacy_cwd:
        has_subdir = fname is not None and os.path.basename(fname) != fname
        if has_subdir:
            raise StageFileBadNameError(
                "stage file name '{fname}' may not contain subdirectories"
                " if '-c|--cwd' (deprecated) is specified. Use '-w|--wdir'"
                " along with '-f' to specify stage file path and working"
                " directory.".format(fname=fname)
            )
        wdir = cwd
    elif wdir is None:
        wdir = os.curdir

    stage = Stage(repo=repo, wdir=wdir, cmd=cmd, locked=locked)

    Stage._fill_stage_outputs(
        stage,
        outs,
        outs_no_cache,
        metrics,
        metrics_no_cache,
        outs_persist,
        outs_persist_no_cache,
    )
    stage.deps = dependency.loads_from(stage, deps, erepo=erepo)

    stage._check_circular_dependency()
    stage._check_duplicated_arguments()

    if not fname:
        fname = Stage._stage_fname(stage.outs, add=add)
    stage._check_dvc_filename(fname)

    wdir = os.path.abspath(wdir)

    # With the deprecated `cwd`, the stage file lives in the working dir.
    path = os.path.join(wdir, fname) if cwd is not None else os.path.abspath(fname)

    Stage._check_stage_path(repo, wdir)
    Stage._check_stage_path(repo, os.path.dirname(path))

    stage.wdir = wdir
    stage.path = path

    # NOTE: remove outs before we check build cache
    if remove_outs:
        logger.warning(
            "--remove-outs is deprecated."
            " It is now the default behavior,"
            " so there's no need to use this option anymore."
        )
        stage.remove_outs(ignore_remove=False)
        logger.warning("Build cache is ignored when using --remove-outs.")
        ignore_build_cache = True
    else:
        stage.unprotect_outs()

    if os.path.exists(path) and any(out.persist for out in stage.outs):
        logger.warning("Build cache is ignored when persisting outputs.")
        ignore_build_cache = True

    if validate_state and os.path.exists(path):
        if not ignore_build_cache and stage.is_cached:
            logger.info("Stage is cached, skipping.")
            return None

        question = (
            "'{}' already exists. Do you wish to run the command and "
            "overwrite it?".format(stage.relpath)
        )
        if not (overwrite or prompt.confirm(question)):
            raise StageFileAlreadyExistsError(stage.relpath)

        os.unlink(path)

    return stage
def test_eof(self, mock_input, mock_isatty):
    """``confirm`` must give a falsy answer after using both mocks.

    NOTE(review): presumably the patched input raises EOFError, as the
    test name suggests; the mock setup is outside this block.
    """
    answer = confirm("message")

    for patched in (mock_isatty, mock_input):
        patched.assert_called()
    self.assertFalse(answer)
def create(
    project=None,
    cmd=None,
    deps=None,
    outs=None,
    outs_no_cache=None,
    metrics_no_cache=None,
    fname=None,
    cwd=os.curdir,
    locked=False,
    add=False,
    overwrite=True,
    ignore_build_cache=False,
    remove_outs=False,
):
    """Build a Stage from the given command, dependencies and outputs.

    The list-like arguments default to None and are replaced by fresh
    empty lists, so no list object is shared between calls.

    Returns:
        The new Stage, or None when the stage file already exists and
        the stage is cached (and the build cache is not being ignored).

    Raises:
        StageFileBadNameError: ``fname`` contains a directory part.
        StageFileAlreadyExistsError: the stage file exists, ``overwrite``
            is falsy and the user declined to overwrite it.
    """
    # None sentinels instead of mutable `[]` defaults.
    deps = [] if deps is None else deps
    outs = [] if outs is None else outs
    outs_no_cache = [] if outs_no_cache is None else outs_no_cache
    metrics_no_cache = [] if metrics_no_cache is None else metrics_no_cache

    stage = Stage(project=project, cwd=cwd, cmd=cmd, locked=locked)

    stage.outs = output.loads_from(stage, outs, use_cache=True)
    stage.outs += output.loads_from(stage, outs_no_cache, use_cache=False)
    stage.outs += output.loads_from(
        stage, metrics_no_cache, use_cache=False, metric=True
    )
    stage.deps = dependency.loads_from(stage, deps)

    stage._check_circular_dependency()
    stage._check_duplicated_arguments()

    if fname is not None and os.path.basename(fname) != fname:
        raise StageFileBadNameError(
            "stage file name '{fname}' should not contain subdirectories."
            " Use '-c|--cwd' to change location of the stage file.".format(
                fname=fname
            )
        )

    fname, cwd = Stage._stage_fname_cwd(fname, cwd, stage.outs, add=add)

    Stage._check_inside_project(project, cwd)

    cwd = os.path.abspath(cwd)
    path = os.path.join(cwd, fname)

    stage.cwd = cwd
    stage.path = path

    # NOTE: remove outs before we check build cache
    if remove_outs:
        stage.remove_outs(ignore_remove=False)
        logger.warning("Build cache is ignored when using --remove-outs.")
        ignore_build_cache = True
    else:
        stage.unprotect_outs()

    if os.path.exists(path):
        if not ignore_build_cache and stage.is_cached:
            logger.info("Stage is cached, skipping.")
            return None

        msg = (
            "'{}' already exists. Do you wish to run the command and "
            "overwrite it?".format(stage.relpath)
        )
        if not overwrite and not prompt.confirm(msg):
            raise StageFileAlreadyExistsError(stage.relpath)

        os.unlink(path)

    return stage
def prompt_to_commit(stage, changes, force=False):
    """Ask the user to confirm committing a changed stage.

    With ``force`` truthy neither the message is prepared nor the prompt
    shown.

    Raises:
        StageCommitError: the user declined the confirmation prompt.
    """
    if force:
        return

    question = _prepare_message(stage, changes)
    if prompt.confirm(question):
        return

    raise StageCommitError(
        "unable to commit changed {}. Use `-f|--force` to "
        "force.".format(stage)
    )
def create(repo, **kwargs):
    """Build a Stage for ``repo`` from keyword options.

    Options read here: ``wdir``, ``cwd``, ``fname``, ``add``, ``cmd``,
    ``locked``, ``always_changed``, ``deps``, ``erepo``,
    ``ignore_build_cache``, ``remove_outs`` and ``overwrite``. The full
    ``kwargs`` are also passed on to ``Stage._fill_stage_outputs``.

    Returns:
        The new Stage, or None when the stage file already exists and
        the cached stage can be reused.

    Raises:
        StageFileBadNameError: ``fname`` contains a directory part while
            the deprecated ``cwd`` option is used without ``wdir``.
        StageFileAlreadyExistsError: the stage file exists, ``overwrite``
            is falsy and the user declined to overwrite it.
    """
    wdir = kwargs.get("wdir")
    cwd = kwargs.get("cwd")
    fname = kwargs.get("fname")
    add = kwargs.get("add", False)

    # Backward compatibility for `cwd` option
    if wdir is None:
        if cwd is not None:
            has_subdir = fname is not None and os.path.basename(fname) != fname
            if has_subdir:
                raise StageFileBadNameError(
                    "DVC-file name '{fname}' may not contain subdirectories"
                    " if `-c|--cwd` (deprecated) is specified. Use `-w|--wdir`"
                    " along with `-f` to specify DVC-file path with working"
                    " directory.".format(fname=fname)
                )
            wdir = cwd
        else:
            wdir = os.curdir

    stage = Stage(
        repo=repo,
        wdir=wdir,
        cmd=kwargs.get("cmd"),
        locked=kwargs.get("locked", False),
        always_changed=kwargs.get("always_changed", False),
    )

    Stage._fill_stage_outputs(stage, **kwargs)
    stage.deps = dependency.loads_from(
        stage, kwargs.get("deps", []), erepo=kwargs.get("erepo")
    )

    stage._check_circular_dependency()
    stage._check_duplicated_arguments()

    if not fname:
        fname = Stage._stage_fname(stage.outs, add)
    stage._check_dvc_filename(fname)

    # Autodetecting wdir for add, we need to create outs first to do that,
    # so we start with wdir = . and remap out paths later.
    autodetect_wdir = add and kwargs.get("wdir") is None and cwd is None
    if autodetect_wdir:
        wdir = os.path.dirname(fname)
        for entry in chain(stage.outs, stage.deps):
            if entry.is_in_repo:
                entry.def_path = relpath(entry.path_info, wdir)

    wdir = os.path.abspath(wdir)

    # With the deprecated `cwd`, the stage file lives in the working dir.
    path = os.path.join(wdir, fname) if cwd is not None else os.path.abspath(fname)

    Stage._check_stage_path(repo, wdir)
    Stage._check_stage_path(repo, os.path.dirname(path))

    stage.wdir = wdir
    stage.path = path

    ignore_build_cache = kwargs.get("ignore_build_cache", False)

    # NOTE: remove outs before we check build cache
    if kwargs.get("remove_outs", False):
        logger.warning(
            "--remove-outs is deprecated."
            " It is now the default behavior,"
            " so there's no need to use this option anymore."
        )
        stage.remove_outs(ignore_remove=False)
        logger.warning("Build cache is ignored when using --remove-outs.")
        ignore_build_cache = True

    if os.path.exists(path) and any(out.persist for out in stage.outs):
        logger.warning("Build cache is ignored when persisting outputs.")
        ignore_build_cache = True

    if not os.path.exists(path):
        return stage

    reusable = (
        not ignore_build_cache
        and stage.is_cached
        and not stage.is_callback
        and not stage.always_changed
    )
    if reusable:
        logger.info("Stage is cached, skipping.")
        return None

    question = (
        "'{}' already exists. Do you wish to run the command and "
        "overwrite it?".format(stage.relpath)
    )
    if not (kwargs.get("overwrite", True) or prompt.confirm(question)):
        raise StageFileAlreadyExistsError(stage.relpath)

    os.unlink(path)
    return stage