def _sort_column(sort_by, metric_names, param_names): path, _, sort_name = sort_by.rpartition(":") matches = set() if path: if path in metric_names and sort_name in metric_names[path]: matches.add((path, sort_name, "metrics")) if path in param_names and sort_name in param_names[path]: matches.add((path, sort_name, "params")) else: for path in metric_names: if sort_name in metric_names[path]: matches.add((path, sort_name, "metrics")) for path in param_names: if sort_name in param_names[path]: matches.add((path, sort_name, "params")) if len(matches) == 1: return matches.pop() if len(matches) > 1: raise InvalidArgumentError( "Ambiguous sort column '{}' matched '{}'".format( sort_by, ", ".join([f"{path}:{name}" for path, name, _ in matches]), )) raise InvalidArgumentError(f"Unknown sort column '{sort_by}'")
def loads_param_overrides(
    path_params: Iterable[str],
) -> Dict[str, Dict[str, Any]]:
    """Loads the content of params from the cli as Python object.

    Each item has the form ``[path:]name=value``. When no ``path:`` prefix is
    given, the parameter is filed under
    ``ParamsDependency.DEFAULT_PARAMS_FILE``. Values are parsed with
    ``loads_yaml``.

    Args:
        path_params: Override strings as received from the command line.

    Returns:
        Mapping of params file path -> {param name -> parsed value}.

    Raises:
        InvalidArgumentError: If an item has no (non-empty) value, or the
            value cannot be parsed.
    """
    from ruamel.yaml import YAMLError

    from dvc.dependency.param import ParamsDependency
    from dvc.exceptions import InvalidArgumentError

    from .serialize import loads_yaml

    ret: Dict[str, Dict[str, Any]] = defaultdict(dict)

    for path_param in path_params:
        param_name, _, param_value = path_param.partition("=")
        if not param_value:
            raise InvalidArgumentError(
                f"Must provide a value for parameter '{param_name}'"
            )

        path, _, param_name = param_name.partition(":")
        if not param_name:
            # No "path:" prefix was given: what was parsed as the path is
            # actually the parameter name.
            param_name = path
            path = ParamsDependency.DEFAULT_PARAMS_FILE

        try:
            ret[path][param_name] = loads_yaml(param_value)
        except (ValueError, YAMLError) as exc:
            # Keep the parser error attached as the cause for debugging.
            raise InvalidArgumentError(
                f"Invalid parameter value for '{param_name}': '{param_value}'"
            ) from exc

    return ret
def validate_kwargs(single_stage: bool = False, fname: str = None, **kwargs):
    """Prepare, validate and process kwargs passed from cli

    Rejects incompatible option combinations, drops ``name`` for single-stage
    runs and converts the ``live_no_summary`` / ``live_no_html`` flags into
    their positive ``live_summary`` / ``live_html`` counterparts.

    Returns:
        The processed ``kwargs`` dict.

    Raises:
        InvalidArgumentError: On any invalid combination of options.
    """
    stage_name = kwargs.get("name")

    if not single_stage and not kwargs.get("cmd"):
        raise InvalidArgumentError("command is not specified")

    if single_stage and stage_name:
        raise InvalidArgumentError(
            "`-n|--name` is incompatible with `--single-stage`"
        )

    if fname and stage_name:
        raise InvalidArgumentError(
            "`--file` is currently incompatible with `-n|--name` "
            "and requires `--single-stage`"
        )

    if not (stage_name or single_stage):
        raise InvalidArgumentError("`-n|--name` is required")

    if single_stage:
        kwargs.pop("name", None)

    if kwargs.get("live") and kwargs.get("live_no_cache"):
        raise InvalidArgumentError(
            "cannot specify both `--live` and `--live-no-cache`"
        )

    # The CLI exposes negative flags; downstream code expects positive ones.
    kwargs["live_summary"] = not kwargs.pop("live_no_summary", False)
    kwargs["live_html"] = not kwargs.pop("live_no_html", False)
    return kwargs
def run(self):
    """Validate the parsed CLI options and run experiments.

    Returns:
        ``0`` once the experiments run call has returned.

    Raises:
        InvalidArgumentError: If ``checkpoint_resume`` is combined with
            ``reset``, or is used without ``queue`` or ``tmp_dir``.
    """
    from dvc.compare import show_metrics

    args = self.args

    if args.checkpoint_resume:
        if args.reset:
            raise InvalidArgumentError(
                "--reset and --rev are mutually exclusive."
            )
        if not args.queue and not args.tmp_dir:
            raise InvalidArgumentError(
                "--rev can only be used in conjunction with "
                "--queue or --temp."
            )

    if args.reset:
        logger.info("Any existing checkpoints will be reset and re-run.")

    results = self.repo.experiments.run(
        name=args.name,
        queue=args.queue,
        run_all=args.run_all,
        jobs=args.jobs,
        params=args.set_param,
        checkpoint_resume=args.checkpoint_resume,
        reset=args.reset,
        tmp_dir=args.tmp_dir,
        **self._repro_kwargs,
    )

    if args.metrics and results:
        metrics = self.repo.metrics.show(revs=list(results))
        # Only the revisions that were just run are reported.
        metrics.pop("workspace", None)
        logger.info(show_metrics(metrics))

    return 0
def run(self, fname=None, no_exec=False, single_stage=False, **kwargs):
    """Create a stage from CLI-style kwargs, run it and dump it to its dvcfile.

    Args:
        fname: Explicit stage file path; only consulted for single-stage runs.
        no_exec: When true, the stage is not executed; its outputs are
            ignored instead and the dump is called with ``no_lock=True``.
        single_stage: Build a ``Stage`` instead of a named ``PipelineStage``.
        **kwargs: Remaining options, forwarded to ``create_stage``
            (``params`` is parsed with ``parse_params`` first).

    Returns:
        The created stage, or ``None`` if ``create_stage`` returned ``None``.

    Raises:
        InvalidArgumentError: Missing command or bad name/single-stage combo.
        InvalidStageName: The pipeline stage name is not valid.
        StageFileAlreadyExistsError, DuplicateStageName: The target already
            exists and ``overwrite`` is false.
        OutputDuplicationError: Re-raised without the new stage itself.
    """
    from dvc.stage import PipelineStage, Stage, create_stage
    from dvc.dvcfile import Dvcfile, PIPELINE_FILE

    if not kwargs.get("cmd"):
        raise InvalidArgumentError("command is not specified")

    stage_name = kwargs.get("name")
    if single_stage and stage_name:
        raise InvalidArgumentError(
            "`-n|--name` is incompatible with `--single-stage`"
        )
    if not (stage_name or single_stage):
        raise InvalidArgumentError("`-n|--name` is required")

    if single_stage:
        kwargs.pop("name", None)
        stage_cls = Stage
        path = fname or _get_file_path(kwargs)
    elif is_valid_name(stage_name):
        stage_cls = PipelineStage
        path = PIPELINE_FILE
    else:
        raise InvalidStageName

    parsed_params = parse_params(kwargs.pop("params", []))
    stage = create_stage(
        stage_cls, repo=self, path=path, params=parsed_params, **kwargs
    )
    if stage is None:
        return None

    dvcfile = Dvcfile(self, stage.path)
    if dvcfile.exists():
        if kwargs.get("overwrite", True):
            dvcfile.remove_stage(stage)
        elif stage_cls != PipelineStage:
            raise StageFileAlreadyExistsError(dvcfile.relpath)
        elif stage_name and stage_name in dvcfile.stages:
            raise DuplicateStageName(stage_name, dvcfile)

    try:
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        # Report only the *other* stages that conflict with the new one.
        others = set(exc.stages) - {stage}
        raise OutputDuplicationError(exc.output, others)

    if no_exec:
        stage.ignore_outs()
    else:
        stage.run(
            no_commit=kwargs.get("no_commit", False),
            run_cache=kwargs.get("run_cache", True),
        )

    dvcfile.dump(stage, update_pipeline=True, no_lock=no_exec)
    return stage
def create_stage_from_cli(
    repo: "Repo",
    single_stage: bool = False,
    fname: str = None,
    validate: bool = False,
    force: bool = False,
    **kwargs: Any,
) -> Union["Stage", "PipelineStage"]:
    """Build a stage object from CLI-style keyword arguments.

    Args:
        repo: Repository the stage is created in.
        single_stage: Create a ``Stage`` instead of a ``PipelineStage``.
        fname: Stage file path; only valid together with ``single_stage``.
        validate: When true, ``validate_state`` is called on the new stage.
        force: Forwarded to ``validate_state``.
        **kwargs: Remaining options, forwarded to ``create_stage`` after
            normalisation of ``cmd``, ``params`` and the ``live_*`` flags.

    Returns:
        The created stage, after ``restore_meta`` has been applied to it.

    Raises:
        InvalidArgumentError: Missing command or incompatible options.
        InvalidStageName: The pipeline stage name is missing or not valid.
    """
    from dvc.dvcfile import PIPELINE_FILE

    from . import PipelineStage, Stage, create_stage, restore_meta

    cmd = kwargs.get("cmd")
    if not cmd:
        raise InvalidArgumentError("command is not specified")

    stage_name = kwargs.get("name")
    if single_stage and stage_name:
        raise InvalidArgumentError(
            "`-n|--name` is incompatible with `--single-stage`"
        )
    if fname and stage_name:
        raise InvalidArgumentError(
            "`--file` is currently incompatible with `-n|--name` "
            "and requires `--single-stage`"
        )
    if not (stage_name or single_stage):
        raise InvalidArgumentError("`-n|--name` is required")

    if single_stage:
        kwargs.pop("name", None)
        stage_cls = Stage
        path = fname or _get_file_path(kwargs)
    else:
        path = PIPELINE_FILE
        stage_cls = PipelineStage
        if not stage_name or not is_valid_name(stage_name):
            raise InvalidStageName

    # A one-element command list is unwrapped to its single element.
    if isinstance(cmd, list) and len(cmd) == 1:
        kwargs["cmd"] = cmd[0]
    else:
        kwargs["cmd"] = cmd

    kwargs["live_summary"] = not kwargs.pop("live_no_summary", False)
    kwargs["live_html"] = not kwargs.pop("live_no_html", False)

    raw_params = kwargs.pop("params", [])
    params = chunk_dict(parse_params(raw_params))

    stage = create_stage(
        stage_cls, repo=repo, path=path, params=params, **kwargs
    )
    if validate:
        validate_state(repo, stage, force=force)

    restore_meta(stage)
    return stage
def run(self, fname=None, no_exec=False, single_stage=False, **kwargs):
    """Create a stage from CLI-style kwargs, run it and dump it to its dvcfile.

    Args:
        fname: Explicit stage file path; only consulted for single-stage runs.
        no_exec: When true, the stage is not executed and each of its outputs
            is ignored instead.
        single_stage: Build a ``Stage`` instead of a named ``PipelineStage``.
        **kwargs: Remaining options, forwarded to ``create_stage``.

    Returns:
        The created stage, or ``None`` if ``create_stage`` returned ``None``.

    Raises:
        InvalidArgumentError: Bad name/single-stage combination.
        InvalidStageName: The pipeline stage name is not valid.
        DuplicateStageName: The name already exists in the target dvcfile.
        OutputDuplicationError: Re-raised without the new stage itself.
    """
    from dvc.stage import PipelineStage, Stage, create_stage
    from dvc.dvcfile import Dvcfile, PIPELINE_FILE

    stage_name = kwargs.get("name")
    if single_stage and stage_name:
        raise InvalidArgumentError(
            "`-n|--name` is incompatible with `--single-stage`"
        )
    if not (stage_name or single_stage):
        raise InvalidArgumentError("`-n|--name` is required")

    if single_stage:
        kwargs.pop("name", None)
        stage_cls = Stage
        path = fname or _get_file_path(kwargs)
    elif is_valid_name(stage_name):
        stage_cls = PipelineStage
        path = PIPELINE_FILE
    else:
        raise InvalidStageName

    stage = create_stage(stage_cls, repo=self, path=path, **kwargs)
    if stage is None:
        return None

    dvcfile = Dvcfile(self, stage.path)
    if dvcfile.exists():
        if stage_name and stage_name in dvcfile.stages:
            raise DuplicateStageName(stage_name, dvcfile)
        if stage_cls != PipelineStage:
            overwrite = kwargs.get("overwrite", True)
            dvcfile.remove_with_prompt(force=overwrite)

    try:
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        # Report only the *other* stages that conflict with the new one.
        others = set(exc.stages) - {stage}
        raise OutputDuplicationError(exc.output, others)

    if no_exec:
        for out in stage.outs:
            out.ignore()
    else:
        stage.run(
            no_commit=kwargs.get("no_commit", False),
            ignore_build_cache=kwargs.get("ignore_build_cache", False),
        )

    dvcfile.dump(stage, update_pipeline=True)
    return stage
def branch(repo, exp_rev, branch_name, *args, **kwargs):
    """Create Git branch ``branch_name`` pointing at an experiment.

    ``exp_rev`` is resolved to a revision, the experiment refs for that
    revision are looked up, and when several exist the one whose
    ``baseline_sha`` equals the current revision is chosen.

    Raises:
        InvalidArgumentError: ``exp_rev`` cannot be resolved, is ambiguous,
            or the branch already exists.
        InvalidExpRevError: No experiment ref was found for ``exp_rev``.
    """
    from dvc.scm import resolve_rev

    try:
        rev = resolve_rev(repo.scm, exp_rev)
    except RevError:
        raise InvalidArgumentError(exp_rev)

    candidates = list(exp_refs_by_rev(repo.scm, rev))
    ref_info = None
    if len(candidates) == 1:
        ref_info = candidates[0]
    elif candidates:
        # Several experiments share this rev: prefer the one based on the
        # current revision.
        head_rev = repo.scm.get_rev()
        ref_info = next(
            (cand for cand in candidates if cand.baseline_sha == head_rev),
            None,
        )
        if not ref_info:
            lines = [
                f"Ambiguous experiment name '{exp_rev}' can refer to "
                "multiple experiments. To create a branch use a full "
                "experiment ref:",
                "",
            ]
            lines += [str(cand) for cand in candidates]
            raise InvalidArgumentError("\n".join(lines))

    if not ref_info:
        raise InvalidExpRevError(exp_rev)

    branch_ref = f"refs/heads/{branch_name}"
    if repo.scm.get_ref(branch_ref):
        raise InvalidArgumentError(
            f"Git branch '{branch_name}' already exists."
        )

    exp_name = ref_info.name
    target = repo.scm.get_ref(str(ref_info))
    repo.scm.set_ref(
        branch_ref,
        target,
        message=f"dvc: Created from experiment '{exp_name}'",
    )

    logger.info(
        "Git branch '%s' has been created from experiment '%s'.\n"
        "To switch to the new branch run:\n\n"
        "\tgit checkout %s",
        branch_name,
        ref_info.name,
        branch_name,
    )
def _get_exp_refs(repo, exp_names):
    """Yield an experiment ref for each entry of ``exp_names``.

    Entries starting with ``EXPS_NAMESPACE`` are treated as full refs and
    must exist in ``repo.scm``; other entries are looked up by name and
    disambiguated with ``_get_ref`` against the current revision.

    Raises:
        InvalidArgumentError: When an entry does not refer to an experiment.
    """
    cur_rev = repo.scm.get_rev()

    for name in exp_names:
        invalid_msg = f"'{name}' is not a valid experiment name"

        if name.startswith(EXPS_NAMESPACE):
            if not repo.scm.get_ref(name):
                raise InvalidArgumentError(invalid_msg)
            yield ExpRefInfo.from_ref(name)
            continue

        found = list(exp_refs_by_name(repo.scm, name))
        if not found:
            raise InvalidArgumentError(invalid_msg)
        yield _get_ref(found, name, cur_rev)
def resolve_exp_ref(
    scm, exp_name: str, git_remote: Optional[str] = None
) -> Optional[ExpRefInfo]:
    """Resolve an experiment name (or full ref) to a single experiment ref.

    Args:
        scm: SCM object the refs are looked up in.
        exp_name: Experiment name, or a full ref starting with ``refs/``.
        git_remote: When given, the name is looked up among the experiment
            refs of that Git remote instead of the local ones.

    Returns:
        The matching ref, or ``None`` if nothing matches. When several refs
        match, the one whose ``baseline_sha`` equals the current revision
        wins.

    Raises:
        InvalidArgumentError: If the name is ambiguous and no candidate is
            based on the current revision.
    """
    if exp_name.startswith("refs/"):
        return ExpRefInfo.from_ref(exp_name)

    if git_remote:
        exp_ref_list = list(remote_exp_refs_by_name(scm, git_remote, exp_name))
    else:
        exp_ref_list = list(exp_refs_by_name(scm, exp_name))

    if not exp_ref_list:
        return None

    if len(exp_ref_list) > 1:
        cur_rev = scm.get_rev()
        for info in exp_ref_list:
            if info.baseline_sha == cur_rev:
                return info

        # The message that names the remote only makes sense when a remote
        # was actually given (a pull); local lookups are for pushes. The
        # two branches used to be swapped, printing "in 'None'" for pushes.
        if git_remote:
            msg = [
                (
                    f"Ambiguous name '{exp_name}' refers to multiple "
                    f"experiments in '{git_remote}'. Use full refname to pull "
                    "one of the following:"
                ),
                "",
            ]
        else:
            msg = [
                (
                    f"Ambiguous name '{exp_name}' refers to multiple "
                    "experiments. Use full refname to push one of the "
                    "following:"
                ),
                "",
            ]
        msg.extend([f"\t{info}" for info in exp_ref_list])
        raise InvalidArgumentError("\n".join(msg))

    return exp_ref_list[0]
def _filter_name(names, label, filter_strs): ret = defaultdict(dict) path_filters = defaultdict(list) for filter_s in filter_strs: path, _, name = filter_s.rpartition(":") path_filters[path].append(tuple(name.split("."))) for path, filters in path_filters.items(): if path: match_paths = [path] else: match_paths = names.keys() for length, groups in groupby(filters, len): for group in groups: for match_path in match_paths: possible_names = [ tuple(name.split(".")) for name in names[match_path] ] matches = [ name for name in possible_names if name[:length] == group ] if not matches: name = ".".join(group) raise InvalidArgumentError( f"'{name}' does not match any known {label}") ret[match_path].update( {".".join(match): None for match in matches}) return ret
def _filter_name(names, label, filter_strs): ret = defaultdict(dict) path_filters = defaultdict(list) for filter_s in filter_strs: path, _, name = filter_s.rpartition(":") path_filters[path].append(name) for path, filters in path_filters.items(): if path: match_paths = [path] else: match_paths = names.keys() for match_path in match_paths: for f in filters: matches = [ name for name in names[match_path] if fnmatch(name, f) ] if not matches: raise InvalidArgumentError( f"'{f}' does not match any known {label}" ) ret[match_path].update({match: None for match in matches}) return ret
def _parse_params(path_params):
    """Parse ``[path:]key=value[,key=value...]`` strings into nested dicts.

    The text after the last ``:`` is a comma-separated list of ``key=value``
    pairs; anything before it is the params file path, defaulting to
    ``ParamsDependency.DEFAULT_PARAMS_FILE``. Values are parsed with
    ``loads_yaml`` and the flat keys are expanded with ``unflatten``.

    Args:
        path_params: Iterable of parameter strings from the command line.

    Returns:
        Mapping of path -> unflattened params dict. A path that appears more
        than once keeps only its last entry.

    Raises:
        InvalidArgumentError: If a pair has no ``=`` or its value cannot be
            parsed.
    """
    from ruamel.yaml import YAMLError

    from dvc.dependency.param import ParamsDependency
    from dvc.utils.flatten import unflatten
    from dvc.utils.serialize import loads_yaml

    ret = {}

    for path_param in path_params:
        path, _, params_str = path_param.rpartition(":")
        # remove empty strings from params, on condition such as `-p "file1:"`
        params = {}
        for param_str in filter(bool, params_str.split(",")):
            try:
                # Split on the first "=" only, so values that themselves
                # contain "=" are accepted; a pair without any "=" still
                # fails to unpack and is reported below.
                key, value = param_str.split("=", 1)
                # interpret value strings using YAML rules
                params[key] = loads_yaml(value)
            except (ValueError, YAMLError) as exc:
                raise InvalidArgumentError(
                    f"Invalid param/value pair '{param_str}'"
                ) from exc
        if not path:
            path = ParamsDependency.DEFAULT_PARAMS_FILE
        ret[path] = unflatten(params)
    return ret
def update(
    self,
    targets=None,
    rev=None,
    recursive=False,
    to_remote=False,
    remote=None,
    jobs=None,
):
    """Update the stages collected for ``targets`` and dump each of them.

    Args:
        targets: A single target string, a list of targets, or a falsy value
            (in which case ``None`` is passed to ``self.stage.collect``).
        rev: Forwarded to ``stage.update``.
        recursive: Forwarded to ``self.stage.collect``.
        to_remote: Forwarded to ``stage.update``.
        remote: Forwarded to ``stage.update``; requires ``to_remote``.
        jobs: Forwarded to ``stage.update``.

    Returns:
        List of the updated stages.

    Raises:
        InvalidArgumentError: If ``remote`` is given without ``to_remote``.
    """
    from ..dvcfile import Dvcfile

    if not targets:
        targets = [None]
    elif isinstance(targets, str):
        targets = [targets]

    if remote and not to_remote:
        raise InvalidArgumentError(
            "--remote can't be used without --to-remote"
        )

    # A set, so a stage reachable from several targets is updated once.
    stages = {
        stage
        for target in targets
        for stage in self.stage.collect(target, recursive=recursive)
    }

    for stage in stages:
        stage.update(rev, to_remote=to_remote, remote=remote, jobs=jobs)
        Dvcfile(self, stage.path).dump(stage)

    return list(stages)
def pull(repo, git_remote, exp_name, *args, force=False, pull_cache=False, **kwargs):
    """Fetch the ref of experiment ``exp_name`` from ``git_remote``.

    Args:
        repo: Repository to pull into.
        git_remote: Git remote to fetch the experiment ref from.
        exp_name: Experiment name, resolved with ``resolve_exp_ref``.
        force: Forwarded to ``fetch_refspecs``.
        pull_cache: When true, ``_pull_cache`` is called afterwards.
        **kwargs: Forwarded to ``_pull_cache``.

    Raises:
        InvalidArgumentError: The experiment cannot be resolved on the remote.
        DvcException: Raised from the divergence callback when the local ref
            differs from the given revision.
    """
    exp_ref = resolve_exp_ref(repo.scm, exp_name, git_remote)
    if not exp_ref:
        raise InvalidArgumentError(
            f"Experiment '{exp_name}' does not exist in '{git_remote}'"
        )

    def on_diverged(refname: str, rev: str) -> bool:
        # A local ref that already equals the given revision is fine.
        if repo.scm.get_ref(refname) != rev:
            raise DvcException(
                f"Local experiment '{exp_name}' has diverged from remote "
                "experiment with the same name. To override the local experiment "
                "re-run with '--force'."
            )
        return True

    refspec = f"{exp_ref}:{exp_ref}"
    logger.debug("git pull experiment '%s' -> '%s'", git_remote, refspec)
    repo.scm.fetch_refspecs(
        git_remote,
        [refspec],
        force=force,
        on_diverged=on_diverged,
    )

    if pull_cache:
        _pull_cache(repo, exp_ref, **kwargs)
def push(
    repo,
    git_remote,
    exp_name: str,
    *args,
    force=False,
    push_cache=False,
    **kwargs,
):
    """Push the ref of local experiment ``exp_name`` to ``git_remote``.

    Args:
        repo: Repository to push from.
        git_remote: Git remote to push the experiment ref to.
        exp_name: Experiment name, resolved with ``resolve_exp_ref``.
        force: Forwarded to ``push_refspec``.
        push_cache: When true, ``_push_cache`` is called afterwards.
        **kwargs: Forwarded to ``_push_cache``.

    Raises:
        InvalidArgumentError: The experiment name cannot be resolved.
        DvcException: Raised from the divergence callback when the local ref
            differs from the given revision.
    """
    exp_ref = resolve_exp_ref(repo.scm, exp_name)
    if not exp_ref:
        raise InvalidArgumentError(
            f"'{exp_name}' is not a valid experiment name"
        )

    def on_diverged(refname: str, rev: str) -> bool:
        # A local ref that already equals the given revision is fine.
        if repo.scm.get_ref(refname) != rev:
            raise DvcException(
                f"Local experiment '{exp_name}' has diverged from remote "
                "experiment with the same name. To override the remote experiment "
                "re-run with '--force'."
            )
        return True

    refname = str(exp_ref)
    logger.debug("git push experiment '%s' -> '%s'", exp_ref, git_remote)
    repo.scm.push_refspec(
        git_remote,
        refname,
        refname,
        force=force,
        on_diverged=on_diverged,
    )

    if push_cache:
        _push_cache(repo, exp_ref, **kwargs)
def pull(
    repo, git_remote, exp_name, *args, force=False, pull_cache=False, **kwargs
):
    """Fetch the ref of experiment ``exp_name`` from ``git_remote``.

    The fetch is wrapped in a ``TqdmGit`` progress bar.

    Args:
        repo: Repository to pull into.
        git_remote: Git remote to fetch the experiment ref from.
        exp_name: Experiment name, looked up in the mapping returned by
            ``resolve_name``.
        force: Forwarded to ``fetch_refspecs``.
        pull_cache: When true, ``_pull_cache`` is called afterwards.
        **kwargs: Forwarded to ``_pull_cache``.

    Raises:
        InvalidArgumentError: The resolved entry for ``exp_name`` is falsy.
        DvcException: Raised from the divergence callback when the local ref
            differs from the given revision.
    """
    resolved = resolve_name(repo.scm, exp_name, git_remote)
    exp_ref = resolved[exp_name]
    if not exp_ref:
        raise InvalidArgumentError(
            f"Experiment '{exp_name}' does not exist in '{git_remote}'"
        )

    def on_diverged(refname: str, rev: str) -> bool:
        # A local ref that already equals the given revision is fine.
        if repo.scm.get_ref(refname) != rev:
            raise DvcException(
                f"Local experiment '{exp_name}' has diverged from remote "
                "experiment with the same name. To override the local experiment "
                "re-run with '--force'."
            )
        return True

    refspec = f"{exp_ref}:{exp_ref}"
    logger.debug("git pull experiment '%s' -> '%s'", git_remote, refspec)

    from dvc.scm import TqdmGit

    with TqdmGit(desc="Fetching git refs") as progress_bar:
        repo.scm.fetch_refspecs(
            git_remote,
            [refspec],
            force=force,
            on_diverged=on_diverged,
            progress=progress_bar.update_git,
        )

    if pull_cache:
        _pull_cache(repo, exp_ref, **kwargs)
def check_ref_format(scm: "Git", ref: ExpRefInfo):
    """Validate an experiment ref, raising if its name is not acceptable.

    The full ref must pass ``scm.check_ref_format`` and the experiment name
    itself must not contain ``/``.

    Raises:
        InvalidArgumentError: If either check fails.
    """
    # "/" forbidden, only in dvc exp as we didn't support it for now.
    if scm.check_ref_format(str(ref)) and "/" not in ref.name:
        return

    raise InvalidArgumentError(
        f"Invalid exp name {ref.name}, the exp name must follow rules in "
        "https://git-scm.com/docs/git-check-ref-format"
    )
def validate_name(name: str):
    """Reject machine names that collide with ``RESERVED_NAMES``.

    The comparison is done on the lower-cased name.

    Raises:
        InvalidArgumentError: If the lower-cased name is reserved.
    """
    from dvc.exceptions import InvalidArgumentError

    lowered = name.lower()
    if lowered not in RESERVED_NAMES:
        return

    raise InvalidArgumentError(
        f"Machine name '{lowered}' is reserved for internal DVC use."
    )
def status(
    self,
    targets=None,
    jobs=None,
    cloud=False,
    remote=None,
    all_branches=False,
    with_deps=False,
    all_tags=False,
    all_commits=False,
):
    """Return cloud status if ``cloud``/``remote`` is set, else local status.

    For local status, the revision-selection options and ``jobs`` have no
    effect, so passing any of them is treated as an error.

    Raises:
        InvalidArgumentError: If a cloud-only option is given for local
            status.
    """
    if cloud or remote:
        return _cloud_status(
            self,
            targets,
            jobs,
            all_branches=all_branches,
            with_deps=with_deps,
            remote=remote,
            all_tags=all_tags,
            all_commits=all_commits,
        )

    cloud_only = (
        ("--all-branches", all_branches),
        ("--all-tags", all_tags),
        ("--all-commits", all_commits),
        ("--jobs", jobs),
    )
    ignored = [flag for flag, given in cloud_only if given]
    if ignored:
        msg = "The following options are meaningless for local status: {}"
        raise InvalidArgumentError(msg.format(", ".join(ignored)))

    return _local_status(self, targets, with_deps=with_deps)
def loads_params(path_params: Iterable[str],) -> Dict[str, Dict[str, Any]]:
    """Loads the content of params from the cli as Python object.

    ``path_params`` is first normalised with ``parse_params``; each resulting
    part must be a single-item mapping of path -> ``key=value`` strings.
    Values are parsed with ``loads_yaml``.

    Returns:
        Mapping of path -> {key -> parsed value}.

    Raises:
        InvalidArgumentError: If a value cannot be parsed.
    """
    from ruamel.yaml import YAMLError

    from dvc.exceptions import InvalidArgumentError

    from .serialize import loads_yaml

    loaded: Dict[str, Dict[str, Any]] = defaultdict(dict)

    for part in parse_params(path_params):
        assert part
        # Each normalised part holds exactly one path.
        ((path, param_keys),) = part.items()
        for param_str in param_keys:
            key, _, value = param_str.partition("=")
            try:
                # interpret value strings using YAML rules
                parsed = loads_yaml(value)
            except (ValueError, YAMLError):
                raise InvalidArgumentError(
                    f"Invalid param/value pair '{param_str}'"
                )
            loaded[path][key] = parsed

    return loaded
def _filter_names( names: Dict[str, Dict[str, None]], label: str, include: Optional[Iterable], exclude: Optional[Iterable], ): if include and exclude: intersection = set(include) & set(exclude) if intersection: values = ", ".join(intersection) raise InvalidArgumentError( f"'{values}' specified in both --include-{label} and" f" --exclude-{label}") if include: ret = _filter_name(names, label, include) else: ret = names if exclude: to_remove = _filter_name(names, label, exclude) for path in to_remove: if path in ret: for key in to_remove[path]: if key in ret[path]: del ret[path][key] return ret
def run(self):
    """Initialize an experiment stage from the parsed CLI options.

    Builds defaults (unless ``explicit`` is set) and CLI overrides, calls
    ``init``, prints a summary and optionally runs the new stage.

    Returns:
        The result of ``self.repo.experiments.run`` when ``run`` is set,
        otherwise ``0``.

    Raises:
        InvalidArgumentError: If no command is given in non-interactive mode.
    """
    from dvc.command.stage import parse_cmd

    args = self.args
    cmd = parse_cmd(args.command)
    if not (args.interactive or cmd):
        raise InvalidArgumentError("command is not specified")

    from dvc.repo.experiments.init import init

    defaults = {}
    if not args.explicit:
        config = self.repo.config["exp"]
        # Config values take precedence over the built-in defaults.
        defaults.update({**self.DEFAULTS, **config})

    overrides = {"cmd": cmd}
    for option in (
        "code",
        "data",
        "models",
        "metrics",
        "params",
        "plots",
        "live",
    ):
        overrides[option] = getattr(args, option)
    cli_args = compact(overrides)

    initialized_stage = init(
        self.repo,
        name=args.name,
        type=args.type,
        defaults=defaults,
        overrides=cli_args,
        interactive=args.interactive,
        force=args.force,
    )

    leading = "\n" if args.interactive else ""
    text = ui.rich_text.assemble(
        leading,
        "Created ",
        (args.name, "bright_blue"),
        " stage in ",
        ("dvc.yaml", "green"),
        ".",
    )
    if not args.run:
        hint = ui.rich_text.assemble(
            " To run, use ",
            ('"dvc exp run"', "green"),
            ".\nSee ",
            (self.EXP_LINK, "repr.url"),
            ".",
        )
        text.append_text(hint)

    ui.write(text, styled=True)

    if not args.run:
        return 0
    return self.repo.experiments.run(targets=[initialized_stage.addressing])
def _remove_exp_by_names(repo, remote, exp_names: List[str]) -> int:
    """Remove the named experiments and return how many were removed.

    Names are first passed to ``_remove_commited_exps``; when no ``remote``
    is given, whatever remains is passed to ``_remove_queued_exps``.

    Raises:
        InvalidArgumentError: If any name is still left over afterwards.
    """
    leftover = _remove_commited_exps(repo, remote, exp_names)
    if not remote:
        leftover = _remove_queued_exps(repo, leftover)

    if leftover:
        joined = ";".join(leftover)
        raise InvalidArgumentError(f"'{joined}' is not a valid experiment")

    return len(exp_names) - len(leftover)
def _update_import_on_remote(stage, remote, jobs):
    """Transfer the stage's first dependency to a remote, updating its output.

    Args:
        stage: Import stage; its first dep supplies the source path and its
            first out performs the transfer.
        remote: Remote name, resolved via ``stage.repo.cloud.get_remote``.
        jobs: Forwarded to the transfer call.

    Raises:
        InvalidArgumentError: If the stage is a repo import.
    """
    if stage.is_repo_import:
        raise InvalidArgumentError(
            "Data imported from other DVC or Git repositories can't "
            "be updated with --to-remote"
        )

    source = stage.deps[0].def_path
    remote_obj = stage.repo.cloud.get_remote(remote, "update")
    stage.outs[0].transfer(source, odb=remote_obj.odb, jobs=jobs, update=True)
def _update_import_on_remote(stage, remote, jobs):
    """Transfer the stage's first dependency to a remote and record its hash.

    The URL of the first dep is passed to ``stage.repo.cloud.transfer`` and
    the result is stored as ``hash_info`` on the first out.

    Raises:
        InvalidArgumentError: If the stage is a repo import.
    """
    if stage.is_repo_import:
        raise InvalidArgumentError(
            "Can't update a repo import with --to-remote"
        )

    source_url = stage.deps[0].path_info.url
    transferred = stage.repo.cloud.transfer(
        source_url,
        jobs=jobs,
        remote=remote,
        command="update",
    )
    stage.outs[0].hash_info = transferred
def _filter(filters, update_func):
    """Feed the names matched by each dotted filter to ``update_func``.

    Relies on ``names`` and ``label`` from the surrounding scope. A filter
    matches every entry of ``names`` whose leading components equal the
    filter's dot-separated components.

    Args:
        filters: Iterable of dotted filter strings.
        update_func: Called once per filter with ``{match: None, ...}``.

    Raises:
        InvalidArgumentError: If a filter matches nothing.
    """
    # Split everything up front so a malformed filter fails before any
    # update is applied.
    prefixes = [tuple(raw.split(".")) for raw in filters]

    for prefix in prefixes:
        depth = len(prefix)
        hits = [candidate for candidate in names if candidate[:depth] == prefix]
        if not hits:
            name = ".".join(prefix)
            raise InvalidArgumentError(
                f"'{name}' does not match any known {label}"
            )
        update_func(dict.fromkeys(hits))
def run(
    self: "Repo",
    fname: str = None,
    no_exec: bool = False,
    single_stage: bool = False,
    **kwargs
):
    """Create a stage from CLI-style kwargs, run it and dump it.

    Args:
        fname: Stage file path; only valid together with ``single_stage``.
        no_exec: When true, the stage is not executed; its outputs are
            ignored and the dump is called with ``update_lock=False``.
        single_stage: Forwarded to ``create_stage_from_cli``.
        **kwargs: Remaining options, forwarded to ``create_stage_from_cli``.

    Returns:
        The created stage, or ``None`` when the run cache is enabled and the
        stage reports that it can be skipped.

    Raises:
        InvalidArgumentError: Missing command or incompatible options.
    """
    from dvc.stage.utils import check_graphs, create_stage_from_cli

    if not kwargs.get("cmd"):
        raise InvalidArgumentError("command is not specified")

    stage_name = kwargs.get("name")
    if single_stage and stage_name:
        raise InvalidArgumentError(
            "`-n|--name` is incompatible with `--single-stage`"
        )
    if fname and stage_name:
        raise InvalidArgumentError(
            "`--file` is currently incompatible with `-n|--name` "
            "and requires `--single-stage`"
        )
    if not (stage_name or single_stage):
        raise InvalidArgumentError("`-n|--name` is required")

    stage = create_stage_from_cli(
        self, single_stage=single_stage, fname=fname, **kwargs
    )

    use_run_cache = kwargs.get("run_cache", True)
    if use_run_cache and stage.can_be_skipped:
        return None

    check_graphs(self, stage, force=kwargs.get("force", True))

    if no_exec:
        stage.ignore_outs()
    else:
        stage.run(
            no_commit=kwargs.get("no_commit", False),
            run_cache=use_run_cache,
        )

    stage.dump(update_lock=not no_exec)
    return stage
def _update_import_on_remote(stage, remote, jobs):
    """Transfer the stage's first dependency to a remote and record its hash.

    The ``def_path`` of the first dep is passed to
    ``stage.repo.cloud.transfer`` and the result is stored as ``hash_info``
    on the first out.

    Raises:
        InvalidArgumentError: If the stage is a repo import.
    """
    if stage.is_repo_import:
        raise InvalidArgumentError(
            "Data imported from other DVC or Git repositories can't "
            "be updated with --to-remote"
        )

    source = stage.deps[0].def_path
    transferred = stage.repo.cloud.transfer(
        source,
        jobs=jobs,
        remote=remote,
        command="update",
    )
    stage.outs[0].hash_info = transferred
def _get_ref(ref_infos, name, cur_rev): if len(ref_infos) > 1: for info in ref_infos: if info.baseline_sha == cur_rev: return info msg = [(f"Ambiguous name '{name}' refers to multiple " "experiments. Use full refname to remove one of " "the following:")] msg.extend([f"\t{info}" for info in ref_infos]) raise InvalidArgumentError("\n".join(msg)) return ref_infos[0]