def dvcfile(self) -> "DVCFile":
    """Return the dvcfile object for this stage's path, building it on demand.

    The previously built object is reused as long as its ``path`` still
    equals ``self.path``; otherwise a new one is built with ``make_dvcfile``
    and stored on ``self._dvcfile``.

    Raises:
        DvcException: if ``self.path`` is empty/unset.
    """
    stage_path = self.path
    if not stage_path:
        raise DvcException(
            "Stage does not have any path set "
            "and is detached from dvcfile."
        )

    cached = self._dvcfile
    if cached and stage_path == cached.path:
        # Cache hit: the stored object still points at our path.
        return cached

    # Imported lazily, as in the rest of this file.
    from dvc.dvcfile import make_dvcfile

    self._dvcfile = make_dvcfile(self.repo, stage_path)
    return self._dvcfile
def check_stage_exists(
    repo: "Repo", stage: Union["Stage", "PipelineStage"], path: str
):
    """Refuse to clobber an existing stage file or stage name.

    Builds the dvcfile for ``path`` and, only if it already exists:

    * raises ``StageFileAlreadyExistsError`` when ``stage`` is not a
      ``PipelineStage``;
    * raises ``DuplicateStageName`` when ``stage`` is a ``PipelineStage``
      whose non-empty name is already present in ``dvcfile.stages``.

    Returns ``None`` in every other case.
    """
    from dvc.dvcfile import make_dvcfile
    from dvc.stage import PipelineStage
    from dvc.stage.exceptions import (
        DuplicateStageName,
        StageFileAlreadyExistsError,
    )

    dvcfile = make_dvcfile(repo, path)
    if dvcfile.exists():
        hint = "Use '--force' to overwrite."

        if not isinstance(stage, PipelineStage):
            raise StageFileAlreadyExistsError(
                f"'{stage.relpath}' already exists. {hint}"
            )

        # From here on `stage` is a PipelineStage: only a name clash matters.
        if stage.name and stage.name in dvcfile.stages:
            raise DuplicateStageName(
                f"Stage '{stage.name}' already exists in '{stage.relpath}'. {hint}"
            )
def init(
    repo: "Repo",
    name: str = None,
    type: str = "default",  # pylint: disable=redefined-builtin
    defaults: Dict[str, str] = None,
    overrides: Dict[str, str] = None,
    interactive: bool = False,
    force: bool = False,
    stream: Optional[TextIO] = None,
) -> "Stage":
    """Create a stage from ``defaults`` merged with ``overrides`` and dump it.

    Args:
        repo: repository object; ``repo.stage.create`` and
            ``repo.scm_context`` are used.
        name: stage name; falls back to ``type`` when falsy.
        type: ``"live"`` switches on the live-specific handling below.
        defaults: baseline values; copied, never mutated in place.
        overrides: values that win over ``defaults``; copied as well.
        interactive: when true, values are collected via
            ``init_interactive`` and the result must be confirmed.
        force: forwarded to ``_check_stage_exists`` and
            ``repo.stage.create``.
        stream: forwarded to ``init_interactive`` and ``Confirm.ask``.

    Returns:
        The stage returned by ``repo.stage.create``.

    Raises:
        DvcException: in interactive mode, when the confirmation is declined.
        AssertionError: when the merged values contain no ``"cmd"``.
    """
    from dvc.dvcfile import make_dvcfile

    dvcfile = make_dvcfile(repo, "dvc.yaml")
    name = name or type

    _check_stage_exists(dvcfile, name, force=force)

    defaults = defaults.copy() if defaults else {}
    overrides = overrides.copy() if overrides else {}

    with_live = type == "live"

    if interactive:
        defaults = init_interactive(
            name,
            validator=validate_prompts,
            defaults=defaults,
            live=with_live,
            provided=overrides,
            stream=stream,
        )
    elif with_live:
        # Live selected: drop default `metrics`/`plots` (overrides can
        # still re-introduce them when merged below).
        defaults.pop("metrics", None)
        defaults.pop("plots", None)
    else:
        # Live not selected: drop the default `live` entry.
        defaults.pop("live", None)

    context: Dict[str, str] = {**defaults, **overrides}
    assert "cmd" in context

    params = context.get("params")
    params_kv = [loadd_params(params)] if params else []

    live = context.get("live")
    # A truthy `live` value turns the models output into a checkpoint.
    models_key = "checkpoints" if live else "outs"
    models = context.get("models")

    stage = repo.stage.create(
        name=name,
        cmd=context["cmd"],
        deps=compact([context.get("code"), context.get("data")]),
        params=params_kv,
        metrics_no_cache=compact([context.get("metrics")]),
        plots_no_cache=compact([context.get("plots")]),
        live=live,
        force=force,
        **{models_key: compact([models])},
    )

    if interactive:
        # Preview the YAML that is about to be written.
        ui.error_write(Rule(style="green"), styled=True)
        preview = dumps_yaml(to_pipeline_file(cast(PipelineStage, stage)))
        ui.error_write(
            Syntax(preview, "yaml", theme="ansi_dark"), styled=True
        )

    from dvc.ui.prompt import Confirm

    # Non-interactive runs never prompt (short-circuit).
    accepted = not interactive or Confirm.ask(
        "Do you want to add the above contents to dvc.yaml?",
        console=ui.error_console,
        default=True,
        stream=stream,
    )
    if not accepted:
        raise DvcException("Aborting ...")

    with _disable_logging(), repo.scm_context(autostage=True, quiet=True):
        stage.dump(update_lock=False)
        stage.ignore_outs()
        if params:
            repo.scm_context.track_file(params)

    return stage
def init(
    repo: "Repo",
    name: str = "train",
    type: str = "default",  # pylint: disable=redefined-builtin
    defaults: Dict[str, str] = None,
    overrides: Dict[str, str] = None,
    interactive: bool = False,
    force: bool = False,
    stream: Optional[TextIO] = None,
) -> Tuple[PipelineStage, List["Dependency"], List[str]]:
    """Create a stage from ``defaults`` merged with ``overrides`` and dump it.

    Args:
        repo: repository object; ``repo.stage.create`` and
            ``repo.scm_context`` are used.
        name: stage name.
        type: ``"checkpoint"`` registers the models path under
            ``checkpoints`` instead of ``outs``.
        defaults: baseline values; copied, never mutated in place.
        overrides: values that win over ``defaults``; copied as well.
        interactive: when true, values are collected via
            ``init_interactive``.
        force: forwarded to ``_check_stage_exists`` and
            ``repo.stage.create``.
        stream: forwarded to ``init_interactive``.

    Returns:
        ``(stage, initialized_deps, initialized_out_dirs)`` where the last
        two are the results of ``init_deps(stage)`` and
        ``init_out_dirs(stage)``.

    Raises:
        DvcException: when validating the params path raises
            ``ParamsIsADirectoryError``.
        AssertionError: when the merged values contain no ``"cmd"``, or the
            created stage is not a ``PipelineStage``.
    """
    from dvc.dvcfile import make_dvcfile

    dvcfile = make_dvcfile(repo, "dvc.yaml")
    _check_stage_exists(dvcfile, name, force=force)

    defaults = defaults.copy() if defaults else {}
    overrides = overrides.copy() if overrides else {}

    if interactive:
        defaults = init_interactive(
            validator=partial(validate_prompts, repo),
            defaults=defaults,
            provided=overrides,
            stream=stream,
        )
    elif "live" in overrides:
        # Live explicitly requested: drop default `metrics`/`plots`.
        defaults.pop("metrics", None)
        defaults.pop("plots", None)
    else:
        # Live not requested: drop the default `live` entry.
        defaults.pop("live", None)

    context: Dict[str, str] = {**defaults, **overrides}
    assert "cmd" in context

    params = context.get("params")
    if params:
        from dvc.dependency.param import (
            MissingParamsFile,
            ParamsDependency,
            ParamsIsADirectoryError,
        )

        try:
            ParamsDependency(None, params, repo=repo).validate_filepath()
        except ParamsIsADirectoryError as exc:
            raise DvcException(f"{exc}.")  # swallow cause for display
        except MissingParamsFile:
            # A params file that does not exist is tolerated here.
            pass

    models = context.get("models")

    # `live` is removed from the context and expanded into a metrics file
    # and a plots directory derived from its path.
    live_path = context.pop("live", None)
    if live_path:
        live_metrics = f"{live_path}.json"
        live_plots = os.path.join(live_path, "scalars")
    else:
        live_metrics = live_plots = None

    models_key = "checkpoints" if type == "checkpoint" else "outs"
    stage = repo.stage.create(
        name=name,
        cmd=context["cmd"],
        deps=compact([context.get("code"), context.get("data")]),
        params=[{params: None}] if params else None,
        metrics_no_cache=compact([context.get("metrics"), live_metrics]),
        plots_no_cache=compact([context.get("plots"), live_plots]),
        force=force,
        **{models_key: compact([models])},
    )

    with _disable_logging(), repo.scm_context(autostage=True, quiet=True):
        stage.dump(update_lock=False)
        initialized_out_dirs = init_out_dirs(stage)
        stage.ignore_outs()
        initialized_deps = init_deps(stage)
        if params:
            repo.scm_context.track_file(params)

    assert isinstance(stage, PipelineStage)
    return stage, initialized_deps, initialized_out_dirs
def init(
    repo: "Repo",
    name: str = None,
    type: str = "default",  # pylint: disable=redefined-builtin
    defaults: Dict[str, str] = None,
    overrides: Dict[str, str] = None,
    interactive: bool = False,
    force: bool = False,
) -> "Stage":
    """Create a stage from ``defaults`` merged with ``overrides`` and dump it.

    Args:
        repo: repository object; ``repo.stage.create`` is used.
        name: stage name; falls back to ``type`` when falsy.
        type: ``"live"`` switches on the live-specific handling below.
        defaults: baseline values. The mapping is copied, so the caller's
            dict is never modified, and it may omit any of the
            ``metrics``/``params``/``live`` keys.
        overrides: values that win over ``defaults``.
        interactive: when true, values are collected via
            ``init_interactive`` and the result must be confirmed.
        force: forwarded to ``_check_stage_exists`` and
            ``repo.stage.create``.

    Returns:
        The stage returned by ``repo.stage.create``.

    Raises:
        DvcException: in interactive mode, when the confirmation is declined.
        AssertionError: when the merged values contain no ``"cmd"``, or the
            params entry is not a string.
    """
    from dvc.dvcfile import make_dvcfile

    dvcfile = make_dvcfile(repo, "dvc.yaml")
    name = name or type

    _check_stage_exists(dvcfile, name, force=force)

    # Copy before popping: the pops below must not leak into the caller's
    # dict (which may well be a shared, module-level defaults mapping).
    defaults = defaults.copy() if defaults else {}
    overrides = overrides or {}

    with_live = type == "live"
    if interactive:
        defaults = init_interactive(
            name,
            defaults=defaults,
            live=with_live,
            provided=overrides,
            show_tree=True,
        )
    elif with_live:
        # suppress `metrics`/`params` if live is selected, unless
        # it is also provided via overrides/cli.
        # This makes output to be a checkpoint as well.
        # `pop(key, None)` tolerates defaults that lack the key.
        defaults.pop("metrics", None)
        defaults.pop("params", None)
    else:
        defaults.pop("live", None)  # suppress live otherwise

    context: Dict[str, str] = {**defaults, **overrides}
    assert "cmd" in context

    params_kv = []
    if context.get("params"):
        from dvc.utils.serialize import LOADERS

        path = context["params"]
        assert isinstance(path, str)
        # The loader is selected by the params file's extension.
        _, ext = os.path.splitext(path)
        params_kv = [{path: list(LOADERS[ext](path))}]

    # A truthy `live` value turns the models output into a checkpoint.
    checkpoint_out = bool(context.get("live"))
    models = context.get("models")
    stage = repo.stage.create(
        name=name,
        cmd=context["cmd"],
        deps=compact([context.get("code"), context.get("data")]),
        params=params_kv,
        metrics_no_cache=compact([context.get("metrics")]),
        plots_no_cache=compact([context.get("plots")]),
        live=context.get("live"),
        force=force,
        **{"checkpoints" if checkpoint_out else "outs": compact([models])},
    )

    if interactive:
        # NOTE(review): the rule goes through `ui.write` while the YAML
        # preview uses `ui.error_write` - confirm the split is intentional.
        ui.write(Rule(style="green"), styled=True)
        _yaml = dumps_yaml(to_pipeline_file(cast(PipelineStage, stage)))
        syn = Syntax(_yaml, "yaml", theme="ansi_dark")
        ui.error_write(syn, styled=True)

    # Non-interactive runs never prompt (short-circuit).
    if not interactive or ui.confirm(
        "Do you want to add the above contents to dvc.yaml?"
    ):
        with _disable_logging():
            stage.dump(update_lock=False)
            stage.ignore_outs()
    else:
        raise DvcException("Aborting ...")
    return stage
def init(
    repo: "Repo",
    name: str = None,
    type: str = "default",  # pylint: disable=redefined-builtin
    defaults: Dict[str, str] = None,
    overrides: Dict[str, str] = None,
    interactive: bool = False,
    force: bool = False,
) -> "Stage":
    """Add a stage built from ``defaults`` merged with ``overrides``.

    Args:
        repo: repository object; ``repo.stage.add`` is used.
        name: stage name; falls back to ``type`` when falsy.
        type: ``"live"`` switches on the live-specific handling below.
        defaults: baseline values. The mapping is copied, so the caller's
            dict is never modified, and it may omit any of the
            ``metrics``/``params``/``live`` keys.
        overrides: values that win over ``defaults``.
        interactive: when true, values are collected via
            ``init_interactive``.
        force: forwarded to ``_check_stage_exists`` and ``repo.stage.add``.

    Returns:
        Whatever ``repo.stage.add`` returns.

    Raises:
        AssertionError: when the merged values contain no ``"cmd"``, or the
            params entry is not a string.
    """
    from dvc.dvcfile import make_dvcfile

    dvcfile = make_dvcfile(repo, "dvc.yaml")
    name = name or type

    _check_stage_exists(dvcfile, name, force=force)

    # Copy before popping: the pops below must not leak into the caller's
    # dict (which may well be a shared, module-level defaults mapping).
    defaults = defaults.copy() if defaults else {}
    overrides = overrides or {}

    with_live = type == "live"
    if interactive:
        defaults = init_interactive(
            defaults=defaults,
            show_heading=not dvcfile.exists(),
            live=with_live,
            provided=overrides.keys(),
        )
    elif with_live:
        # suppress `metrics`/`params` if live is selected, unless
        # it is also provided via overrides/cli.
        # This makes output to be a checkpoint as well.
        # `pop(key, None)` tolerates defaults that lack the key.
        defaults.pop("metrics", None)
        defaults.pop("params", None)
    else:
        defaults.pop("live", None)  # suppress live otherwise

    context: Dict[str, str] = {**defaults, **overrides}
    assert "cmd" in context

    params_kv = []
    if context.get("params"):
        from dvc.utils.serialize import LOADERS

        path = context["params"]
        assert isinstance(path, str)
        # The loader is selected by the params file's extension.
        _, ext = os.path.splitext(path)
        params_kv = [{path: list(LOADERS[ext](path))}]

    # A truthy `live` value turns the models output into a checkpoint.
    checkpoint_out = bool(context.get("live"))
    models = context.get("models")
    return repo.stage.add(
        name=name,
        cmd=context["cmd"],
        deps=compact([context.get("code"), context.get("data")]),
        params=params_kv,
        metrics_no_cache=compact([context.get("metrics")]),
        plots_no_cache=compact([context.get("plots")]),
        live=context.get("live"),
        force=force,
        **{"checkpoints" if checkpoint_out else "outs": compact([models])},
    )
def run(self):
    """Build a stage from CLI arguments and layered defaults, then add it.

    Values are resolved through a ``ChainMap``: the class-level defaults
    form the base (skipped when ``args.explicit`` is set), a suppression
    layer sits on top of it, and the values given on the command line win
    over both. In interactive mode the mapping comes from
    ``self.init_interactive`` instead.

    Returns:
        The result of ``self.repo.experiments.run`` when ``args.run`` is
        set, otherwise ``0``.

    Raises:
        InvalidArgumentError: non-interactive call without a command.
        DuplicateStageName: the stage name already exists in ``dvc.yaml``
            and ``args.force`` is not set.
    """
    from dvc.command.stage import parse_cmd

    args = self.args
    cmd = parse_cmd(args.cmd)
    if not (args.interactive or cmd):
        raise InvalidArgumentError("command is not specified")

    from dvc.dvcfile import make_dvcfile

    global_defaults = {
        "code": self.CODE,
        "data": self.DATA,
        "models": self.MODELS,
        "metrics": self.DEFAULT_METRICS,
        "params": self.DEFAULT_PARAMS,
        "plots": self.PLOTS,
        "live": self.DVCLIVE,
    }

    dvcfile = make_dvcfile(self.repo, "dvc.yaml")
    name = args.name or args.type

    dvcfile_exists = dvcfile.exists()
    if not args.force and dvcfile_exists and name in dvcfile.stages:
        from dvc.stage.exceptions import DuplicateStageName

        hint = "Use '--force' to overwrite."
        raise DuplicateStageName(
            f"Stage '{name}' already exists in 'dvc.yaml'. {hint}"
        )

    context = ChainMap()
    if not args.explicit:
        user_config = {}  # TODO
        context.maps.extend([user_config, global_defaults])

    with_live = args.type == "live"
    if args.interactive:
        try:
            context = self.init_interactive(
                defaults=context,
                show_heading=not dvcfile_exists,
                live=with_live,
            )
        except (KeyboardInterrupt, EOFError):
            ui.error_write()
            raise
    else:
        if with_live:
            # Live selected: mask default `metrics`/`params`; values given
            # on the command line are layered on top below.
            masked = {"metrics": None, "params": None}
        else:
            # Live not selected: mask the default `live` entry.
            masked = {"live": None}
        context = context.new_child(masked)

        cli_values = compact(
            {
                "cmd": cmd,
                "code": args.code,
                "data": args.data,
                "models": args.models,
                "metrics": args.metrics,
                "params": args.params,
                "plots": args.plots,
                "live": args.live,
            }
        )
        context = context.new_child(cli_values)

    assert "cmd" in context
    command = context["cmd"]
    code, data = context.get("code"), context.get("data")
    models = context.get("models")
    metrics, plots = context.get("metrics"), context.get("plots")
    live = context.get("live")

    params_kv = []
    params_path = context.get("params")
    if params_path:
        from dvc.utils.serialize import LOADERS

        # The loader is selected by the params file's extension.
        ext = os.path.splitext(params_path)[1]
        params_kv = [{params_path: list(LOADERS[ext](params_path))}]

    # A truthy `live` value turns the models output into a checkpoint.
    models_key = "checkpoints" if live else "outs"
    stage = self.repo.stage.add(
        name=name,
        cmd=command,
        deps=compact([code, data]),
        params=params_kv,
        metrics_no_cache=compact([metrics]),
        plots_no_cache=compact([plots]),
        live=live,
        force=args.force,
        **{models_key: compact([models])},
    )

    if not args.run:
        return 0
    return self.repo.experiments.run(targets=[stage.addressing])