Exemple #1
0
    def dvcfile(self) -> "DVCFile":
        """Return the dvcfile object for this stage, building it on demand.

        An already stored object is reused as long as its ``path`` equals
        ``self.path``. Otherwise a new one is created with
        ``make_dvcfile(self.repo, self.path)`` and kept on ``self._dvcfile``.

        Raises:
            DvcException: if ``self.path`` is empty or unset.
        """
        stage_path = self.path
        if not stage_path:
            raise DvcException("Stage does not have any path set "
                               "and is detached from dvcfile.")

        cached = self._dvcfile
        if cached and stage_path == cached.path:
            # Stored object still matches the current path: reuse it.
            return cached

        # Imported at call time rather than at module level.
        from dvc.dvcfile import make_dvcfile

        self._dvcfile = make_dvcfile(self.repo, stage_path)
        return self._dvcfile
Exemple #2
0
def check_stage_exists(repo: "Repo", stage: Union["Stage", "PipelineStage"],
                       path: str):
    """Raise if writing ``stage`` to ``path`` would clash with existing data.

    Nothing happens when the dvcfile built for ``path`` does not exist.

    Args:
        repo: repository passed to ``make_dvcfile``.
        stage: the stage about to be written.
        path: location of the dvcfile to check.

    Raises:
        StageFileAlreadyExistsError: the dvcfile exists and ``stage`` is not
            a ``PipelineStage``.
        DuplicateStageName: ``stage`` is a ``PipelineStage`` whose non-empty
            name is already present in ``dvcfile.stages``.
    """
    from dvc.dvcfile import make_dvcfile
    from dvc.stage import PipelineStage
    from dvc.stage.exceptions import (
        DuplicateStageName,
        StageFileAlreadyExistsError,
    )

    existing = make_dvcfile(repo, path)
    if not existing.exists():
        return

    hint = "Use '--force' to overwrite."

    if isinstance(stage, PipelineStage):
        # Pipeline files hold several stages; only a name clash is an error.
        if stage.name and stage.name in existing.stages:
            raise DuplicateStageName(
                f"Stage '{stage.name}' already exists in '{stage.relpath}'. {hint}"
            )
        return

    # Any other kind of stage conflicts as soon as the file exists.
    raise StageFileAlreadyExistsError(
        f"'{stage.relpath}' already exists. {hint}")
Exemple #3
0
def init(
    repo: "Repo",
    name: str = None,
    type: str = "default",  # pylint: disable=redefined-builtin
    defaults: Dict[str, str] = None,
    overrides: Dict[str, str] = None,
    interactive: bool = False,
    force: bool = False,
    stream: Optional[TextIO] = None,
) -> "Stage":
    """Create a stage from defaults/overrides and write it to ``dvc.yaml``.

    Args:
        repo: repository used to build the dvcfile, create the stage and
            provide the SCM context.
        name: stage name; ``type`` is used when it is empty.
        type: ``"live"`` enables the live-specific handling of defaults.
        defaults: fallback values keyed by field name. A copy is used, the
            argument itself is not modified.
        overrides: explicitly provided values; they win over ``defaults``.
        interactive: collect values through ``init_interactive``, show the
            resulting YAML and ask for confirmation before writing.
        force: forwarded to ``_check_stage_exists`` and
            ``repo.stage.create``.
        stream: forwarded to ``init_interactive`` and the confirmation
            prompt.

    Returns:
        The created stage, after it has been dumped.

    Raises:
        DvcException: if the user declines the confirmation prompt.
    """
    from dvc.dvcfile import make_dvcfile

    name = name or type
    _check_stage_exists(make_dvcfile(repo, "dvc.yaml"), name, force=force)

    defaults = {} if not defaults else defaults.copy()
    overrides = {} if not overrides else overrides.copy()

    is_live = type == "live"
    if interactive:
        defaults = init_interactive(
            name,
            validator=validate_prompts,
            defaults=defaults,
            live=is_live,
            provided=overrides,
            stream=stream,
        )
    elif is_live:
        # Live replaces the default `metrics`/`plots`; they can still come
        # back through overrides. This also makes the output a checkpoint.
        for key in ("metrics", "plots"):
            defaults.pop(key, None)
    else:
        # Not a live stage: drop the default `live` entry.
        defaults.pop("live", None)

    context: Dict[str, str] = {**defaults, **overrides}
    assert "cmd" in context

    params = context.get("params")
    params_kv = [loadd_params(params)] if params else []

    live = context.get("live")
    # Models are registered as checkpoints whenever live is in effect.
    models_kwarg = "checkpoints" if live else "outs"
    stage = repo.stage.create(
        name=name,
        cmd=context["cmd"],
        deps=compact([context.get("code"),
                      context.get("data")]),
        params=params_kv,
        metrics_no_cache=compact([context.get("metrics")]),
        plots_no_cache=compact([context.get("plots")]),
        live=live,
        force=force,
        **{models_kwarg: compact([context.get("models")])},
    )

    if interactive:
        # Preview the stage as YAML before asking for confirmation.
        ui.error_write(Rule(style="green"), styled=True)
        rendered = dumps_yaml(to_pipeline_file(cast(PipelineStage, stage)))
        ui.error_write(
            Syntax(rendered, "yaml", theme="ansi_dark"),
            styled=True,
        )

    from dvc.ui.prompt import Confirm

    if interactive and not Confirm.ask(
            "Do you want to add the above contents to dvc.yaml?",
            console=ui.error_console,
            default=True,
            stream=stream,
    ):
        raise DvcException("Aborting ...")

    with _disable_logging(), repo.scm_context(autostage=True, quiet=True):
        stage.dump(update_lock=False)
        stage.ignore_outs()
        if params:
            repo.scm_context.track_file(params)
    return stage
Exemple #4
0
def init(
    repo: "Repo",
    name: str = "train",
    type: str = "default",  # pylint: disable=redefined-builtin
    defaults: Dict[str, str] = None,
    overrides: Dict[str, str] = None,
    interactive: bool = False,
    force: bool = False,
    stream: Optional[TextIO] = None,
) -> Tuple[PipelineStage, List["Dependency"], List[str]]:
    """Create a stage, write it to ``dvc.yaml`` and initialise its paths.

    Args:
        repo: repository used to build the dvcfile, create the stage and
            provide the SCM context.
        name: stage name.
        type: ``"checkpoint"`` registers the models as checkpoints instead
            of regular outputs.
        defaults: fallback values keyed by field name. A copy is used, the
            argument itself is not modified.
        overrides: explicitly provided values; they win over ``defaults``.
        interactive: collect values through ``init_interactive``.
        force: forwarded to ``_check_stage_exists`` and
            ``repo.stage.create``.
        stream: forwarded to ``init_interactive``.

    Returns:
        A tuple ``(stage, initialized_deps, initialized_out_dirs)`` where the
        last two items are the results of ``init_deps(stage)`` and
        ``init_out_dirs(stage)``.

    Raises:
        DvcException: if validating the params path raises
            ``ParamsIsADirectoryError``.
    """
    from dvc.dvcfile import make_dvcfile

    _check_stage_exists(make_dvcfile(repo, "dvc.yaml"), name, force=force)

    defaults = {} if not defaults else defaults.copy()
    overrides = {} if not overrides else overrides.copy()

    if interactive:
        defaults = init_interactive(
            validator=partial(validate_prompts, repo),
            defaults=defaults,
            provided=overrides,
            stream=stream,
        )
    elif "live" in overrides:
        # Live was requested explicitly: drop the default `metrics`/`plots`.
        for key in ("metrics", "plots"):
            defaults.pop(key, None)
    else:
        # Live was not requested: drop its default entry.
        defaults.pop("live", None)

    context: Dict[str, str] = {**defaults, **overrides}
    assert "cmd" in context

    params = context.get("params")
    if params:
        from dvc.dependency.param import (
            MissingParamsFile,
            ParamsDependency,
            ParamsIsADirectoryError,
        )

        params_dep = ParamsDependency(None, params, repo=repo)
        try:
            params_dep.validate_filepath()
        except ParamsIsADirectoryError as exc:
            raise DvcException(f"{exc}.")  # swallow cause for display
        except MissingParamsFile:
            # A params file that does not exist yet is accepted.
            pass

    # `live` is removed from the context and expanded into the metrics and
    # plots paths derived from it.
    live_path = context.pop("live", None)
    if live_path:
        live_metrics = f"{live_path}.json"
        live_plots = os.path.join(live_path, "scalars")
    else:
        live_metrics = None
        live_plots = None

    models_kwarg = "checkpoints" if type == "checkpoint" else "outs"
    stage = repo.stage.create(
        name=name,
        cmd=context["cmd"],
        deps=compact([context.get("code"),
                      context.get("data")]),
        params=[{params: None}] if params else None,
        metrics_no_cache=compact([context.get("metrics"), live_metrics]),
        plots_no_cache=compact([context.get("plots"), live_plots]),
        force=force,
        **{models_kwarg: compact([context.get("models")])},
    )

    with _disable_logging(), repo.scm_context(autostage=True, quiet=True):
        stage.dump(update_lock=False)
        initialized_out_dirs = init_out_dirs(stage)
        stage.ignore_outs()
        initialized_deps = init_deps(stage)
        if params:
            repo.scm_context.track_file(params)

    assert isinstance(stage, PipelineStage)
    return stage, initialized_deps, initialized_out_dirs
Exemple #5
0
def init(
    repo: "Repo",
    name: str = None,
    type: str = "default",  # pylint: disable=redefined-builtin
    defaults: Dict[str, str] = None,
    overrides: Dict[str, str] = None,
    interactive: bool = False,
    force: bool = False,
) -> "Stage":
    """Create a stage from defaults/overrides and write it to ``dvc.yaml``.

    Args:
        repo: repository used to build the dvcfile and create the stage.
        name: stage name; ``type`` is used when it is empty.
        type: ``"live"`` enables the live-specific handling of defaults.
        defaults: fallback values keyed by field name. A copy is used, so
            the caller's dictionary is never modified.
        overrides: explicitly provided values; they win over ``defaults``.
        interactive: collect values through ``init_interactive``, show the
            resulting YAML and ask for confirmation before writing.
        force: forwarded to ``_check_stage_exists`` and
            ``repo.stage.create``.

    Returns:
        The created stage, after it has been dumped.

    Raises:
        DvcException: if the user declines the confirmation prompt.
    """
    from dvc.dvcfile import make_dvcfile

    dvcfile = make_dvcfile(repo, "dvc.yaml")
    name = name or type

    _check_stage_exists(dvcfile, name, force=force)

    # Work on private copies: entries are popped below, and that must not
    # leak into the dictionaries owned by the caller.
    defaults = defaults.copy() if defaults else {}
    overrides = overrides.copy() if overrides else {}

    with_live = type == "live"
    if interactive:
        defaults = init_interactive(
            name,
            defaults=defaults,
            live=with_live,
            provided=overrides,
            show_tree=True,
        )
    else:
        # `pop(key, None)` tolerates defaults that lack the key (for example
        # when no defaults were passed at all) instead of raising KeyError.
        if with_live:
            # suppress `metrics`/`params` if live is selected, unless
            # it is also provided via overrides/cli.
            # This makes output to be a checkpoint as well.
            defaults.pop("metrics", None)
            defaults.pop("params", None)
        else:
            defaults.pop("live", None)  # suppress live otherwise

    context: Dict[str, str] = {**defaults, **overrides}
    assert "cmd" in context

    params_kv = []
    if context.get("params"):
        from dvc.utils.serialize import LOADERS

        path = context["params"]
        assert isinstance(path, str)
        # The loader is selected by file extension; `list(...)` over its
        # result presumably yields the top-level parameter names - TODO
        # confirm against the loaders' return type.
        _, ext = os.path.splitext(path)
        params_kv = [{path: list(LOADERS[ext](path))}]

    # Models are registered as checkpoints whenever live is in effect.
    checkpoint_out = bool(context.get("live"))
    models = context.get("models")
    stage = repo.stage.create(
        name=name,
        cmd=context["cmd"],
        deps=compact([context.get("code"),
                      context.get("data")]),
        params=params_kv,
        metrics_no_cache=compact([context.get("metrics")]),
        plots_no_cache=compact([context.get("plots")]),
        live=context.get("live"),
        force=force,
        **{"checkpoints" if checkpoint_out else "outs": compact([models])},
    )

    if interactive:
        # Preview the stage as YAML before asking for confirmation.
        ui.write(Rule(style="green"), styled=True)
        _yaml = dumps_yaml(to_pipeline_file(cast(PipelineStage, stage)))
        syn = Syntax(_yaml, "yaml", theme="ansi_dark")
        ui.error_write(syn, styled=True)

    if not interactive or ui.confirm(
            "Do you want to add the above contents to dvc.yaml?"):
        with _disable_logging():
            stage.dump(update_lock=False)
        stage.ignore_outs()
    else:
        raise DvcException("Aborting ...")
    return stage
Exemple #6
0
def init(
    repo: "Repo",
    name: str = None,
    type: str = "default",  # pylint: disable=redefined-builtin
    defaults: Dict[str, str] = None,
    overrides: Dict[str, str] = None,
    interactive: bool = False,
    force: bool = False,
) -> "Stage":
    """Build a stage from defaults/overrides and add it via ``repo.stage.add``.

    Args:
        repo: repository used to build the dvcfile and add the stage.
        name: stage name; ``type`` is used when it is empty.
        type: ``"live"`` enables the live-specific handling of defaults.
        defaults: fallback values keyed by field name. A copy is used, so
            the caller's dictionary is never modified.
        overrides: explicitly provided values; they win over ``defaults``.
        interactive: collect values through ``init_interactive``.
        force: forwarded to ``_check_stage_exists`` and ``repo.stage.add``.

    Returns:
        Whatever ``repo.stage.add`` returns for the new stage.
    """
    from dvc.dvcfile import make_dvcfile

    dvcfile = make_dvcfile(repo, "dvc.yaml")
    name = name or type

    _check_stage_exists(dvcfile, name, force=force)

    # Work on private copies: entries are popped below, and that must not
    # leak into the dictionaries owned by the caller.
    defaults = defaults.copy() if defaults else {}
    overrides = overrides.copy() if overrides else {}

    with_live = type == "live"
    if interactive:
        defaults = init_interactive(
            defaults=defaults,
            show_heading=not dvcfile.exists(),
            live=with_live,
            provided=overrides.keys(),
        )
    else:
        # `pop(key, None)` tolerates defaults that lack the key (for example
        # when no defaults were passed at all) instead of raising KeyError.
        if with_live:
            # suppress `metrics`/`params` if live is selected, unless
            # it is also provided via overrides/cli.
            # This makes output to be a checkpoint as well.
            defaults.pop("metrics", None)
            defaults.pop("params", None)
        else:
            defaults.pop("live", None)  # suppress live otherwise

    context: Dict[str, str] = {**defaults, **overrides}
    assert "cmd" in context

    params_kv = []
    if context.get("params"):
        from dvc.utils.serialize import LOADERS

        path = context["params"]
        assert isinstance(path, str)
        # The loader is selected by file extension; `list(...)` over its
        # result presumably yields the top-level parameter names - TODO
        # confirm against the loaders' return type.
        _, ext = os.path.splitext(path)
        params_kv = [{path: list(LOADERS[ext](path))}]

    # Models are registered as checkpoints whenever live is in effect.
    checkpoint_out = bool(context.get("live"))
    models = context.get("models")
    return repo.stage.add(
        name=name,
        cmd=context["cmd"],
        deps=compact([context.get("code"),
                      context.get("data")]),
        params=params_kv,
        metrics_no_cache=compact([context.get("metrics")]),
        plots_no_cache=compact([context.get("plots")]),
        live=context.get("live"),
        force=force,
        **{"checkpoints" if checkpoint_out else "outs": compact([models])},
    )
Exemple #7
0
    def run(self):
        """Build a stage from CLI arguments and defaults, then add it.

        Values are layered in a ``ChainMap``: the class-level defaults sit at
        the bottom (unless ``--explicit`` is set), then a layer masking the
        entries that do not apply to the selected type, then the values
        given on the command line. In interactive mode the context comes
        from ``self.init_interactive`` instead.

        Returns:
            The result of ``self.repo.experiments.run`` when ``args.run`` is
            set, otherwise ``0``.

        Raises:
            InvalidArgumentError: no command was given in non-interactive
                mode.
            DuplicateStageName: the stage name already exists in dvc.yaml
                and ``--force`` was not given.
        """
        from dvc.command.stage import parse_cmd

        args = self.args
        cmd = parse_cmd(args.cmd)
        if not (args.interactive or cmd):
            raise InvalidArgumentError("command is not specified")

        from dvc.dvcfile import make_dvcfile

        global_defaults = {
            "code": self.CODE,
            "data": self.DATA,
            "models": self.MODELS,
            "metrics": self.DEFAULT_METRICS,
            "params": self.DEFAULT_PARAMS,
            "plots": self.PLOTS,
            "live": self.DVCLIVE,
        }

        dvcfile = make_dvcfile(self.repo, "dvc.yaml")
        name = args.name or args.type

        dvcfile_exists = dvcfile.exists()
        if not args.force and dvcfile_exists and name in dvcfile.stages:
            from dvc.stage.exceptions import DuplicateStageName

            hint = "Use '--force' to overwrite."
            raise DuplicateStageName(
                f"Stage '{name}' already exists in 'dvc.yaml'. {hint}"
            )

        context = ChainMap()
        if not args.explicit:
            config = {}  # TODO
            context.maps.extend([config, global_defaults])

        with_live = args.type == "live"
        if args.interactive:
            try:
                context = self.init_interactive(
                    defaults=context,
                    show_heading=not dvcfile_exists,
                    live=with_live,
                )
            except (KeyboardInterrupt, EOFError):
                ui.error_write()
                raise
        else:
            if with_live:
                # suppress `metrics`/`params` if live is selected, unless
                # also provided via cli, also make output to be a checkpoint.
                masked = {"metrics": None, "params": None}
            else:
                # suppress live otherwise
                masked = {"live": None}
            context = context.new_child(masked)

            # Values given on the command line take precedence over all
            # lower layers.
            cli_values = compact(
                {
                    "cmd": cmd,
                    "code": args.code,
                    "data": args.data,
                    "models": args.models,
                    "metrics": args.metrics,
                    "params": args.params,
                    "plots": args.plots,
                    "live": args.live,
                }
            )
            context = context.new_child(cli_values)

        assert "cmd" in context
        live = context.get("live")

        params_kv = []
        params_path = context.get("params")
        if params_path:
            from dvc.utils.serialize import LOADERS

            ext = os.path.splitext(params_path)[1]
            params_kv = [{params_path: list(LOADERS[ext](params_path))}]

        # Models are registered as checkpoints whenever live is in effect.
        models_kwarg = "checkpoints" if live else "outs"
        stage = self.repo.stage.add(
            name=name,
            cmd=context["cmd"],
            deps=compact([context.get("code"), context.get("data")]),
            params=params_kv,
            metrics_no_cache=compact([context.get("metrics")]),
            plots_no_cache=compact([context.get("plots")]),
            live=live,
            force=args.force,
            **{models_kwarg: compact([context.get("models")])},
        )

        if not args.run:
            return 0
        return self.repo.experiments.run(targets=[stage.addressing])