コード例 #1
0
def test_frozen(dvc):
    """Check that the frozen flag is serialized only when it is set.

    A stage created without ``frozen`` must not carry ``PARAM_FROZEN`` in its
    "something" entry of the pipeline file; a stage created with
    ``frozen=True`` must carry it with the value ``True``.
    """
    unfrozen = create_stage(
        PipelineStage, dvc, outs=["output"], deps=["input"], **kwargs
    )
    unfrozen_entry = to_pipeline_file(unfrozen)["something"]
    assert unfrozen.PARAM_FROZEN not in unfrozen_entry

    frozen = create_stage(PipelineStage, dvc, **kwargs, frozen=True)
    frozen_entry = to_pipeline_file(frozen)["something"]
    assert frozen_entry[frozen.PARAM_FROZEN] is True
コード例 #2
0
def test_always_changed(dvc):
    """Check that ``always_changed`` is serialized only when it is set.

    Without the flag, ``PARAM_ALWAYS_CHANGED`` must be absent from the
    "something" entry of the pipeline file; with ``always_changed=True`` it
    must be present and ``True``.
    """
    plain = create_stage(
        PipelineStage, dvc, outs=["output"], deps=["input"], **kwargs
    )
    plain_entry = to_pipeline_file(plain)["something"]
    assert plain.PARAM_ALWAYS_CHANGED not in plain_entry

    flagged = create_stage(PipelineStage, dvc, **kwargs, always_changed=True)
    flagged_entry = to_pipeline_file(flagged)["something"]
    assert flagged_entry[flagged.PARAM_ALWAYS_CHANGED] is True
コード例 #3
0
def test_fill_from_lock_params(dvc, lock_data):
    """Check that ``fill_from_lock`` populates params deps from lock data.

    The stage declares params from the default file and from
    ``myparams.yaml``. Before filling, neither params dependency has hash
    info; afterwards each one's hash value equals the matching section of
    ``lock_data["params"]``.
    """
    declared_params = [
        "lorem",
        "lorem.ipsum",
        {"myparams.yaml": ["ipsum", "foobar"]},
    ]
    stage = create_stage(
        PipelineStage,
        dvc,
        PIPELINE_FILE,
        deps=["foo"],
        outs=["bar"],
        params=declared_params,
    )

    default_file_lock = {
        "lorem": "lorem",
        "lorem.ipsum": ["i", "p", "s", "u", "m"],
    }
    # The lock intentionally carries no value for the `foobar` param.
    custom_file_lock = {"ipsum": "ipsum"}
    lock_data["params"] = {
        "params.yaml": default_file_lock,
        "myparams.yaml": custom_file_lock,
    }

    params_deps = split_params_deps(stage)[0]
    assert set(params_deps[0].params) == {"lorem", "lorem.ipsum"}
    assert set(params_deps[1].params) == {"ipsum", "foobar"}
    assert not params_deps[0].hash_info
    assert not params_deps[1].hash_info

    StageLoader.fill_from_lock(stage, lock_data)

    locked_params = lock_data["params"]
    assert params_deps[0].hash_info.value == locked_params["params.yaml"]
    assert params_deps[1].hash_info.value == locked_params["myparams.yaml"]
コード例 #4
0
def test_params_file_sorted(dvc):
    """Check the ordering of params in the serialized pipeline entry.

    Plain param names are expected first in sorted order, followed by the
    per-file mappings ordered by file name, each with its own names sorted.
    """
    unsorted_params = [
        "lorem",
        "ipsum",
        {"custom.yaml": ["wxyz", "pqrs", "baz"]},
        {"a-file-of-params.yaml": ["barr"]},
    ]
    expected_params = [
        "ipsum",
        "lorem",
        {"a-file-of-params.yaml": ["barr"]},
        {"custom.yaml": ["baz", "pqrs", "wxyz"]},
    ]

    stage = create_stage(
        PipelineStage,
        dvc,
        outs=["bar"],
        deps=["foo"],
        params=unsorted_params,
        **kwargs,
    )
    entry = to_pipeline_file(stage)["something"]
    assert entry[stage.PARAM_PARAMS] == expected_params
コード例 #5
0
def test_lock_outs(dvc, typ):
    """Check that an output of kind ``typ`` is locked under the "outs" key.

    The stage is created with a single ``typ`` entry; after assigning it an
    md5, the lockfile data must hold the command followed by that output's
    path and md5.
    """
    stage = create_stage(PipelineStage, dvc, **{typ: ["input"]}, **kwargs)
    stage.outs[0].info = {"md5": "md-five"}

    locked_out = OrderedDict([("path", "input"), ("md5", "md-five")])
    expected = OrderedDict([("cmd", "command"), ("outs", [locked_out])])
    assert to_single_stage_lockfile(stage) == expected
コード例 #6
0
    def _create_stage(self, cache, wdir=None):
        """Rebuild a ``PipelineStage`` from a cache entry.

        Args:
            cache: mapping with a required "cmd" and "outs", and optional
                "params" and "deps". Each params item is either a string or
                a single-key dict mapping a params file path to a list of
                param names.
            wdir: working directory forwarded to ``create_stage``.

        Returns:
            The created stage after ``StageLoader.fill_from_lock`` has been
            applied to it with the same ``cache`` mapping.
        """
        from dvc.stage import create_stage, PipelineStage

        params = []
        for entry in cache.get("params", []):
            if not isinstance(entry, str):
                assert isinstance(entry, dict)
                assert len(entry) == 1
                path, names = next(iter(entry.items()))
                assert isinstance(names, list)
                # Flatten {"file": ["a", "b"]} into the "file:a,b" form.
                entry = f"{path}:" + ",".join(names)
            params.append(entry)

        cmd = cache["cmd"]
        dep_paths = [dep["path"] for dep in cache.get("deps", [])]
        out_paths = [out["path"] for out in cache["outs"]]

        stage = create_stage(
            PipelineStage,
            repo=self.repo,
            path="dvc.yaml",
            cmd=cmd,
            wdir=wdir,
            params=params,
            deps=dep_paths,
            outs=out_paths,
        )
        StageLoader.fill_from_lock(stage, cache)
        return stage
コード例 #7
0
def test_order(dvc):
    """Check the key order of the lockfile data for a stage.

    With a dependency, a param and an output all carrying info, the
    lockfile data must list "cmd", "deps", "params" and "outs" in that order.
    """
    stage = create_stage(
        PipelineStage,
        dvc,
        deps=["input"],
        outs=["output"],
        params=["foo-param"],
        **kwargs,
    )
    params, deps = split_params_deps(stage)

    deps[0].info = {"md5": "md-five"}
    params[0].info = {"foo-param": "value"}
    stage.outs[0].info = {"md5": "md5-output"}

    expected = OrderedDict(
        [
            ("cmd", "command"),
            ("deps", [{"path": "input", "md5": "md-five"}]),
            ("params", {"params.yaml": {"foo-param": "value"}}),
            ("outs", [{"path": "output", "md5": "md5-output"}]),
        ]
    )
    assert to_single_stage_lockfile(stage) == expected
コード例 #8
0
def _create_stages(repo, targets, fname, pbar=None, external=False):
    """Create one single-file ``Stage`` per target.

    Args:
        repo: repository the stages are created in.
        targets: paths to create stages for.
        fname: stage file path to use instead of the one resolved per target.
        pbar: optional progress bar; its total is decremented for every
            target that yields no stage and its message is updated otherwise.
        external: forwarded to ``create_stage``.

    Returns:
        List of the stages that were created (targets for which
        ``create_stage`` returned a falsy value are skipped).
    """
    from dvc.stage import Stage, create_stage

    created = []
    progress = Tqdm(
        targets,
        desc="Creating DVC-files",
        disable=len(targets) < LARGE_DIR_SIZE,
        unit="file",
    )

    for out in progress:
        path, wdir, out = resolve_paths(repo, out)
        stage = create_stage(
            Stage,
            repo,
            fname or path,
            wdir=wdir,
            outs=[out],
            external=external,
        )
        if stage:
            Dvcfile(repo, stage.path).remove_with_prompt(force=True)

        # Reset the repo after every target, whether or not a stage was made.
        repo._reset()

        if stage:
            created.append(stage)
            if pbar is not None:
                pbar.update_msg(out)
        elif pbar is not None:
            pbar.total -= 1

    return created
コード例 #9
0
def run(self, fname=None, no_exec=False, **kwargs):
    """Create a stage, optionally execute it, and dump it to its file.

    When ``kwargs["name"]`` is truthy a ``PipelineStage`` is created in the
    pipeline file; otherwise a single-file ``Stage`` is created at ``fname``
    or at the path derived from ``kwargs``.

    Args:
        fname: stage file path for the unnamed (single-file) case.
        no_exec: when true, the stage is dumped without being run.
        **kwargs: stage data forwarded to ``create_stage``; "name",
            "overwrite" and "no_commit" are also read here.

    Returns:
        The created stage, or ``None`` if ``create_stage`` returned ``None``.

    Raises:
        InvalidStageName: if a name is given but is not valid.
        DuplicateStageName: if the named stage already exists in the file.
        OutputDuplicationError: re-raised with this stage removed from the
            reported set of conflicting stages.
    """
    from dvc.stage import PipelineStage, Stage, create_stage
    from dvc.dvcfile import Dvcfile, PIPELINE_FILE

    stage_name = kwargs.get("name")
    if not stage_name:
        # Drop a falsy "name" so it is not forwarded to create_stage.
        kwargs.pop("name", None)
        stage_cls = Stage
        path = fname or _get_file_path(kwargs)
    elif not is_valid_name(stage_name):
        raise InvalidStageName
    else:
        stage_cls = PipelineStage
        path = PIPELINE_FILE

    stage = create_stage(stage_cls, repo=self, path=path, **kwargs)
    if stage is None:
        return None

    dvcfile = Dvcfile(self, stage.path)
    if dvcfile.exists():
        if stage_name and stage_name in dvcfile.stages:
            raise DuplicateStageName(stage_name, dvcfile)
        if stage_cls != PipelineStage:
            force = kwargs.get("overwrite", True)
            dvcfile.remove_with_prompt(force=force)

    try:
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        other_stages = set(exc.stages) - {stage}
        raise OutputDuplicationError(exc.output, other_stages)

    if not no_exec:
        no_commit = kwargs.get("no_commit", False)
        stage.run(no_commit=no_commit)
    dvcfile.dump(stage, update_pipeline=True)
    return stage
コード例 #10
0
def test_lock_deps(dvc):
    """Check that a dependency with an md5 is written to the lockfile data.

    The expected data holds the command followed by a "deps" list with the
    dependency's path and md5, in that order.
    """
    stage = create_stage(PipelineStage, dvc, deps=["input"], **kwargs)
    stage.deps[0].info = {"md5": "md-five"}

    locked_dep = OrderedDict([("path", "input"), ("md5", "md-five")])
    expected = OrderedDict([("cmd", "command"), ("deps", [locked_dep])])
    assert to_single_stage_lockfile(stage) == expected
コード例 #11
0
def test_outs_and_outs_flags_are_sorted(dvc, typ, extra):
    """Check ordering of serialized outputs and of their flags.

    Four outputs are appended in unsorted order with different
    ``use_cache``/``persist`` combinations. The serialized ``typ`` section
    must list them sorted by path, and the flags of "foobar" must appear as
    "cache" then "persist".
    """
    stage = create_stage(PipelineStage, dvc, deps=["input"], **kwargs)
    stage.outs += output.loads_from(stage, ["barr"], use_cache=False, **extra)
    stage.outs += output.loads_from(
        stage, ["foobar"], use_cache=False, persist=True, **extra
    )
    stage.outs += output.loads_from(stage, ["foo"], persist=True, **extra)
    stage.outs += output.loads_from(stage, ["bar"], **extra)

    expected_outs = [
        "bar",
        {"barr": {"cache": False}},
        {"foo": {"persist": True}},
        {"foobar": {"cache": False, "persist": True}},
    ]
    serialized_outs = to_pipeline_file(stage)["something"][typ]
    assert serialized_outs == expected_outs

    foobar_flags = serialized_outs[3]["foobar"]
    assert list(foobar_flags.keys()) == ["cache", "persist"]
コード例 #12
0
def test_lock_params_file_sorted(dvc):
    """Check ordering of the "params" section in the lockfile data.

    The default params file is expected first, followed by the other params
    files ordered by name; within each file the param names are sorted.
    """
    declared_params = [
        "lorem.ipsum",
        "abc",
        {"myparams.yaml": ["foo", "foobar"]},
        {"a-params-file.yaml": ["bar", "barr"]},
    ]
    stage = create_stage(PipelineStage, dvc, params=declared_params, **kwargs)
    stage.deps[0].info = {"lorem.ipsum": {"lorem1": 1, "lorem2": 2}, "abc": 3}
    stage.deps[1].info = {"foo": ["f", "o", "o"], "foobar": "foobar"}
    stage.deps[2].info = {"bar": ["b", "a", "r"], "barr": "barr"}

    default_section = OrderedDict(
        [("abc", 3), ("lorem.ipsum", {"lorem1": 1, "lorem2": 2})]
    )
    a_params_section = OrderedDict(
        [("bar", ["b", "a", "r"]), ("barr", "barr")]
    )
    myparams_section = OrderedDict(
        [("foo", ["f", "o", "o"]), ("foobar", "foobar")]
    )
    expected = OrderedDict(
        [
            (DEFAULT_PARAMS_FILE, default_section),
            ("a-params-file.yaml", a_params_section),
            ("myparams.yaml", myparams_section),
        ]
    )
    assert to_single_stage_lockfile(stage)["params"] == expected
コード例 #13
0
def imp_url(
    self,
    url,
    out=None,
    fname=None,
    erepo=None,
    frozen=True,
    no_exec=False,
    desc=None,
    jobs=None,
):
    """Create an import stage for ``url`` and dump it to its stage file.

    Args:
        url: source to import; used as the stage's only dependency.
        out: output path; resolved from ``url`` via ``resolve_output``.
        fname: stage file path to use instead of the resolved one.
        erepo: forwarded to ``create_stage``.
        frozen: value assigned to ``stage.frozen`` before dumping.
        no_exec: when true, outputs are only ignored instead of running the
            stage.
        desc: optional description set on the stage's first output.
        jobs: forwarded to ``stage.run``.

    Returns:
        The dumped stage, or ``None`` when ``stage.can_be_skipped`` is true.

    Raises:
        OutputDuplicationError: re-raised with this stage removed from the
            reported set of conflicting stages.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage, create_stage, restore_meta

    out = resolve_output(url, out)
    path, wdir, out = resolve_paths(self, out)

    # NOTE: when user is importing something from within their own repository
    imports_from_own_repo = (
        erepo is None
        and os.path.exists(url)
        and path_isin(os.path.abspath(url), self.root_dir)
    )
    if imports_from_own_repo:
        url = relpath(url, wdir)

    stage = create_stage(
        Stage,
        self,
        fname or path,
        wdir=wdir,
        deps=[url],
        outs=[out],
        erepo=erepo,
    )
    restore_meta(stage)
    if stage.can_be_skipped:
        return None

    if desc:
        stage.outs[0].desc = desc

    dvcfile = Dvcfile(self, stage.path)
    dvcfile.remove()

    try:
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        other_stages = set(exc.stages) - {stage}
        raise OutputDuplicationError(exc.output, other_stages)

    if not no_exec:
        stage.run(jobs=jobs)
    else:
        stage.ignore_outs()

    stage.frozen = frozen
    dvcfile.dump(stage)
    return stage
コード例 #14
0
ファイル: run.py プロジェクト: rjsears/dvc
def run(self, fname=None, no_exec=False, single_stage=False, **kwargs):
    """Create a stage from a command, optionally run it, and dump it.

    Args:
        fname: stage file path for the single-stage case.
        no_exec: when true, outputs are only ignored and the dump is made
            with ``no_lock=True``.
        single_stage: create a single-file ``Stage`` instead of a named
            ``PipelineStage``.
        **kwargs: stage data forwarded to ``create_stage``; "cmd", "name",
            "params", "overwrite", "no_commit" and "run_cache" are also read
            here.

    Returns:
        The created stage, or ``None`` if ``create_stage`` returned ``None``.

    Raises:
        InvalidArgumentError: if no command is given, if a name is combined
            with ``single_stage``, or if neither is provided.
        InvalidStageName: if the stage name is not valid.
        StageFileAlreadyExistsError: if a single-stage file exists and
            overwriting is disabled.
        DuplicateStageName: if the named stage exists and overwriting is
            disabled.
        OutputDuplicationError: re-raised with this stage removed from the
            reported set of conflicting stages.
    """
    from dvc.stage import PipelineStage, Stage, create_stage
    from dvc.dvcfile import Dvcfile, PIPELINE_FILE

    if not kwargs.get("cmd"):
        raise InvalidArgumentError("command is not specified")

    stage_name = kwargs.get("name")
    if stage_name and single_stage:
        raise InvalidArgumentError(
            "`-n|--name` is incompatible with `--single-stage`"
        )
    if not (stage_name or single_stage):
        raise InvalidArgumentError("`-n|--name` is required")

    if single_stage:
        kwargs.pop("name", None)
        stage_cls = Stage
        path = fname or _get_file_path(kwargs)
    elif not is_valid_name(stage_name):
        raise InvalidStageName
    else:
        stage_cls = PipelineStage
        path = PIPELINE_FILE

    params = parse_params(kwargs.pop("params", []))
    stage = create_stage(
        stage_cls, repo=self, path=path, params=params, **kwargs
    )
    if stage is None:
        return None

    dvcfile = Dvcfile(self, stage.path)
    if dvcfile.exists():
        if kwargs.get("overwrite", True):
            dvcfile.remove_stage(stage)
        elif stage_cls != PipelineStage:
            raise StageFileAlreadyExistsError(dvcfile.relpath)
        elif stage_name and stage_name in dvcfile.stages:
            raise DuplicateStageName(stage_name, dvcfile)

    try:
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        other_stages = set(exc.stages) - {stage}
        raise OutputDuplicationError(exc.output, other_stages)

    if not no_exec:
        stage.run(
            no_commit=kwargs.get("no_commit", False),
            run_cache=kwargs.get("run_cache", True),
        )
    else:
        stage.ignore_outs()

    dvcfile.dump(stage, update_pipeline=True, no_lock=no_exec)
    return stage
コード例 #15
0
def test_fill_from_lock_empty_data(dvc):
    """Check that filling from ``None`` or ``{}`` leaves hash info unset."""
    stage = create_stage(
        PipelineStage, dvc, PIPELINE_FILE, deps=["foo"], outs=["bar"]
    )

    for empty_lock in (None, {}):
        StageLoader.fill_from_lock(stage, empty_lock)
        assert not stage.deps[0].hash_info
        assert not stage.outs[0].hash_info
コード例 #16
0
def test_plot_props(dvc):
    """Check that plot properties are serialized next to the plot path.

    The properties assigned to the output's ``plot`` attribute must appear
    as the value of a single-key mapping keyed by the plot file name.
    """
    plot_props = {"x": "1"}
    stage = create_stage(PipelineStage, dvc, plots=["plot_file"], **kwargs)
    stage.outs[0].plot = plot_props

    entry = to_pipeline_file(stage)["something"]
    assert entry[stage.PARAM_PLOTS] == [{"plot_file": plot_props}]
コード例 #17
0
ファイル: stage.py プロジェクト: jhhuh/dvc
    def create(
        self,
        single_stage: bool = False,
        validate: bool = True,
        fname: str = None,
        force: bool = False,
        **stage_data,
    ) -> Union["Stage", "PipelineStage"]:
        """Build a new stage object from the given stage data.

        Args:
            single_stage: when true, a ``Stage`` is created at ``fname`` (or
                at the path prepared from the stage data); otherwise a
                ``PipelineStage`` is created in the pipeline file and
                ``stage_data["name"]`` must be a valid stage name.
            validate: when true, the stage is added to the repo index and the
                resulting graph is checked; unless ``force`` is set, the
                stage is also checked against already existing stages.
            fname: stage file path, used only for single stages.
            force: skip the existing-stage check during validation.
            stage_data: data passed through ``validate_kwargs`` and then
                forwarded to ``create_stage``.

        Returns:
            The created stage, after ``restore_meta`` was applied to it.

        Raises:
            InvalidStageName: if a pipeline stage has a missing or invalid
                name.
        """
        from dvc.stage import PipelineStage, Stage, create_stage, restore_meta
        from dvc.stage.exceptions import InvalidStageName
        from dvc.stage.utils import (
            is_valid_name,
            prepare_file_path,
            validate_kwargs,
        )

        stage_data = validate_kwargs(
            single_stage=single_stage, fname=fname, **stage_data
        )

        if not single_stage:
            path = PIPELINE_FILE
            stage_cls = PipelineStage
            stage_name = stage_data["name"]
            if not stage_name or not is_valid_name(stage_name):
                raise InvalidStageName
        else:
            stage_cls = Stage
            path = fname or prepare_file_path(stage_data)

        stage = create_stage(
            stage_cls, repo=self.repo, path=path, **stage_data
        )

        if validate:
            if not force:
                from dvc.stage.utils import check_stage_exists

                check_stage_exists(self.repo, stage, stage.path)

            # Check the graph of an index that includes the new stage.
            self.repo.index.add(stage).check_graph()

        restore_meta(stage)
        return stage
コード例 #18
0
def test_wdir(dvc):
    """Check when ``wdir`` is written to the pipeline file.

    It must be omitted for a freshly created stage and for a stage whose
    ``wdir`` is ``os.curdir``, and written verbatim for any other directory.
    """
    stage = create_stage(PipelineStage, dvc, **kwargs)
    wdir_key = stage.PARAM_WDIR

    entry = to_pipeline_file(stage)["something"]
    assert wdir_key not in entry

    stage.wdir = os.curdir
    entry = to_pipeline_file(stage)["something"]
    assert wdir_key not in entry

    stage.wdir = "some-dir"
    entry = to_pipeline_file(stage)["something"]
    assert entry[wdir_key] == "some-dir"
コード例 #19
0
def test_order_deps_outs(dvc, typ):
    """Check section order in the pipeline entry when ``typ`` is left out.

    Every section except ``typ`` is given one item. The serialized entry
    must not contain ``typ`` and its keys must be "cmd" followed by the
    remaining sections in their canonical order.
    """
    canonical_order = ("deps", "params", "outs", "metrics", "plots")
    present = [section for section in canonical_order if section != typ]
    extra = {section: [f"foo-{idx}"] for idx, section in enumerate(present)}

    stage = create_stage(PipelineStage, dvc, **kwargs, **extra)
    assert typ not in to_pipeline_file(stage)["something"]

    serialized_keys = list(to_pipeline_file(stage)["something"].keys())
    assert serialized_keys == ["cmd"] + present
コード例 #20
0
    def make(path="dvc.yaml", name="dummy_stage", **kwargs):
        """Create a ``PipelineStage``, dump it, and return it.

        Args:
            path: file the stage is created in.
            name: name of the stage.
            **kwargs: extra stage data forwarded to ``create_stage``. An
                optional "cmd" overrides the default command "command".

        Returns:
            The dumped stage.
        """
        from dvc.stage import PipelineStage, create_stage

        # Pop (rather than get) so a caller-supplied "cmd" is not passed
        # twice, which would raise TypeError for a duplicate keyword argument.
        cmd = kwargs.pop("cmd", "command")
        stage = create_stage(
            PipelineStage, dvc, path, name=name, cmd=cmd, **kwargs
        )
        stage.dump()
        return stage
コード例 #21
0
def test_deps_sorted(dvc):
    """Check that dependencies are serialized in sorted order."""
    unsorted_deps = ["a", "quick", "lazy", "fox"]
    stage = create_stage(PipelineStage, dvc, deps=unsorted_deps, **kwargs)

    entry = to_pipeline_file(stage)["something"]
    assert entry[stage.PARAM_DEPS] == ["a", "fox", "lazy", "quick"]
コード例 #22
0
    def make(path="dvc.yaml", name="dummy_stage", **kwargs):
        """Create a ``PipelineStage`` with an empty command and dump it.

        Args:
            path: file the stage is created in.
            name: name of the stage.
            **kwargs: extra stage data forwarded to ``create_stage``.

        Returns:
            The dumped stage.
        """
        from dvc.stage import PipelineStage, create_stage

        new_stage = create_stage(
            PipelineStage, dvc, path, name=name, cmd="", **kwargs
        )
        new_stage.dump()
        return new_stage
コード例 #23
0
ファイル: add.py プロジェクト: sandeepmistry/dvc
def _create_stages(
    repo,
    targets,
    fname,
    pbar=None,
    external=False,
    glob=False,
    desc=None,
):
    """Create one single-file ``Stage`` per (optionally glob-expanded) target.

    Args:
        repo: repository the stages are created in.
        targets: paths, or glob patterns when ``glob`` is true.
        fname: stage file path to use instead of the one resolved per target.
        pbar: optional progress bar; its total is decremented for every
            target that yields no stage and its message is updated otherwise.
        external: forwarded to ``create_stage``.
        glob: when true, every target is expanded recursively with ``iglob``.
        desc: optional description set on each created stage's first output.

    Returns:
        List of the stages that were created.
    """
    from glob import iglob

    from dvc.stage import Stage, create_stage

    if not glob:
        expanded_targets = targets
    else:
        expanded_targets = []
        for pattern in targets:
            expanded_targets.extend(iglob(pattern, recursive=True))

    progress = Tqdm(
        expanded_targets,
        desc="Creating DVC-files",
        disable=len(expanded_targets) < LARGE_DIR_SIZE,
        unit="file",
    )

    created = []
    for out in progress:
        path, wdir, out = resolve_paths(repo, out)
        stage = create_stage(
            Stage,
            repo,
            fname or path,
            wdir=wdir,
            outs=[out],
            external=external,
        )
        if stage:
            Dvcfile(repo, stage.path).remove()
            if desc:
                stage.outs[0].desc = desc

        # Reset the repo after every target, whether or not a stage was made.
        repo._reset()  # pylint: disable=protected-access

        if stage:
            created.append(stage)
            if pbar is not None:
                pbar.update_msg(out)
        elif pbar is not None:
            pbar.total -= 1

    return created
コード例 #24
0
ファイル: add.py プロジェクト: vijay-pinjala/dvc
def _create_stages(
    repo,
    targets,
    fname,
    pbar=None,
    external=False,
    glob=False,
    desc=None,
    transfer=False,
    **kwargs,
):
    """Create one single-file ``Stage`` per (optionally glob-expanded) target.

    Args:
        repo: repository the stages are created in.
        targets: paths or patterns, expanded through ``glob_targets``.
        fname: stage file path to use instead of the one resolved per target.
        pbar: optional progress bar; its total is decremented for every
            target that yields no stage and its message is updated otherwise.
        external: forwarded to ``create_stage``.
        glob: forwarded to ``glob_targets``.
        desc: optional description set on each created stage's first output.
        transfer: together with the absence of ``kwargs["out"]``, decides the
            ``always_local`` argument of ``resolve_paths``.
        **kwargs: only "out" is read here; when truthy, each target's output
            is resolved against it with ``resolve_output``.

    Returns:
        List of the stages that were created.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage, create_stage, restore_meta

    expanded_targets = glob_targets(targets, glob=glob)
    out_override = kwargs.get("out")

    stages = []
    for out in Tqdm(
        expanded_targets,
        desc="Creating DVC files",
        disable=len(expanded_targets) < LARGE_DIR_SIZE,
        unit="file",
    ):
        if out_override:
            out = resolve_output(out, out_override)
        path, wdir, out = resolve_paths(
            repo, out, always_local=transfer and not out_override
        )
        stage = create_stage(
            Stage,
            repo,
            fname or path,
            wdir=wdir,
            outs=[out],
            external=external,
        )
        # Only touch the stage when one was actually created; otherwise the
        # skip branch below could never be reached because `stage.path`
        # would fail first.
        if stage:
            restore_meta(stage)
            Dvcfile(repo, stage.path).remove()
            if desc:
                stage.outs[0].desc = desc

        repo._reset()  # pylint: disable=protected-access

        if not stage:
            if pbar is not None:
                pbar.total -= 1
            continue

        stages.append(stage)
        if pbar is not None:
            pbar.update_msg(out)

    return stages
コード例 #25
0
def test_fill_from_lock_missing_params_section(dvc, lock_data):
    """Check that params deps stay unhashed after filling from ``lock_data``.

    The stage declares params for two files; after ``fill_from_lock`` runs
    with the fixture's lock data, neither params dependency may have hash
    info.
    """
    declared_params = ["lorem", "lorem.ipsum", {"myparams.yaml": ["ipsum"]}]
    stage = create_stage(
        PipelineStage,
        dvc,
        PIPELINE_FILE,
        deps=["foo"],
        outs=["bar"],
        params=declared_params,
    )
    params_deps = split_params_deps(stage)[0]

    StageLoader.fill_from_lock(stage, lock_data)

    assert not params_deps[0].hash_info
    assert not params_deps[1].hash_info
コード例 #26
0
def test_lock_params(dvc):
    """Check that locked params of the default file are sorted by name."""
    stage = create_stage(
        PipelineStage, dvc, params=["lorem.ipsum", "abc"], **kwargs
    )
    stage.deps[0].info = {"lorem.ipsum": {"lorem1": 1, "lorem2": 2}, "abc": 3}

    expected = OrderedDict(
        [("abc", 3), ("lorem.ipsum", {"lorem1": 1, "lorem2": 2})]
    )
    locked_params = to_single_stage_lockfile(stage)["params"]
    assert locked_params[DEFAULT_PARAMS_FILE] == expected
コード例 #27
0
def test_fill_from_lock_use_appropriate_checksum(dvc, lock_data):
    """Check that the hash name found in the lock entry is the one used.

    The s3 dependency is locked with an "etag" and must get an etag hash
    info, while the output is expected to have the md5 "bar_checksum".
    """
    s3_dep = "s3://dvc-temp/foo"
    stage = create_stage(
        PipelineStage, dvc, PIPELINE_FILE, deps=[s3_dep], outs=["bar"]
    )
    lock_data["deps"] = [{"path": "s3://dvc-temp/foo", "etag": "e-tag"}]

    StageLoader.fill_from_lock(stage, lock_data)

    dep, out = stage.deps[0], stage.outs[0]
    assert dep.hash_info == HashInfo("etag", "e-tag")
    assert out.hash_info == HashInfo("md5", "bar_checksum")
コード例 #28
0
def test_fill_from_lock_deps_outs(dvc, lock_data):
    """Check that ``fill_from_lock`` sets hash info on deps and outs.

    Neither the dependency nor the output has hash info before filling;
    afterwards they must carry the md5 values "foo_checksum" and
    "bar_checksum" respectively.
    """
    stage = create_stage(
        PipelineStage, dvc, PIPELINE_FILE, deps=["foo"], outs=["bar"]
    )

    assert all(not item.hash_info for item in chain(stage.deps, stage.outs))

    StageLoader.fill_from_lock(stage, lock_data)

    expected_dep_hash = HashInfo("md5", "foo_checksum")
    expected_out_hash = HashInfo("md5", "bar_checksum")
    assert stage.deps[0].hash_info == expected_dep_hash
    assert stage.outs[0].hash_info == expected_out_hash
コード例 #29
0
ファイル: test_cache.py プロジェクト: rattanjotsingh/dvc
def test_unhashable(tmp_dir, dvc, mocker, kwargs):
    """Check that the stage cache neither saves nor restores this stage.

    ``save`` must return ``None`` and ``restore`` must raise
    ``RunCacheNotFoundError``; in both cases the patched
    ``_get_stage_hash`` must not have been called.
    """
    from dvc.stage import Stage, create_stage
    from dvc.stage.cache import RunCacheNotFoundError, StageCache

    cache = StageCache(dvc)
    stage = create_stage(Stage, path="stage.dvc", repo=dvc, **kwargs)
    get_stage_hash = mocker.patch("dvc.stage.cache._get_stage_hash")

    assert cache.save(stage) is None
    # `mock.not_called` is not a Mock assertion: it is either an auto-created
    # (always truthy) attribute or an AttributeError, so it verified nothing.
    get_stage_hash.assert_not_called()

    with pytest.raises(RunCacheNotFoundError):
        cache.restore(stage)
    get_stage_hash.assert_not_called()
コード例 #30
0
def test_dump_nondefault_hash(dvc):
    """Check that an md5 set on an s3 dependency is written to the lock data.

    The expected data holds the command followed by a "deps" list with the
    s3 path and the md5 value.
    """
    s3_path = "s3://dvc-temp/file"
    stage = create_stage(PipelineStage, dvc, deps=[s3_path], **kwargs)
    stage.deps[0].info = {"md5": "value"}

    locked_dep = OrderedDict(
        [("path", "s3://dvc-temp/file"), ("md5", "value")]
    )
    expected = OrderedDict([("cmd", "command"), ("deps", [locked_dep])])
    assert to_single_stage_lockfile(stage) == expected