コード例 #1
0
ファイル: test_repro.py プロジェクト: vijay-pinjala/dvc
    def test(self):
        """
        Check that switching the ``metric`` flag of an output on and off
        leaves nothing for ``reproduce`` to run.
        """
        added = self.dvc.add(self.FOO)
        self.assertEqual(len(added), 1)
        self.assertIsNotNone(added[0])

        out_name = "file1"
        stage_file = out_name + ".dvc"
        self.dvc.run(
            fname=stage_file,
            outs_no_cache=[out_name],
            deps=[self.FOO, self.CODE],
            cmd=f"python {self.CODE} {self.FOO} {out_name}",
            single_stage=True,
        )

        # Nothing was modified since the run above.
        self.assertEqual(len(self.dvc.reproduce(stage_file)), 0)

        # Rewrite the stage file with the flag on, then off again; each
        # time reproduce is expected to return no stages.
        for metric_flag in (True, False):
            contents = load_yaml(stage_file)
            contents["outs"][0]["metric"] = metric_flag
            dump_yaml(stage_file, contents)

            self.assertEqual(len(self.dvc.reproduce(stage_file)), 0)
コード例 #2
0
ファイル: test_stage_resolver.py プロジェクト: vladkol/dvc
def test_foreach_loop_templatized(tmp_dir, dvc):
    """Resolve a ``foreach`` whose iterable is the interpolated ``${models}``.

    ``models`` is defined both in the default params file (``us``) and in
    the top-level ``vars`` (``gb``). The expected result has one generated
    stage per key, and only the params-file entry carries a ``params`` list.
    """
    from_params = {"models": {"us": {"thresh": 10}}}
    from_vars = {"models": {"gb": {"thresh": 15}}}
    dump_yaml(tmp_dir / DEFAULT_PARAMS_FILE, from_params)

    build_stage = {
        "foreach": "${models}",
        "in": {"cmd": "python script.py --thresh ${item.thresh}"},
    }
    dvc_yaml = {"vars": from_vars, "stages": {"build": build_stage}}

    resolver = DataResolver(dvc, PathInfo(str(tmp_dir)), dvc_yaml)
    resolved = resolver.resolve()

    expected_stages = {
        "build-gb": {"cmd": "python script.py --thresh 15"},
        "build-us": {
            "cmd": "python script.py --thresh 10",
            "params": ["models.us.thresh"],
        },
    }
    assert_stage_equal(resolved, {"stages": expected_stages})
コード例 #3
0
    def test(self):
        """Expect ``StagePathAsOutputError`` for a stage file inside ``dir2``.

        ``dir2`` is declared as the output of ``dir1/dir2.dvc``; the stage
        file ``dir2/something.dvc`` is then written by hand and reproduced.
        """
        # Layout:
        #       .
        #       |-- dir1
        #       |  |__ dir2.dvc         (out.path == ../dir2)
        #       |__ dir2
        #           |__ something.dvc    (stage.cwd == ./dir2)

        os.mkdir(os.path.join(self.dvc.root_dir, "dir1"))

        sibling_dir2 = os.path.join("..", "dir2")
        self.dvc.run(
            fname=os.path.join("dir1", "dir2.dvc"),
            wdir="dir1",
            outs=[sibling_dir2],
            cmd="mkdir {path}".format(path=sibling_dir2),
            single_stage=True,
        )

        bad_stage_file = os.path.join("dir2", "something.dvc")
        escaped_out = os.path.join("..", "something")
        dump_yaml(
            bad_stage_file,
            {
                "cmd": f"echo something > {escaped_out}",
                "outs": [{"path": escaped_out}],
            },
        )

        with self.assertRaises(StagePathAsOutputError):
            self.dvc.reproduce(bad_stage_file)
コード例 #4
0
def test_collect_generated(tmp_dir, dvc):
    """Every collect variant returns all five ``foreach``-generated stages."""
    dvc.config["feature"]["parametrization"] = True
    foreach_stage = {"foreach": "${vars}", "do": {"cmd": "echo ${item}"}}
    dump_yaml(
        "dvc.yaml",
        {
            "vars": [{"vars": [1, 2, 3, 4, 5]}],
            "stages": {"build": foreach_stage},
        },
    )

    generated = set(dvc.stages)
    assert len(generated) == 5

    # (positional args, keyword args) for each `collect` call under test.
    collect_calls = [
        ((), {}),
        (("build",), {"accept_group": True}),
        (("build",), {"accept_group": True, "with_deps": True}),
        (("build*",), {"glob": True}),
        (("build*",), {"glob": True, "with_deps": True}),
    ]
    for args, kwargs in collect_calls:
        assert set(dvc.stage.collect(*args, **kwargs)) == generated

    # The granular variant yields (stage, filter) pairs; no filter is
    # expected for any of them.
    granular_expected = {(stage, None) for stage in generated}
    for extra in ({}, {"with_deps": True}):
        pairs = dvc.stage.collect_granular(
            "build", accept_group=True, **extra
        )
        assert set(pairs) == granular_expected
コード例 #5
0
def test_params_file_with_dict_tracked(tmp_dir, dvc):
    """Resolve a ``foreach`` over a dict that comes from ``params.yaml``.

    Checks the generated stages, that the resolver context still equals the
    loaded params afterwards, and which keys are tracked per generated stage.
    """
    models = {"model1": {"thresh": "foo"}, "model2": {"thresh": "bar"}}
    dump_yaml("params.yaml", {"models": models})

    resolver = DataResolver(dvc, tmp_dir, {})
    foreach_spec = {
        "foreach": "${models}",
        "do": {"cmd": "echo ${item.thresh}"},
    }
    definition = ForeachDefinition(
        resolver, resolver.context, "build", foreach_spec
    )

    resolved = definition.resolve_all()
    assert resolved == {
        "build@model1": {"cmd": "echo foo"},
        "build@model2": {"cmd": "echo bar"},
    }

    # The `foreach` item-key replacement must not leave any leftovers in
    # the context.
    assert resolver.context == {"models": models}

    assert resolver.tracked_vars == {
        "build@model1": {"params.yaml": {"models.model1.thresh": "foo"}},
        "build@model2": {"params.yaml": {"models.model2.thresh": "bar"}},
    }
コード例 #6
0
ファイル: test_stage_resolver.py プロジェクト: metalglove/dvc
def test_vars_and_params_import(tmp_dir, dvc):
    """
    Vars and params are merged for interpolation, while only the value that
    came from the params file is reported in ``tracked_vars``.
    """
    dump_yaml(tmp_dir / DEFAULT_PARAMS_FILE, {"dict": {"bar": "bar"}})

    inline_vars = {"dict": {"foo": "foobar"}}
    dvc_yaml = {
        "vars": [DEFAULT_PARAMS_FILE, inline_vars],
        "stages": {"stage1": {"cmd": "echo ${dict.foo} ${dict.bar}"}},
    }
    resolver = DataResolver(dvc, PathInfo(str(tmp_dir)), dvc_yaml)

    resolved = resolver.resolve()
    assert_stage_equal(
        resolved, {"stages": {"stage1": {"cmd": "echo foobar bar"}}}
    )

    expected_tracked = {"stage1": {DEFAULT_PARAMS_FILE: {"dict.bar": "bar"}}}
    assert resolver.tracked_vars == expected_tracked
コード例 #7
0
ファイル: test_stage_resolver.py プロジェクト: metalglove/dvc
def test_resolve_local_tries_to_load_globally_used_params_yaml(tmp_dir, dvc):
    """Resolve a stage that lists ``params.yaml`` in its own ``vars``.

    The resolved stage keeps its explicit ``params`` entry, loses the
    ``vars`` key, and the interpolated ``bar`` is reported as tracked.
    """
    dump_yaml(tmp_dir / "params.yaml", {"bar": "bar", "foo": "foo"})

    build_stage = {
        "cmd": "command --value ${bar}",
        "params": [{"params.yaml": ["foo"]}],
        "vars": ["params.yaml"],
    }
    resolver = DataResolver(
        dvc, PathInfo(str(tmp_dir)), {"stages": {"build": build_stage}}
    )

    resolved = resolver.resolve()
    expected_build = {
        "cmd": "command --value bar",
        "params": [{"params.yaml": ["foo"]}],
    }
    assert_stage_equal(resolved, {"stages": {"build": expected_build}})

    assert resolver.tracked_vars == {"build": {"params.yaml": {"bar": "bar"}}}
コード例 #8
0
def test_repro_when_lockfile_gets_deleted(tmp_dir, dvc):
    """Deleting the lock file makes ``reproduce`` run the stage again."""
    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen("foo", "foo")

    source, copied = "foo", "foobar"
    run_copy = {
        "cmd": "python copy.py {} {}".format(source, copied),
        "deps": ["foo"],
        "outs": ["foobar"],
    }
    dump_yaml(PIPELINE_FILE, {"stages": {"run-copy": run_copy}})

    target = ":run-copy"

    # First run executes the stage and writes the lock file.
    assert dvc.reproduce(target)
    assert os.path.exists(PIPELINE_LOCK)

    # Second run has nothing to do.
    assert not dvc.reproduce(target)

    os.unlink(PIPELINE_LOCK)
    rerun = dvc.reproduce(target)
    assert rerun
    assert rerun[0].relpath == PIPELINE_FILE
    assert rerun[0].name == "run-copy"
コード例 #9
0
    def save(self, stage):
        """Commit the stage's uncached outputs and store its cache entry.

        Does nothing when ``_can_hash(stage)`` is false. Otherwise the
        outputs reported by ``self._uncached_outs`` are committed, and if no
        entry already exists for the computed key/value pair, the serialized
        stage is validated against the lock-file stage schema, written to a
        temporary file in the destination directory and then moved to its
        final path.
        """
        from .serialize import to_single_stage_lockfile

        if not _can_hash(stage):
            return

        cache_key = _get_stage_hash(stage)
        cache = to_single_stage_lockfile(stage)
        cache_value = _get_cache_hash(cache)

        # Prefer an entry that is already stored over the freshly built one.
        existing_cache = self._load_cache(cache_key, cache_value)
        cache = existing_cache or cache

        for out in self._uncached_outs(stage, cache):
            out.commit()

        if existing_cache:
            return

        from dvc.schema import COMPILED_LOCK_FILE_STAGE_SCHEMA
        from dvc.utils.serialize import dump_yaml

        # sanity check
        COMPILED_LOCK_FILE_STAGE_SCHEMA(cache)

        path = PathInfo(self._get_cache_path(cache_key, cache_value))
        self.repo.odb.local.makedirs(path.parent)
        # Only the unique name is needed here; close the handle right away
        # instead of leaving that to garbage collection, so the file is not
        # still open when it is rewritten and moved below.
        with tempfile.NamedTemporaryFile(
            delete=False, dir=path.parent
        ) as fobj:
            tmp = fobj.name
        assert os.path.exists(path.parent)
        assert os.path.isdir(path.parent)
        dump_yaml(tmp, cache)
        self.repo.odb.local.move(PathInfo(tmp), path)
コード例 #10
0
    def save(self, stage):
        """Commit the stage's uncached outputs and store its cache entry.

        Does nothing for callback or always-changed stages, or when
        ``_get_stage_hash`` returns a falsy key. Otherwise the outputs
        reported by ``self._uncached_outs`` are committed, and if no entry
        already exists for the computed key/value pair, the serialized stage
        is validated against the lock-file stage schema, written to a
        temporary file in the destination directory and then moved to its
        final path.
        """
        if stage.is_callback or stage.always_changed:
            return

        cache_key = _get_stage_hash(stage)
        if not cache_key:
            return

        cache = to_single_stage_lockfile(stage)
        cache_value = _get_cache_hash(cache)

        # Prefer an entry that is already stored over the freshly built one.
        existing_cache = self._load_cache(cache_key, cache_value)
        cache = existing_cache or cache

        for out in self._uncached_outs(stage, cache):
            out.commit()

        if existing_cache:
            return

        # sanity check
        COMPILED_LOCK_FILE_STAGE_SCHEMA(cache)

        path = PathInfo(self._get_cache_path(cache_key, cache_value))
        self.tree.makedirs(path.parent)
        # Only the unique name is needed here; close the handle right away
        # instead of leaving that to garbage collection, so the file is not
        # still open when it is rewritten and moved below.
        with tempfile.NamedTemporaryFile(
            delete=False, dir=path.parent
        ) as fobj:
            tmp = fobj.name
        assert os.path.exists(path.parent)
        assert os.path.isdir(path.parent)
        dump_yaml(tmp, cache)
        self.tree.move(PathInfo(tmp), path)
コード例 #11
0
def test_repro_when_new_deps_is_moved(tmp_dir, dvc):
    """Reproduce reruns the stage after its dependency is renamed.

    ``foo`` is moved onto ``bar``, the script is regenerated for the new
    name, and the ``deps`` entry in the stage file is rewritten by hand.
    """
    from shutil import move

    from dvc.dvcfile import Dvcfile

    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen({"foo": "foo", "bar": "foo"})
    copy_stage = dvc.run(
        cmd="python copy.py {} {}".format("foo", "foobar"),
        outs=["foobar"],
        deps=["foo"],
        name="copy-file",
    )
    target = ":copy-file"

    # Freshly created stage: nothing to reproduce yet.
    assert not dvc.reproduce(target)

    tmp_dir.gen("copy.py", COPY_SCRIPT_FORMAT.format("'bar'", "'foobar'"))
    move("foo", "bar")

    # Point the stage at the renamed dependency.
    loaded, _ = Dvcfile(dvc, copy_stage.path)._load()
    loaded["stages"]["copy-file"]["deps"] = ["bar"]
    dump_yaml(copy_stage.path, loaded)

    reproduced = dvc.reproduce(target)
    assert reproduced[0] == copy_stage
コード例 #12
0
ファイル: test_show.py プロジェクト: ush98/dvc
def test_plots_show_overlap(tmp_dir, dvc, run_copy_metrics, clear_before_run):
    """``dvc.plots.show()`` raises ``OverlappingOutputPathsError``.

    A second stage is added to ``dvc.yaml`` whose output is the ``data``
    directory that already holds the plot file of the first stage.
    """
    data_dir = PathInfo("data")
    (tmp_dir / data_dir).mkdir()

    source = data_dir / "m1_temp.yaml"
    plot_file = str(data_dir / "m1.yaml")
    dump_yaml(source, {"a": {"b": {"c": 2, "d": 1}}})
    run_copy_metrics(
        str(source),
        plot_file,
        single_stage=False,
        commit="add m1",
        name="cp-m1",
        plots=[plot_file],
    )

    # Add a stage that claims the whole directory, to provoke the
    # overlapping-outputs error.
    with modify_yaml("dvc.yaml") as contents:
        contents["stages"]["corrupted-stage"] = {
            "cmd": "mkdir data",
            "outs": ["data"],
        }

    # Run both with and without clearing the workspace data and cache, so
    # the check also holds for optimized code paths.
    if clear_before_run:
        remove(data_dir)
        remove(dvc.odb.local.cache_dir)

    dvc._reset()

    with pytest.raises(OverlappingOutputPathsError):
        dvc.plots.show()
コード例 #13
0
 def _gen(val):
     """Write metrics built from ``val`` and pass them to ``run_copy_metrics``.

     The metrics go to ``m_temp.yaml``; ``run_copy_metrics`` is then called
     with ``m.yaml`` as the target and ``str(val)`` as the commit argument.
     """
     label = str(val)
     dump_yaml("m_temp.yaml", {"a": {"b": {"c": val, "d": 1, "e": label}}})
     run_copy_metrics(
         "m_temp.yaml", "m.yaml", metrics=["m.yaml"], commit=label
     )
コード例 #14
0
def test_mixed_vars_for_foreach_data(tmp_dir, dvc):
    """Resolve a ``foreach`` whose data is merged from two files.

    ``params.yaml`` and ``test_params.yaml`` each contribute one model; the
    tracked vars are expected to attribute each key to its own file.
    """
    dump_yaml("params.yaml", {"models": {"model1": "foo"}})
    dump_yaml("test_params.yaml", {"models": {"model2": "bar"}})

    resolver = DataResolver(dvc, tmp_dir, {"vars": ["test_params.yaml"]})
    foreach_spec = {"foreach": "${models}", "do": {"cmd": "echo ${item}"}}
    definition = ForeachDefinition(
        resolver, resolver.context, "build", foreach_spec
    )

    resolved = definition.resolve_all()
    assert resolved == {
        "build@model1": {"cmd": "echo foo"},
        "build@model2": {"cmd": "echo bar"},
    }

    merged_models = {"model1": "foo", "model2": "bar"}
    assert resolver.context == {"models": merged_models}

    assert resolver.tracked_vars == {
        "build@model1": {"params.yaml": {"models.model1": "foo"}},
        "build@model2": {"test_params.yaml": {"models.model2": "bar"}},
    }
コード例 #15
0
def test_params_file_tracked_for_composite_list(tmp_dir, dvc):
    """Resolve a ``foreach`` over a list of dicts from ``params.yaml``.

    Generated stages are named by list index, and the tracked keys use the
    same index in their dotted path.
    """
    models = [{"thresh": "foo"}, {"thresh": "bar"}]
    dump_yaml("params.yaml", {"models": models})

    resolver = DataResolver(dvc, tmp_dir, {})
    foreach_spec = {
        "foreach": "${models}",
        "do": {"cmd": "echo ${item.thresh}"},
    }
    definition = ForeachDefinition(
        resolver, resolver.context, "build", foreach_spec
    )

    resolved = definition.resolve_all()
    assert resolved == {
        "build@0": {"cmd": "echo foo"},
        "build@1": {"cmd": "echo bar"},
    }

    assert resolver.context == {"models": models}

    assert resolver.tracked_vars == {
        "build@0": {"params.yaml": {"models.0.thresh": "foo"}},
        "build@1": {"params.yaml": {"models.1.thresh": "bar"}},
    }
コード例 #16
0
    def test_similar_paths(self):
        """Reproduce ``something-1/a.dvc`` next to a ``something`` output.

        The test fails explicitly if ``StagePathAsOutputError`` is raised.
        """
        # Layout:
        #
        #       .
        #       |-- something.dvc   (out.path == something)
        #       |-- something
        #       |__ something-1
        #          |-- a
        #          |__ a.dvc        (stage.cwd == something-1)

        self.dvc.run(
            outs=["something"], cmd="mkdir something", single_stage=True
        )

        os.mkdir("something-1")

        stage_file = os.path.join("something-1", "a.dvc")
        dump_yaml(stage_file, {"cmd": "echo a > a", "outs": [{"path": "a"}]})

        try:
            self.dvc.reproduce(stage_file)
        except StagePathAsOutputError:
            self.fail("should not raise StagePathAsOutputError")
コード例 #17
0
ファイル: test_repro.py プロジェクト: vijay-pinjala/dvc
    def test(self):
        """Expect ``CyclicGraphError`` when a stage file closes a loop.

        Two stages chain ``self.FOO`` -> ``bar.txt`` -> ``baz.txt``; the
        hand-written ``cycle.dvc`` then depends on ``baz.txt`` and declares
        ``self.FOO`` as its output.
        """
        # (dependency, output, command, stage name) for each link.
        chain = [
            (self.FOO, "bar.txt", "echo bar > bar.txt", "copybarbar-txt"),
            ("bar.txt", "baz.txt", "echo baz > baz.txt", "copybazbaz-txt"),
        ]
        for dep, out, cmd, name in chain:
            self._run(deps=[dep], outs=[out], cmd=cmd, name=name)

        dump_yaml(
            "cycle.dvc",
            {
                "cmd": "echo baz > foo",
                "deps": [{"path": "baz.txt"}],
                "outs": [{"path": self.FOO}],
            },
        )

        with self.assertRaises(CyclicGraphError):
            self.dvc.reproduce("cycle.dvc")
コード例 #18
0
def test_track_from_multiple_files(tmp_dir):
    """Check per-file tracking of keys selected from a merged context.

    Two params files contribute different leaves under ``Train.us``.
    Selecting an intermediate node is expected to track nothing for either
    file, while selecting a leaf tracks it only for the file that defined
    it. The same is then checked through an alias to ``Train.us``.
    """
    lr_data = {"Train": {"us": {"lr": 10}}}
    layers_data = {"Train": {"us": {"layers": 100}}}

    tree = LocalTree(None, config={})
    path1 = tmp_dir / "params.yaml"
    path2 = tmp_dir / "params2.yaml"
    dump_yaml(path1, lr_data, tree)
    dump_yaml(path2, layers_data, tree)

    context = Context.load_from(tree, path1)
    context.merge_update(Context.load_from(tree, path2))

    def key_tracked(tracked_map, source, dotted_key):
        return dotted_key in tracked_map[relpath(source)]

    with context.track() as tracked:
        context.select("Train")
        assert not key_tracked(tracked, path1, "Train")
        assert not key_tracked(tracked, path2, "Train")

        context.select("Train.us")
        assert not key_tracked(tracked, path1, "Train.us")
        assert not key_tracked(tracked, path2, "Train.us")

        context.select("Train.us.lr")
        assert key_tracked(tracked, path1, "Train.us.lr")
        assert not key_tracked(tracked, path2, "Train.us.lr")

        context.select("Train.us.layers")
        assert not key_tracked(tracked, path1, "Train.us.layers")
        assert key_tracked(tracked, path2, "Train.us.layers")

    context = Context.clone(context)
    assert not context._tracked_data

    # Repeat the checks through an alias of the nested node.
    context["us"] = context["Train"]["us"]
    with context.track() as tracked:
        context.select("us")
        assert not key_tracked(tracked, path1, "Train.us")
        assert not key_tracked(tracked, path2, "Train.us")

        context.select("us.lr")
        assert key_tracked(tracked, path1, "Train.us.lr")
        assert not key_tracked(tracked, path2, "Train.us.lr")

        context.select("Train.us.layers")
        assert not key_tracked(tracked, path1, "Train.us.layers")
        assert key_tracked(tracked, path2, "Train.us.layers")
コード例 #19
0
    def test(self):
        """Forced checkout raises ``CheckoutError`` once checksums are removed.

        The checksum entry is deleted from the first output and the first
        dependency of the stage file before checking out.
        """
        contents = load_yaml(self.file1_stage)
        checksum_key = LocalTree.PARAM_CHECKSUM
        for section in (Stage.PARAM_OUTS, Stage.PARAM_DEPS):
            del contents[section][0][checksum_key]
        dump_yaml(self.file1_stage, contents)

        with pytest.raises(CheckoutError):
            self.dvc.checkout(force=True)
コード例 #20
0
    def test(self):
        """Reproduce returns exactly one stage once checksums are removed.

        The checksum entry is deleted from the first output and the first
        dependency of the stage file before reproducing it.
        """
        contents = load_yaml(self.file1_stage)
        checksum_key = LocalFileSystem.PARAM_CHECKSUM
        for section in (Stage.PARAM_OUTS, Stage.PARAM_DEPS):
            del contents[section][0][checksum_key]
        dump_yaml(self.file1_stage, contents)

        reproduced = self.dvc.reproduce(self.file1_stage)
        self.assertEqual(len(reproduced), 1)
コード例 #21
0
ファイル: test_resolver.py プロジェクト: ush98/dvc
def test_partial_vars_doesnot_exist(tmp_dir, dvc):
    """A ``file:key`` vars entry naming a missing key raises ``ResolveError``."""
    dump_yaml("test_params.yaml", {"sub1": "sub1"})

    definition = {"vars": ["test_params.yaml:sub2"]}
    with pytest.raises(ResolveError) as exc_info:
        DataResolver(dvc, tmp_dir, definition)

    expected_message = (
        "failed to parse 'vars' in 'dvc.yaml': "
        "could not find 'sub2' in 'test_params.yaml'"
    )
    assert str(exc_info.value) == expected_message
コード例 #22
0
ファイル: test_dvcfile.py プロジェクト: vijay120/dvc
def test_stage_load_on_invalid_data(tmp_dir, dvc, file):
    """Loading stages and validating both raise ``StageFileFormatError``."""
    invalid = {"is_this_a_valid_dvcfile": False}
    dump_yaml(file, invalid)
    dvcfile = Dvcfile(dvc, file)

    # Each callable touches the file contents in a different way; both are
    # expected to fail with the same error.
    triggers = (
        lambda: dvcfile.stages,
        lambda: dvcfile.validate(invalid, file),
    )
    for trigger in triggers:
        with pytest.raises(StageFileFormatError):
            assert trigger()
コード例 #23
0
def test_read_params_nested(tmp_dir, dvc):
    """A dotted param name reads the value nested under those keys."""
    nested = {"some": {"path": {"foo": ["val1", "val2"]}}}
    dump_yaml(DEFAULT_PARAMS_FILE, nested)

    dep = ParamsDependency(Stage(dvc), None, ["some.path.foo"])
    values = dep.read_params()
    assert values == {"some.path.foo": ["val1", "val2"]}
コード例 #24
0
def test_simple(tmp_dir, dvc):
    """Resolve the templated data and compare the result and tracked vars."""
    dump_yaml(tmp_dir / DEFAULT_PARAMS_FILE, CONTEXT_DATA)

    root = PathInfo(str(tmp_dir))
    resolver = DataResolver(dvc, root, deepcopy(TEMPLATED_DVC_YAML_DATA))

    resolved = resolver.resolve()
    assert_stage_equal(resolved, deepcopy(RESOLVED_DVC_YAML_DATA))

    expected_tracked = {
        stage_name: {DEFAULT_PARAMS_FILE: USED_VARS[stage_name]}
        for stage_name in ("stage1", "stage2")
    }
    assert resolver.tracked_vars == expected_tracked
コード例 #25
0
    def dump(self, stage, **kwargs):
        """Write a single (non-pipeline) stage to this dvcfile.

        The path is checked first when ``self.verify`` is set; after writing,
        the file is handed to the repo's SCM via ``track_file``.
        """
        from dvc.stage import PipelineStage

        assert not isinstance(stage, PipelineStage)

        if self.verify:
            check_dvcfile_path(self.repo, self.path)

        message = f"Saving information to '{relpath(self.path)}'."
        logger.debug(message)

        dump_yaml(self.path, serialize.to_single_stage_file(stage))
        self.repo.scm.track_file(self.relpath)
コード例 #26
0
ファイル: test_dvcfile.py プロジェクト: ush98/dvc
def test_dvcfile_try_dumping_parametrized_stage(tmp_dir, dvc, data, name):
    """Dumping a stage loaded from parametrized data is rejected."""
    contents = {"stages": data, "vars": [{"foo": "foobar"}]}
    dump_yaml("dvc.yaml", contents)

    loaded = dvc.stage.load_one(name=name)
    dvcfile = loaded.dvcfile

    with pytest.raises(ParametrizedDumpError) as exc:
        dvcfile.dump(loaded)

    expected_message = f"cannot dump a parametrized stage: '{name}'"
    assert str(exc.value) == expected_message
コード例 #27
0
ファイル: test_resolver.py プロジェクト: ush98/dvc
def test_local_declared_vars_overwrite(tmp_dir, dvc):
    """Listing the same mapping twice in ``vars`` raises ``ResolveError``."""
    dump_yaml(DEFAULT_PARAMS_FILE, DATA)

    models = DATA["models"]
    definition = {"vars": [models, models]}
    with pytest.raises(ResolveError) as exc_info:
        DataResolver(dvc, tmp_dir, definition)

    expected_message = (
        "failed to parse 'vars' in 'dvc.yaml':\n"
        "cannot redefine 'bar' from 'vars[1]' "
        "as it already exists in 'vars[0]'"
    )
    assert str(exc_info.value) == expected_message
コード例 #28
0
def test_vars_load_partial(tmp_dir, dvc, local, vars_):
    """Resolving completes with ``vars_`` set on the stage or at the top level.

    ``local`` selects where ``vars_`` is attached; the test makes no
    assertion beyond ``resolve()`` returning.
    """
    dump_yaml(tmp_dir / "test_params.yaml", {"bar": "bar", "foo": "foo"})

    build_stage = {"cmd": "echo ${bar}"}
    definition = {"stages": {"build": build_stage}}

    # Attach the vars either to the stage itself or to the whole file.
    holder = build_stage if local else definition
    holder["vars"] = vars_

    resolver = DataResolver(dvc, PathInfo(str(tmp_dir)), definition)
    resolver.resolve()
コード例 #29
0
ファイル: test_lockfile.py プロジェクト: vijay-pinjala/dvc
def test_stage_dump_when_already_exists(tmp_dir, dvc):
    """Dumping a new stage keeps the entries already in the lock file.

    The file is first written without a schema wrapper; after the dump it is
    expected to load with ``schema`` and both stages under ``stages``.
    """
    existing = {"s1": {"cmd": "command", "deps": [], "outs": []}}
    dump_yaml("path.lock", existing)

    new_stage = PipelineStage(
        name="s2", repo=dvc, path="path", cmd="command2"
    )
    lockfile = Lockfile(dvc, "path.lock")
    lockfile.dump(new_stage)

    expected_stages = dict(existing)
    expected_stages["s2"] = {"cmd": "command2"}
    assert lockfile.load() == {"schema": "2.0", "stages": expected_stages}
コード例 #30
0
ファイル: test_lockfile.py プロジェクト: ush98/dvc
def test_lockfile_invalid_versions(tmp_dir, dvc, version_info):
    """Loading a lock file with the given schema info raises a corruption error.

    Both the error message and the message of its ``__cause__`` are checked.
    """
    lockdata = dict(version_info)
    lockdata["stages"] = {"foo": {"cmd": "echo foo"}}
    dump_yaml("dvc.lock", lockdata)

    lockfile = Lockfile(dvc, tmp_dir / "dvc.lock")
    with pytest.raises(LockfileCorruptedError) as exc_info:
        lockfile.load()

    error = exc_info.value
    assert str(error) == "Lockfile 'dvc.lock' is corrupted."

    expected_cause = (
        "'dvc.lock' format error: "
        f"invalid schema version {version_info['schema']}, "
        "expected one of ['2.0'] for dictionary value @ "
        "data['schema']"
    )
    assert str(error.__cause__) == expected_cause