def test(self):
    """Adding or removing the `metric` flag must not invalidate the stage."""
    out_name = "file1"
    stage_file = out_name + ".dvc"

    added = self.dvc.add(self.FOO)
    self.assertEqual(len(added), 1)
    self.assertTrue(added[0] is not None)

    def _assert_unchanged():
        # an up-to-date stage reproduces nothing
        self.assertEqual(len(self.dvc.reproduce(stage_file)), 0)

    def _set_metric_flag(value):
        d = load_yaml(stage_file)
        d["outs"][0]["metric"] = value
        dump_yaml(stage_file, d)

    self.dvc.run(
        fname=stage_file,
        outs_no_cache=[out_name],
        deps=[self.FOO, self.CODE],
        cmd=f"python {self.CODE} {self.FOO} {out_name}",
        single_stage=True,
    )
    _assert_unchanged()

    _set_metric_flag(True)
    _assert_unchanged()

    _set_metric_flag(False)
    _assert_unchanged()
def test_foreach_loop_templatized(tmp_dir, dvc):
    """`foreach` data may mix params-file entries with locally declared vars."""
    dump_yaml(
        tmp_dir / DEFAULT_PARAMS_FILE, {"models": {"us": {"thresh": 10}}}
    )
    definition = {
        "vars": {"models": {"gb": {"thresh": 15}}},
        "stages": {
            "build": {
                "foreach": "${models}",
                "in": {"cmd": "python script.py --thresh ${item.thresh}"},
            }
        },
    }
    resolver = DataResolver(dvc, PathInfo(str(tmp_dir)), definition)

    # only the params-file entry ("us") is tracked; the local var is not
    expected = {
        "stages": {
            "build-gb": {"cmd": "python script.py --thresh 15"},
            "build-us": {
                "cmd": "python script.py --thresh 10",
                "params": ["models.us.thresh"],
            },
        }
    }
    assert_stage_equal(resolver.resolve(), expected)
def test(self):
    # Layout under the repo root:
    # .
    # |-- dir1
    # |   |__ dir2.dvc (out.path == ../dir2)
    # |__ dir2
    #     |__ something.dvc (stage.cwd == ./dir2)
    relative_out = os.path.join("..", "dir2")
    os.mkdir(os.path.join(self.dvc.root_dir, "dir1"))
    self.dvc.run(
        fname=os.path.join("dir1", "dir2.dvc"),
        wdir="dir1",
        outs=[relative_out],
        cmd="mkdir {path}".format(path=relative_out),
        single_stage=True,
    )

    # a stage living inside another stage's output directory is invalid
    out_path = os.path.join("..", "something")
    bad_stage = os.path.join("dir2", "something.dvc")
    dump_yaml(
        bad_stage,
        {
            "cmd": f"echo something > {out_path}",
            "outs": [{"path": out_path}],
        },
    )

    with self.assertRaises(StagePathAsOutputError):
        self.dvc.reproduce(bad_stage)
def test_collect_generated(tmp_dir, dvc):
    """Stages generated by `foreach` are collectible by group name and glob."""
    dvc.config["feature"]["parametrization"] = True
    dump_yaml(
        "dvc.yaml",
        {
            "vars": [{"vars": [1, 2, 3, 4, 5]}],
            "stages": {
                "build": {"foreach": "${vars}", "do": {"cmd": "echo ${item}"}}
            },
        },
    )

    expected = set(dvc.stages)
    assert len(expected) == 5

    assert set(dvc.stage.collect()) == expected
    assert set(dvc.stage.collect("build", accept_group=True)) == expected
    assert (
        set(dvc.stage.collect("build", accept_group=True, with_deps=True))
        == expected
    )
    assert set(dvc.stage.collect("build*", glob=True)) == expected
    assert (
        set(dvc.stage.collect("build*", glob=True, with_deps=True))
        == expected
    )

    # granular collection pairs each stage with `None` (no output filter)
    granular = {(stage, None) for stage in expected}
    assert (
        set(dvc.stage.collect_granular("build", accept_group=True))
        == granular
    )
    assert (
        set(
            dvc.stage.collect_granular(
                "build", accept_group=True, with_deps=True
            )
        )
        == granular
    )
def test_params_file_with_dict_tracked(tmp_dir, dvc):
    """`foreach` over a params-file dict tracks each item's leaf values."""
    models = {"model1": {"thresh": "foo"}, "model2": {"thresh": "bar"}}
    dump_yaml("params.yaml", {"models": models})

    resolver = DataResolver(dvc, tmp_dir, {})
    definition = ForeachDefinition(
        resolver,
        resolver.context,
        "build",
        {"foreach": "${models}", "do": {"cmd": "echo ${item.thresh}"}},
    )

    assert definition.resolve_all() == {
        "build@model1": {"cmd": "echo foo"},
        "build@model2": {"cmd": "echo bar"},
    }
    # `foreach` item-key replacement must not leave any leftovers behind
    assert resolver.context == {"models": models}
    assert resolver.tracked_vars == {
        "build@model1": {"params.yaml": {"models.model1.thresh": "foo"}},
        "build@model2": {"params.yaml": {"models.model2.thresh": "bar"}},
    }
def test_vars_and_params_import(tmp_dir, dvc):
    """
    Test that vars and params are both merged together for interpolation,
    whilst tracking the "used" variables from params.
    """
    dump_yaml(tmp_dir / DEFAULT_PARAMS_FILE, {"dict": {"bar": "bar"}})
    definition = {
        "vars": [DEFAULT_PARAMS_FILE, {"dict": {"foo": "foobar"}}],
        "stages": {"stage1": {"cmd": "echo ${dict.foo} ${dict.bar}"}},
    }
    resolver = DataResolver(dvc, PathInfo(str(tmp_dir)), definition)

    assert_stage_equal(
        resolver.resolve(),
        {"stages": {"stage1": {"cmd": "echo foobar bar"}}},
    )
    # only the params-file value counts as "used"; the local var does not
    assert resolver.tracked_vars == {
        "stage1": {DEFAULT_PARAMS_FILE: {"dict.bar": "bar"}}
    }
def test_resolve_local_tries_to_load_globally_used_params_yaml(tmp_dir, dvc):
    """Stage-local `vars` may point at the globally-loaded params file."""
    dump_yaml(tmp_dir / "params.yaml", {"bar": "bar", "foo": "foo"})
    definition = {
        "stages": {
            "build": {
                "cmd": "command --value ${bar}",
                "params": [{"params.yaml": ["foo"]}],
                "vars": ["params.yaml"],
            },
        },
    }
    resolver = DataResolver(dvc, PathInfo(str(tmp_dir)), definition)

    expected = {
        "stages": {
            "build": {
                "cmd": "command --value bar",
                "params": [{"params.yaml": ["foo"]}],
            },
        }
    }
    assert_stage_equal(resolver.resolve(), expected)
    assert resolver.tracked_vars == {"build": {"params.yaml": {"bar": "bar"}}}
def test_repro_when_lockfile_gets_deleted(tmp_dir, dvc):
    """Deleting the lockfile must force the stage to run again."""
    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen("foo", "foo")
    dump_yaml(
        PIPELINE_FILE,
        {
            "stages": {
                "run-copy": {
                    "cmd": "python copy.py {} {}".format("foo", "foobar"),
                    "deps": ["foo"],
                    "outs": ["foobar"],
                }
            }
        },
    )

    assert dvc.reproduce(":run-copy")
    assert os.path.exists(PIPELINE_LOCK)
    # second run: everything is up to date
    assert not dvc.reproduce(":run-copy")

    os.unlink(PIPELINE_LOCK)
    rebuilt = dvc.reproduce(":run-copy")
    assert rebuilt
    assert rebuilt[0].relpath == PIPELINE_FILE
    assert rebuilt[0].name == "run-copy"
def save(self, stage):
    """Persist *stage*'s lockfile data as a run-cache entry.

    No-op when the stage cannot be hashed; when an identical cache entry
    already exists, only commits any outputs missing from the cache.
    """
    from .serialize import to_single_stage_lockfile

    if not _can_hash(stage):
        return

    cache_key = _get_stage_hash(stage)
    cache = to_single_stage_lockfile(stage)
    cache_value = _get_cache_hash(cache)

    existing_cache = self._load_cache(cache_key, cache_value)
    cache = existing_cache or cache

    # uncached outputs are committed even when the entry already exists
    for out in self._uncached_outs(stage, cache):
        out.commit()

    if existing_cache:
        return

    from dvc.schema import COMPILED_LOCK_FILE_STAGE_SCHEMA
    from dvc.utils.serialize import dump_yaml

    # sanity check
    COMPILED_LOCK_FILE_STAGE_SCHEMA(cache)

    path = PathInfo(self._get_cache_path(cache_key, cache_value))
    self.repo.odb.local.makedirs(path.parent)
    # Write to a temp file in the destination directory, then move it into
    # place so a partially-written cache entry is never visible. mkstemp +
    # close avoids the file-descriptor leak of an unclosed
    # NamedTemporaryFile(delete=False) object.
    fd, tmp = tempfile.mkstemp(dir=path.parent)
    os.close(fd)
    assert os.path.exists(path.parent)
    assert os.path.isdir(path.parent)
    dump_yaml(tmp, cache)
    self.repo.odb.local.move(PathInfo(tmp), path)
def save(self, stage):
    """Persist *stage*'s lockfile data as a run-cache entry.

    Skips callback/always-changed stages and stages that cannot be hashed;
    when an identical cache entry already exists, only commits any outputs
    missing from the cache.
    """
    if stage.is_callback or stage.always_changed:
        return

    cache_key = _get_stage_hash(stage)
    if not cache_key:
        return

    cache = to_single_stage_lockfile(stage)
    cache_value = _get_cache_hash(cache)

    existing_cache = self._load_cache(cache_key, cache_value)
    cache = existing_cache or cache

    # uncached outputs are committed even when the entry already exists
    for out in self._uncached_outs(stage, cache):
        out.commit()

    if existing_cache:
        return

    # sanity check
    COMPILED_LOCK_FILE_STAGE_SCHEMA(cache)

    path = PathInfo(self._get_cache_path(cache_key, cache_value))
    self.tree.makedirs(path.parent)
    # Write to a temp file in the destination directory, then move it into
    # place so a partially-written cache entry is never visible. mkstemp +
    # close avoids the file-descriptor leak of an unclosed
    # NamedTemporaryFile(delete=False) object.
    fd, tmp = tempfile.mkstemp(dir=path.parent)
    os.close(fd)
    assert os.path.exists(path.parent)
    assert os.path.isdir(path.parent)
    dump_yaml(tmp, cache)
    self.tree.move(PathInfo(tmp), path)
def test_repro_when_new_deps_is_moved(tmp_dir, dvc):
    """Repointing a dep at a moved file must trigger reproduction."""
    from dvc.dvcfile import Dvcfile
    from shutil import move

    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen({"foo": "foo", "bar": "foo"})
    stage = dvc.run(
        cmd="python copy.py {} {}".format("foo", "foobar"),
        outs=["foobar"],
        deps=["foo"],
        name="copy-file",
    )
    assert not dvc.reproduce(":copy-file")

    tmp_dir.gen("copy.py", COPY_SCRIPT_FORMAT.format("'bar'", "'foobar'"))
    move("foo", "bar")

    # rewrite the stage definition so its dep points at the moved file
    dvcfile = Dvcfile(dvc, stage.path)
    data, _ = dvcfile._load()
    data["stages"]["copy-file"]["deps"] = ["bar"]
    dump_yaml(stage.path, data)

    assert dvc.reproduce(":copy-file")[0] == stage
def test_plots_show_overlap(tmp_dir, dvc, run_copy_metrics, clear_before_run):
    """`plots show` must surface overlapping-output errors from the graph."""
    plots_dir = PathInfo("data")
    (tmp_dir / plots_dir).mkdir()
    dump_yaml(plots_dir / "m1_temp.yaml", {"a": {"b": {"c": 2, "d": 1}}})
    run_copy_metrics(
        str(plots_dir / "m1_temp.yaml"),
        str(plots_dir / "m1.yaml"),
        single_stage=False,
        commit="add m1",
        name="cp-m1",
        plots=[str(plots_dir / "m1.yaml")],
    )

    with modify_yaml("dvc.yaml") as d:
        # trying to make an output overlaps error
        d["stages"]["corrupted-stage"] = {
            "cmd": "mkdir data",
            "outs": ["data"],
        }

    # running by clearing and not clearing stuffs
    # so as it works even for optimized cases
    if clear_before_run:
        remove(plots_dir)
        remove(dvc.odb.local.cache_dir)

    dvc._reset()
    with pytest.raises(OverlappingOutputPathsError):
        dvc.plots.show()
def _gen(val):
    # write *val* into a temp metrics file, then copy it as tracked metrics
    dump_yaml("m_temp.yaml", {"a": {"b": {"c": val, "d": 1, "e": str(val)}}})
    run_copy_metrics(
        "m_temp.yaml", "m.yaml", metrics=["m.yaml"], commit=str(val)
    )
def test_mixed_vars_for_foreach_data(tmp_dir, dvc):
    """`foreach` data merged from two files is tracked per source file."""
    dump_yaml("params.yaml", {"models": {"model1": "foo"}})
    dump_yaml("test_params.yaml", {"models": {"model2": "bar"}})

    resolver = DataResolver(dvc, tmp_dir, {"vars": ["test_params.yaml"]})
    definition = ForeachDefinition(
        resolver,
        resolver.context,
        "build",
        {"foreach": "${models}", "do": {"cmd": "echo ${item}"}},
    )

    assert definition.resolve_all() == {
        "build@model1": {"cmd": "echo foo"},
        "build@model2": {"cmd": "echo bar"},
    }
    assert resolver.context == {"models": {"model1": "foo", "model2": "bar"}}
    # each generated stage attributes its value to the file that defined it
    assert resolver.tracked_vars == {
        "build@model1": {"params.yaml": {"models.model1": "foo"}},
        "build@model2": {"test_params.yaml": {"models.model2": "bar"}},
    }
def test_params_file_tracked_for_composite_list(tmp_dir, dvc):
    """`foreach` over a list of dicts tracks values by positional index."""
    models = [{"thresh": "foo"}, {"thresh": "bar"}]
    dump_yaml("params.yaml", {"models": models})

    resolver = DataResolver(dvc, tmp_dir, {})
    definition = ForeachDefinition(
        resolver,
        resolver.context,
        "build",
        {"foreach": "${models}", "do": {"cmd": "echo ${item.thresh}"}},
    )

    assert definition.resolve_all() == {
        "build@0": {"cmd": "echo foo"},
        "build@1": {"cmd": "echo bar"},
    }
    assert resolver.context == {"models": models}
    assert resolver.tracked_vars == {
        "build@0": {"params.yaml": {"models.0.thresh": "foo"}},
        "build@1": {"params.yaml": {"models.1.thresh": "bar"}},
    }
def test_similar_paths(self):
    # Layout — "something-1" merely shares a prefix with out "something":
    # .
    # |-- something.dvc (out.path == something)
    # |-- something
    # |__ something-1
    #     |-- a
    #     |__ a.dvc (stage.cwd == something-1)
    self.dvc.run(
        outs=["something"], cmd="mkdir something", single_stage=True
    )
    os.mkdir("something-1")

    stage_path = os.path.join("something-1", "a.dvc")
    dump_yaml(stage_path, {"cmd": "echo a > a", "outs": [{"path": "a"}]})

    try:
        self.dvc.reproduce(stage_path)
    except StagePathAsOutputError:
        self.fail("should not raise StagePathAsOutputError")
def test(self):
    """A stage writing one of its transitive deps must raise a cycle error."""
    self._run(
        deps=[self.FOO],
        outs=["bar.txt"],
        cmd="echo bar > bar.txt",
        name="copybarbar-txt",
    )
    self._run(
        deps=["bar.txt"],
        outs=["baz.txt"],
        cmd="echo baz > baz.txt",
        name="copybazbaz-txt",
    )

    # writing FOO from baz.txt closes the foo -> bar -> baz -> foo loop
    dump_yaml(
        "cycle.dvc",
        {
            "cmd": "echo baz > foo",
            "deps": [{"path": "baz.txt"}],
            "outs": [{"path": self.FOO}],
        },
    )

    with self.assertRaises(CyclicGraphError):
        self.dvc.reproduce("cycle.dvc")
def test_track_from_multiple_files(tmp_dir):
    """Merged contexts must attribute tracked keys to their source file.

    Two params files are merged into a single Context; selecting a leaf
    key must mark it tracked only against the file that defines it, and
    selecting intermediate dict nodes must not track anything.
    """
    d1 = {"Train": {"us": {"lr": 10}}}
    d2 = {"Train": {"us": {"layers": 100}}}
    tree = LocalTree(None, config={})
    path1 = tmp_dir / "params.yaml"
    path2 = tmp_dir / "params2.yaml"
    dump_yaml(path1, d1, tree)
    dump_yaml(path2, d2, tree)

    context = Context.load_from(tree, path1)
    c = Context.load_from(tree, path2)
    context.merge_update(c)

    def key_tracked(d, path, key):
        # tracked data is keyed by path relative to the current directory
        return key in d[relpath(path)]

    with context.track() as tracked:
        # selecting a dict node is not considered "using" a param
        context.select("Train")
        assert not (
            key_tracked(tracked, path1, "Train")
            or key_tracked(tracked, path2, "Train")
        )
        context.select("Train.us")
        assert not (
            key_tracked(tracked, path1, "Train.us")
            or key_tracked(tracked, path2, "Train.us")
        )
        # leaf keys are tracked, and only against their defining file
        context.select("Train.us.lr")
        assert key_tracked(tracked, path1, "Train.us.lr") and not key_tracked(
            tracked, path2, "Train.us.lr"
        )
        context.select("Train.us.layers")
        assert not key_tracked(
            tracked, path1, "Train.us.layers"
        ) and key_tracked(tracked, path2, "Train.us.layers")

    # a clone starts with a clean tracking slate
    context = Context.clone(context)
    assert not context._tracked_data

    # let's see with an alias
    context["us"] = context["Train"]["us"]
    with context.track() as tracked:
        context.select("us")
        assert not (
            key_tracked(tracked, path1, "Train.us")
            or key_tracked(tracked, path2, "Train.us")
        )
        # aliased leaves still resolve back to the original source file
        context.select("us.lr")
        assert key_tracked(tracked, path1, "Train.us.lr") and not key_tracked(
            tracked, path2, "Train.us.lr"
        )
        context.select("Train.us.layers")
        assert not key_tracked(
            tracked, path1, "Train.us.layers"
        ) and key_tracked(tracked, path2, "Train.us.layers")
def test(self):
    # strip the recorded checksums so checkout has nothing to restore from
    stage_data = load_yaml(self.file1_stage)
    del stage_data[Stage.PARAM_OUTS][0][LocalTree.PARAM_CHECKSUM]
    del stage_data[Stage.PARAM_DEPS][0][LocalTree.PARAM_CHECKSUM]
    dump_yaml(self.file1_stage, stage_data)

    with pytest.raises(CheckoutError):
        self.dvc.checkout(force=True)
def test(self):
    # stripping checksums makes the stage look changed, so it must re-run
    stage_data = load_yaml(self.file1_stage)
    del stage_data[Stage.PARAM_OUTS][0][LocalFileSystem.PARAM_CHECKSUM]
    del stage_data[Stage.PARAM_DEPS][0][LocalFileSystem.PARAM_CHECKSUM]
    dump_yaml(self.file1_stage, stage_data)

    reproduced = self.dvc.reproduce(self.file1_stage)
    self.assertEqual(len(reproduced), 1)
def test_partial_vars_doesnot_exist(tmp_dir, dvc):
    """Importing a missing sub-key from a vars file must fail loudly."""
    dump_yaml("test_params.yaml", {"sub1": "sub1"})

    with pytest.raises(ResolveError) as exc_info:
        DataResolver(dvc, tmp_dir, {"vars": ["test_params.yaml:sub2"]})

    expected = (
        "failed to parse 'vars' in 'dvc.yaml': "
        "could not find 'sub2' in 'test_params.yaml'"
    )
    assert str(exc_info.value) == expected
def test_stage_load_on_invalid_data(tmp_dir, dvc, file):
    """Both stage loading and validation must reject malformed dvcfiles."""
    bad_data = {"is_this_a_valid_dvcfile": False}
    dump_yaml(file, bad_data)

    dvcfile = Dvcfile(dvc, file)
    with pytest.raises(StageFileFormatError):
        assert dvcfile.stages
    with pytest.raises(StageFileFormatError):
        assert dvcfile.validate(bad_data, file)
def test_read_params_nested(tmp_dir, dvc):
    """Dotted param names resolve through nested dicts in the params file."""
    dump_yaml(
        DEFAULT_PARAMS_FILE,
        {"some": {"path": {"foo": ["val1", "val2"]}}},
    )
    dep = ParamsDependency(Stage(dvc), None, ["some.path.foo"])
    assert dep.read_params() == {"some.path.foo": ["val1", "val2"]}
def test_simple(tmp_dir, dvc):
    """Templated dvc.yaml resolves and tracks vars used by each stage."""
    dump_yaml(tmp_dir / DEFAULT_PARAMS_FILE, CONTEXT_DATA)
    resolver = DataResolver(
        dvc, PathInfo(str(tmp_dir)), deepcopy(TEMPLATED_DVC_YAML_DATA)
    )
    assert_stage_equal(resolver.resolve(), deepcopy(RESOLVED_DVC_YAML_DATA))

    expected_tracked = {
        stage: {DEFAULT_PARAMS_FILE: USED_VARS[stage]}
        for stage in ("stage1", "stage2")
    }
    assert resolver.tracked_vars == expected_tracked
def dump(self, stage, **kwargs):
    """Dumps given stage appropriately in the dvcfile."""
    from dvc.stage import PipelineStage

    # pipeline stages belong in dvc.yaml, not in single-stage files
    assert not isinstance(stage, PipelineStage)
    if self.verify:
        check_dvcfile_path(self.repo, self.path)

    logger.debug(f"Saving information to '{relpath(self.path)}'.")
    serialized = serialize.to_single_stage_file(stage)
    dump_yaml(self.path, serialized)
    self.repo.scm.track_file(self.relpath)
def test_dvcfile_try_dumping_parametrized_stage(tmp_dir, dvc, data, name):
    """Dumping a stage produced by templating must be rejected."""
    dump_yaml("dvc.yaml", {"stages": data, "vars": [{"foo": "foobar"}]})

    stage = dvc.stage.load_one(name=name)
    with pytest.raises(ParametrizedDumpError) as exc:
        stage.dvcfile.dump(stage)
    assert str(exc.value) == f"cannot dump a parametrized stage: '{name}'"
def test_local_declared_vars_overwrite(tmp_dir, dvc):
    """Redefining a key across local `vars` entries must be an error."""
    dump_yaml(DEFAULT_PARAMS_FILE, DATA)

    with pytest.raises(ResolveError) as exc_info:
        DataResolver(
            dvc, tmp_dir, {"vars": [DATA["models"], DATA["models"]]}
        )

    expected = (
        "failed to parse 'vars' in 'dvc.yaml':\n"
        "cannot redefine 'bar' from 'vars[1]' "
        "as it already exists in 'vars[0]'"
    )
    assert str(exc_info.value) == expected
def test_vars_load_partial(tmp_dir, dvc, local, vars_):
    """Partial vars imports resolve whether declared locally or globally."""
    dump_yaml(tmp_dir / "test_params.yaml", {"bar": "bar", "foo": "foo"})
    definition = {"stages": {"build": {"cmd": "echo ${bar}"}}}
    # attach `vars` either to the stage itself or at the top level
    target = definition["stages"]["build"] if local else definition
    target["vars"] = vars_
    resolver = DataResolver(dvc, PathInfo(str(tmp_dir)), definition)
    resolver.resolve()
def test_stage_dump_when_already_exists(tmp_dir, dvc):
    """Dumping into an existing lockfile keeps the previous entries."""
    existing = {"s1": {"cmd": "command", "deps": [], "outs": []}}
    dump_yaml("path.lock", existing)

    lockfile = Lockfile(dvc, "path.lock")
    lockfile.dump(
        PipelineStage(name="s2", repo=dvc, path="path", cmd="command2")
    )

    assert lockfile.load() == {
        "schema": "2.0",
        "stages": {**existing, "s2": {"cmd": "command2"}},
    }
def test_lockfile_invalid_versions(tmp_dir, dvc, version_info):
    """An unknown schema version must surface as a corrupted lockfile."""
    dump_yaml(
        "dvc.lock",
        {**version_info, "stages": {"foo": {"cmd": "echo foo"}}},
    )

    with pytest.raises(LockfileCorruptedError) as exc_info:
        Lockfile(dvc, tmp_dir / "dvc.lock").load()

    assert str(exc_info.value) == "Lockfile 'dvc.lock' is corrupted."
    expected_cause = (
        "'dvc.lock' format error: "
        f"invalid schema version {version_info['schema']}, "
        "expected one of ['2.0'] for dictionary value @ "
        "data['schema']"
    )
    assert str(exc_info.value.__cause__) == expected_cause