def test(self):
    """A stage whose wdir lives under another stage's output dir must
    raise StagePathAsOutputError on reproduce.

    File structure:
        .
        |-- dir1
        |   |__ dir2.dvc   (out.path == ../dir2)
        |__ dir2
            |__ something.dvc   (stage.cwd == ./dir2)
    """
    os.mkdir(os.path.join(self.dvc.root_dir, "dir1"))
    self.dvc.run(
        cwd="dir1",
        outs=["../dir2"],
        cmd="mkdir {path}".format(path=os.path.join("..", "dir2")),
    )

    # Hand-craft a stage file inside dir2, which is itself an output of
    # the stage above.
    faulty_stage_path = os.path.join("dir2", "something.dvc")
    out_path = os.path.join("..", "something")
    dump_stage_file(
        faulty_stage_path,
        {
            "cmd": "echo something > {}".format(out_path),
            "outs": [{"path": out_path}],
        },
    )

    with self.assertRaises(StagePathAsOutputError):
        self.dvc.reproduce(faulty_stage_path)
def test(self):
    """Checkout with force succeeds even after checksums are stripped
    from the stage file."""
    data = load_stage_file(self.file1_stage)
    # Drop both the output and the dependency checksum entries.
    del data[Stage.PARAM_OUTS][0][RemoteLOCAL.PARAM_CHECKSUM]
    del data[Stage.PARAM_DEPS][0][RemoteLOCAL.PARAM_CHECKSUM]
    dump_stage_file(self.file1_stage, data)

    self.dvc.checkout(force=True)
def dump(self):
    """Serialize this stage back to its DVC-file, preserving any
    comments/formatting and the untouched ``meta`` section."""
    fname = self.path
    self._check_dvc_filename(fname)

    logger.debug(
        "Saving information to '{file}'.".format(file=relpath(fname))
    )
    state = self.dumpd()

    # When we load a stage we parse yaml with a fast parser, which strips
    # off all the comments and formatting. To retain those on update we do
    # a trick here:
    # - reparse the same yaml text with a slow but smart ruamel yaml parser
    # - apply changes to a returned structure
    # - serialize it
    if self._stage_text is not None:
        saved_state = parse_stage_for_update(self._stage_text, fname)
        # Stage doesn't work with meta in any way, so .dumpd() doesn't
        # have it. We simply copy it over.
        if "meta" in saved_state:
            state["meta"] = saved_state["meta"]
        apply_diff(state, saved_state)
        state = saved_state

    dump_stage_file(fname, state)
    self.repo.scm.track_file(relpath(fname))
def test_repro_when_lockfile_gets_deleted(tmp_dir, dvc):
    """Deleting the lock file must force the pipeline stage to re-run."""
    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen("foo", "foo")
    dump_stage_file(
        PIPELINE_FILE,
        {
            "stages": {
                "run-copy": {
                    "cmd": "python copy.py {} {}".format("foo", "foobar"),
                    "deps": ["foo"],
                    "outs": ["foobar"],
                }
            }
        },
    )

    # First run produces the lock file; a second run is a no-op.
    assert dvc.reproduce(":run-copy")
    assert os.path.exists(PIPELINE_LOCK)
    assert not dvc.reproduce(":run-copy")

    # Removing the lock file invalidates the stage.
    os.unlink(PIPELINE_LOCK)
    stages = dvc.reproduce(":run-copy")
    assert (
        stages
        and stages[0].relpath == PIPELINE_FILE
        and stages[0].name == "run-copy"
    )
def test_repro_when_new_deps_is_moved(tmp_dir, dvc):
    """Repointing a stage's dependency to a moved file must re-trigger
    reproduction."""
    from dvc.dvcfile import Dvcfile

    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen({"foo": "foo", "bar": "foo"})
    stage = dvc.run(
        cmd="python copy.py {} {}".format("foo", "foobar"),
        outs=["foobar"],
        deps=["foo"],
        name="copy-file",
    )
    target = ":copy-file"
    assert not dvc.reproduce(target)

    # Move the dependency and update both the script and the dvcfile.
    tmp_dir.gen("copy.py", COPY_SCRIPT_FORMAT.format("'bar'", "'foobar'"))
    from shutil import move

    move("foo", "bar")

    dvcfile = Dvcfile(dvc, stage.path)
    data, _ = dvcfile._load()
    data["stages"]["copy-file"]["deps"] = ["bar"]
    dump_stage_file(stage.path, data)

    assert dvc.reproduce(target)[0] == stage
def test(self):
    """Checkout must raise CheckoutError once checksums are stripped
    from the stage file, even with force."""
    data = load_stage_file(self.file1_stage)
    # Remove the stored checksums for both the out and the dep.
    del data[Stage.PARAM_OUTS][0][LocalRemote.PARAM_CHECKSUM]
    del data[Stage.PARAM_DEPS][0][LocalRemote.PARAM_CHECKSUM]
    dump_stage_file(self.file1_stage, data)

    with pytest.raises(CheckoutError):
        self.dvc.checkout(force=True)
def test(self):
    """Stripping checksums from the stage file makes it stale, so
    reproduce must re-run exactly that one stage."""
    data = load_stage_file(self.file1_stage)
    del data[Stage.PARAM_OUTS][0][RemoteLOCAL.PARAM_CHECKSUM]
    del data[Stage.PARAM_DEPS][0][RemoteLOCAL.PARAM_CHECKSUM]
    dump_stage_file(self.file1_stage, data)

    stages = self.dvc.reproduce(self.file1_stage)
    self.assertEqual(len(stages), 1)
def dump(self, stage, **kwargs):
    """Dumps given stage appropriately in the dvcfile."""
    from dvc.stage import PipelineStage

    # Pipeline stages are dumped elsewhere; only single stages belong here.
    assert not isinstance(stage, PipelineStage)
    check_dvc_filename(self.path)

    logger.debug(
        "Saving information to '{file}'.".format(file=relpath(self.path))
    )
    dump_stage_file(self.path, serialize.to_single_stage_file(stage))
    self.repo.scm.track_file(relpath(self.path))
def test_commit_changed_md5(tmp_dir, dvc):
    """Committing a stage whose md5 was tampered with must fail unless
    forced."""
    tmp_dir.gen({"file": "file content"})
    (stage,) = dvc.add("file", no_commit=True)

    # Corrupt the recorded md5 on disk.
    content = load_stage_file(stage.path)
    content["md5"] = "1111111111"
    dump_stage_file(stage.path, content)

    with pytest.raises(StageCommitError):
        dvc.commit(stage.path)

    # Force overrides the mismatch.
    dvc.commit(stage.path, force=True)
def dump(self):
    """Write this stage to its DVC-file, diffing the fresh dump against
    the previously loaded state to preserve untouched entries."""
    fname = self.path
    self._check_dvc_filename(fname)

    logger.info(
        "Saving information to '{file}'.".format(file=relpath(fname))
    )

    dumped = self.dumpd()
    # Merge the new dump into the retained on-disk structure.
    apply_diff(dumped, self._state)
    dump_stage_file(fname, self._state)

    self.repo.scm.track_file(relpath(fname))
def dump(self, stage, **kwargs):
    """Merge the given stage's lock entry into the lockfile on disk."""
    stage_data = serialize.to_lockfile(stage)

    if not self.exists():
        data = stage_data
        # Touch the file so subsequent operations see it.
        with open(self.path, "w+"):
            pass
    else:
        with self.repo.tree.open(self.path, "r") as fd:
            data = parse_stage_for_update(fd.read(), self.path)
        data.update(stage_data)

    dump_stage_file(self.path, data)
    self.repo.scm.track_file(relpath(self.path))
def test_commit_changed_md5(dvc_repo, repo_dir):
    """Commit must refuse a stage file with a tampered md5 unless forced."""
    stages = dvc_repo.add(repo_dir.FOO, no_commit=True)
    assert len(stages) == 1
    stage = stages[0]

    # Overwrite the stored md5 with garbage.
    content = load_stage_file(stage.path)
    content["md5"] = "1111111111"
    dump_stage_file(stage.path, content)

    with pytest.raises(StageCommitError):
        dvc_repo.commit(stage.path)

    dvc_repo.commit(stage.path, force=True)
def test_meta_is_preserved(dvc_repo):
    """A custom ``meta`` section must survive a load/dump round trip."""
    stage, = dvc_repo.add("foo")

    # Add meta to stage file
    data = load_stage_file(stage.path)
    data["meta"] = {"custom_key": 42}
    dump_stage_file(stage.path, data)

    # Loading and dumping to test that it works and meta is retained
    reloaded = Stage.load(dvc_repo, stage.path)
    reloaded.dump()

    assert load_stage_file(stage.path)["meta"] == data["meta"]
def test_meta_is_preserved(tmp_dir, dvc):
    """A custom ``meta`` section must survive a load/dump round trip."""
    (stage,) = tmp_dir.dvc_gen("foo", "foo content")

    # Add meta to DVC-file
    data = load_stage_file(stage.path)
    data["meta"] = {"custom_key": 42}
    dump_stage_file(stage.path, data)

    # Loading and dumping to test that it works and meta is retained
    reloaded = Stage.load(dvc, stage.path)
    reloaded.dump()

    assert load_stage_file(stage.path)["meta"] == data["meta"]
def test_nested(self):
    """A stage whose wdir is nested under another stage's output must
    raise StagePathAsOutputError.

    Layout:
        .
        |-- a
        |   |__ nested
        |       |__ dir
        |           |__ error.dvc   (stage.cwd == 'a/nested/dir')
        |__ b
            |__ nested.dvc          (stage.out == 'a/nested')
    """
    from dvc.stage import Stage

    dir1 = "b"
    dir2 = "a"
    os.mkdir(dir1)
    os.mkdir(dir2)

    nested_dir = os.path.join(dir2, "nested")
    out_dir = os.path.relpath(nested_dir, dir1)
    nested_stage = self.dvc.run(
        cwd=dir1,  # b
        outs=[out_dir],  # ../a/nested
        cmd="mkdir {path}".format(path=out_dir),
    )

    os.mkdir(os.path.join(nested_dir, "dir"))
    error_stage_path = os.path.join(nested_dir, "dir", "error.dvc")
    out_path = os.path.join("..", "..", "something")
    dump_stage_file(
        error_stage_path,
        {
            "cmd": "echo something > {}".format(out_path),
            "outs": [{"path": out_path}],
        },
    )

    # NOTE: os.walk() walks in a sorted order and we need dir2 subdirs to
    # be processed before dir1 to load error.dvc first.
    with patch.object(DvcRepo, "stages") as mock_stages:
        mock_stages.return_value = [
            nested_stage,
            Stage.load(self.dvc, error_stage_path),
        ]
        with self.assertRaises(StagePathAsOutputError):
            self.dvc.reproduce(error_stage_path)
def test_cyclic_graph_error(tmp_dir, dvc, run_copy):
    """Introducing a stage that closes a dependency cycle must raise
    CyclicGraphError on reproduce."""
    tmp_dir.gen("foo", "foo")
    run_copy("foo", "bar", name="copy-foo-bar")
    run_copy("bar", "baz", name="copy-bar-baz")
    run_copy("baz", "foobar", name="copy-baz-foobar")

    with open(PIPELINE_FILE, "r") as f:
        data = parse_stage(f.read(), PIPELINE_FILE)

    # baz -> foo closes the cycle foo -> bar -> baz -> foo.
    data["stages"]["copy-baz-foo"] = {
        "cmd": "echo baz > foo",
        "deps": ["baz"],
        "outs": ["foo"],
    }
    dump_stage_file(PIPELINE_FILE, data)

    with pytest.raises(CyclicGraphError):
        dvc.reproduce(":copy-baz-foo")
def test_nested(self):
    """A stage whose wdir is nested under another stage's output must
    raise StagePathAsOutputError.

    Layout:
        .
        |-- a
        |   |__ nested
        |       |__ dir
        |           |__ error.dvc   (stage.cwd == 'a/nested/dir')
        |__ b
            |__ nested.dvc          (stage.out == 'a/nested')
    """
    dir1 = "b"
    dir2 = "a"
    os.mkdir(dir1)
    os.mkdir(dir2)

    nested_dir = os.path.join(dir2, "nested")
    out_dir = relpath(nested_dir, dir1)
    nested_stage = self.dvc.run(
        fname=os.path.join(dir1, "b.dvc"),
        wdir=dir1,
        outs=[out_dir],  # ../a/nested
        cmd="mkdir {path}".format(path=out_dir),
    )

    os.mkdir(os.path.join(nested_dir, "dir"))
    error_stage_path = os.path.join(nested_dir, "dir", "error.dvc")
    out_path = os.path.join("..", "..", "something")
    dump_stage_file(
        error_stage_path,
        {
            "cmd": "echo something > {}".format(out_path),
            "outs": [{"path": out_path}],
        },
    )

    # NOTE: os.walk() walks in a sorted order and we need dir2 subdirs to
    # be processed before dir1 to load error.dvc first.
    self.dvc.stages = [
        nested_stage,
        Stage.load(self.dvc, error_stage_path),
    ]

    with patch.object(self.dvc, "_reset"):  # to prevent `stages` resetting
        with self.assertRaises(StagePathAsOutputError):
            self.dvc.reproduce(error_stage_path)
def dump(self, stage, **kwargs):
    """Merge the stage's lock entry into the lockfile, logging and
    tracking the file only when its contents actually change."""
    stage_data = serialize.to_lockfile(stage)

    if not self.exists():
        modified = True
        logger.info("Generating lock file '%s'", self.relpath)
        data = stage_data
        # Touch the file so subsequent operations see it.
        with open(self.path, "w+"):
            pass
    else:
        with self.repo.tree.open(self.path, "r") as fd:
            data = parse_stage_for_update(fd.read(), self.path)
        modified = data.get(stage.name, {}) != stage_data.get(
            stage.name, {}
        )
        if modified:
            logger.info("Updating lock file '%s'", self.relpath)
        data.update(stage_data)

    dump_stage_file(self.path, data)
    if modified:
        self.repo.scm.track_file(self.relpath)
def test(self):
    """A hand-written stage that feeds its output back into the first
    stage's dependency must raise CyclicGraphError."""
    self.dvc.run(
        deps=[self.FOO], outs=["bar.txt"], cmd="echo bar > bar.txt"
    )
    self.dvc.run(
        deps=["bar.txt"], outs=["baz.txt"], cmd="echo baz > baz.txt"
    )

    # baz.txt -> FOO closes the cycle FOO -> bar.txt -> baz.txt -> FOO.
    dump_stage_file(
        "cycle.dvc",
        {
            "cmd": "echo baz > foo",
            "deps": [{"path": "baz.txt"}],
            "outs": [{"path": self.FOO}],
        },
    )

    with self.assertRaises(CyclicGraphError):
        self.dvc.reproduce("cycle.dvc")
def _dump_pipeline_file(self, stage):
    """Insert or update the given stage's entry in the pipeline file."""
    data = {}
    if self.exists():
        with open(self.path) as fd:
            data = parse_stage_for_update(fd.read(), self.path)
    else:
        # Touch the file so subsequent operations see it.
        open(self.path, "w+").close()

    data["stages"] = data.get("stages", {})
    stage_data = serialize.to_pipeline_file(stage)

    existing = data["stages"].get(stage.name)
    if existing:
        # Update in place to keep comments/formatting of the entry.
        apply_diff(stage_data[stage.name], existing)
    else:
        data["stages"].update(stage_data)

    dump_stage_file(self.path, data)
    self.repo.scm.track_file(relpath(self.path))
def save(self, stage):
    """Persist the stage's lock data into the run cache, keyed by the
    stage hash and the hash of the lock contents."""
    cache_key = _get_stage_hash(stage)
    if not cache_key:
        return

    cache = to_single_stage_lockfile(stage)
    cache_value = _get_cache_hash(cache)
    # Already cached — nothing to do.
    if self._load_cache(cache_key, cache_value):
        return

    # sanity check
    COMPILED_LOCK_FILE_STAGE_SCHEMA(cache)

    path = self._get_cache_path(cache_key, cache_value)
    makedirs(os.path.dirname(path), exist_ok=True)
    dump_stage_file(path, cache)
def test_repro_when_new_outs_added_does_not_exist(tmp_dir, dvc):
    """Reproduce must fail when a declared output is never produced.

    The cmd deliberately omits ``.py`` and the stage declares an extra
    out ("bar") that the command never creates.
    """
    from dvc.exceptions import ReproductionError

    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen("foo", "foo")
    dump_stage_file(
        PIPELINE_FILE,
        {
            "stages": {
                "run-copy": {
                    "cmd": "python copy {} {}".format("foo", "foobar"),
                    "deps": ["foo"],
                    "outs": ["foobar", "bar"],
                }
            }
        },
    )

    with pytest.raises(ReproductionError):
        dvc.reproduce(":run-copy")
def test_repro_when_new_out_overlaps_others_stage_outs(tmp_dir, dvc):
    """An output nested inside another stage's tracked directory must
    raise OverlappingOutputPathsError."""
    from dvc.exceptions import OverlappingOutputPathsError

    tmp_dir.gen({"dir": {"file1": "file1"}, "foo": "foo"})
    dvc.add("dir")
    dump_stage_file(
        PIPELINE_FILE,
        {
            "stages": {
                "run-copy": {
                    "cmd": "python copy {} {}".format("foo", "dir/foo"),
                    "deps": ["foo"],
                    "outs": ["dir/foo"],
                }
            }
        },
    )

    with pytest.raises(OverlappingOutputPathsError):
        dvc.reproduce(":run-copy")
def test(self):
    """Reloading and dumping a stage must not change its stored md5."""
    stages = self.dvc.add(self.FOO)
    self.assertEqual(len(stages), 1)
    stage = stages[0]
    self.assertTrue(stage is not None)

    # NOTE: checking that reloaded stage didn't change its checksum
    md5 = "11111111111111111111111111111111"
    content = load_stage_file(stage.relpath)
    content[stage.PARAM_MD5] = md5
    dump_stage_file(stage.relpath, content)

    stage = Stage.load(self.dvc, stage.relpath)
    self.assertTrue(stage is not None)
    stage.dump()

    content = load_stage_file(stage.relpath)
    self.assertEqual(content[stage.PARAM_MD5], md5)
def test_repro_when_new_outs_is_added_in_dvcfile(tmp_dir, dvc):
    """Adding a previously forgotten out to the dvcfile must make the
    stage stale and re-run it."""
    from dvc.dvcfile import Dvcfile

    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen({"foo": "foo", "bar": "bar"})
    stage = dvc.run(
        cmd="python copy.py {} {}".format("foo", "foobar"),
        outs=[],  # scenario where user forgot to add
        deps=["foo"],
        name="copy-file",
    )
    target = ":copy-file"
    assert not dvc.reproduce(target)

    # Declare the missing out directly in the dvcfile.
    dvcfile = Dvcfile(dvc, stage.path)
    data, _ = dvcfile._load()
    data["stages"]["copy-file"]["outs"] = ["foobar"]
    dump_stage_file(stage.path, data)

    assert dvc.reproduce(target)[0] == stage
def test_repro_when_new_deps_is_added_in_dvcfile(tmp_dir, dvc, run_copy):
    """Appending a new dependency in the dvcfile must make the stage
    stale and re-run it."""
    from dvc.dvcfile import Dvcfile

    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen({"foo": "foo", "bar": "bar"})
    stage = dvc.run(
        cmd="python copy.py {} {}".format("foo", "foobar"),
        outs=["foobar"],
        deps=["foo"],
        name="copy-file",
    )
    target = PIPELINE_FILE + ":copy-file"
    assert not dvc.reproduce(target)

    # Track the script itself as an additional dependency.
    dvcfile = Dvcfile(dvc, stage.path)
    data, _ = dvcfile._load()
    data["stages"]["copy-file"]["deps"] += ["copy.py"]
    dump_stage_file(stage.path, data)

    assert dvc.reproduce(target)[0] == stage
def _test(self):
    """`dvc status -c` must warn when an output lacks version info."""
    url = Local.get_url()
    self.main(["remote", "add", "-d", TEST_REMOTE, url])

    stage = self.dvc.run(outs=["bar"], cmd="echo bar > bar")
    self.main(["push"])

    # Strip the output's md5 from the stage file.
    stage_file_path = stage.relpath
    content = load_stage_file(stage_file_path)
    del content["outs"][0]["md5"]
    dump_stage_file(stage_file_path, content)

    with self._caplog.at_level(logging.WARNING, logger="dvc"):
        self._caplog.clear()
        self.main(["status", "-c"])
        expected_warning = (
            "Output 'bar'(Stage: 'bar.dvc') is missing version info."
            " Cache for it will not be collected."
            " Use dvc repro to get your pipeline up to date."
        )

        assert expected_warning in self._caplog.text
def test_ignored_in_checksum(self):
    """Removing the wdir entry ('.') from the stage file must not make
    the stage look changed — wdir is ignored in the checksum."""
    stage = self.dvc.run(
        cmd="echo test > {}".format(self.FOO),
        deps=[self.BAR],
        outs=[self.FOO],
    )

    # Both the in-memory dump and the file carry wdir == ".".
    dumped = stage.dumpd()
    self.assertEqual(dumped[stage.PARAM_WDIR], ".")

    dumped = load_stage_file(stage.relpath)
    self.assertEqual(dumped[stage.PARAM_WDIR], ".")

    # Drop wdir from the on-disk stage file.
    del dumped[stage.PARAM_WDIR]
    dump_stage_file(stage.relpath, dumped)

    dumped = load_stage_file(stage.relpath)
    self.assertIsNone(dumped.get(stage.PARAM_WDIR))

    with self.dvc.state:
        stage = Stage.load(self.dvc, stage.relpath)
        self.assertFalse(stage.changed())
def test_similar_paths(self):
    """Directories that merely share a name prefix with an output must
    NOT trigger StagePathAsOutputError.

    File structure:
        .
        |-- something.dvc   (out.path == something)
        |-- something
        |__ something-1
            |-- a
            |__ a.dvc       (stage.cwd == something-1)
    """
    self.dvc.run(outs=["something"], cmd="mkdir something")
    os.mkdir("something-1")

    stage_path = os.path.join("something-1", "a.dvc")
    dump_stage_file(
        stage_path, {"cmd": "echo a > a", "outs": [{"path": "a"}]}
    )

    try:
        self.dvc.reproduce(stage_path)
    except StagePathAsOutputError:
        self.fail("should not raise StagePathAsOutputError")