def test_repro_when_new_out_overlaps_others_stage_outs(tmp_dir, dvc):
    """Reproducing must fail when a stage output lies inside an added directory.

    ``dir`` is tracked through ``dvc.add`` and the pipeline stage then
    declares ``dir/foo`` as its own output; ``dvc.reproduce`` is expected
    to raise ``OverlappingOutputPathsError``.
    """
    from dvc.exceptions import OverlappingOutputPathsError

    src, dst = "foo", "dir/foo"

    tmp_dir.gen({"dir": {"file1": "file1"}, "foo": "foo"})
    dvc.add("dir")

    run_copy_stage = {
        "cmd": "python copy {} {}".format(src, dst),
        "deps": [src],
        "outs": [dst],
    }
    dump_yaml(PIPELINE_FILE, {"stages": {"run-copy": run_copy_stage}})

    with pytest.raises(OverlappingOutputPathsError):
        dvc.reproduce(":run-copy")
def test_repro_when_new_outs_added_does_not_exist(tmp_dir, dvc):
    """Reproducing must fail when a declared output is not produced.

    The stage lists both ``foobar`` and ``bar`` as outputs, but the command
    is only given ``foobar`` as its destination (presumably ``bar`` is never
    created); ``dvc.reproduce`` is expected to raise ``ReproductionError``.
    """
    from dvc.exceptions import ReproductionError

    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen("foo", "foo")

    src, dst = "foo", "foobar"
    run_copy_stage = {
        "cmd": "python copy.py {} {}".format(src, dst),
        "deps": [src],
        "outs": [dst, "bar"],
    }
    dump_yaml(PIPELINE_FILE, {"stages": {"run-copy": run_copy_stage}})

    with pytest.raises(ReproductionError):
        dvc.reproduce(":run-copy")
def test_remove_stage_on_lockfile_format_error(tmp_dir, dvc, run_copy):
    """``remove_stage`` must raise ``StageFileFormatError`` on malformed files.

    Two cases are exercised in turn: first an unexpected key is written to
    the lockfile, then (after the lockfile is removed and the stage dumped
    again) the same unexpected key is written to the pipeline file.
    """
    tmp_dir.gen("foo", "foo")
    stage = run_copy("foo", "bar", name="copy-foo-bar")

    pipeline = Dvcfile(dvc, stage.path)
    lockfile = pipeline._lockfile

    # Load both documents up front and poison each with an unknown key.
    pipeline_data, _ = pipeline._load()
    lockfile_data = lockfile.load()
    lockfile_data["gibberish"] = True
    pipeline_data["gibberish"] = True

    # Case 1: the lockfile is malformed.
    dump_yaml(lockfile.relpath, lockfile_data)
    with pytest.raises(StageFileFormatError):
        pipeline.remove_stage(stage)

    # Case 2: drop the lockfile, dump the stage again, then write the
    # malformed pipeline file.
    lockfile.remove()
    pipeline.dump(stage)
    dump_yaml(pipeline.relpath, pipeline_data)
    with pytest.raises(StageFileFormatError):
        pipeline.remove_stage(stage)
def test_repro_when_new_outs_is_added_in_dvcfile(tmp_dir, dvc):
    """A stage is reproduced again once an output is added to its definition.

    The stage is first run without any outputs, and a following reproduce
    returns nothing. After ``foobar`` is written into the stage's ``outs``
    in the stage file, reproduce must return the stage.
    """
    from dvc.dvcfile import Dvcfile

    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen({"foo": "foo", "bar": "bar"})

    # scenario where user forgot to add
    no_outs = []
    stage = dvc.run(
        cmd="python copy.py {} {}".format("foo", "foobar"),
        outs=no_outs,
        deps=["foo"],
        name="copy-file",
    )

    target = ":copy-file"
    assert not dvc.reproduce(target)

    # Edit the stage definition on disk to declare the missing output.
    data = Dvcfile(dvc, stage.path)._load()[0]
    stage_entry = data["stages"]["copy-file"]
    stage_entry["outs"] = ["foobar"]
    dump_yaml(stage.path, data)

    reproduced = dvc.reproduce(target)
    assert reproduced[0] == stage
def test_repro_when_new_deps_is_added_in_dvcfile(tmp_dir, dvc, run_copy):
    """A stage is reproduced again once a dependency is added to its definition.

    After the initial run a reproduce returns nothing. ``copy.py`` is then
    appended to the stage's ``deps`` in the stage file, and reproduce must
    return the stage.
    """
    from dvc.dvcfile import Dvcfile

    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen({"foo": "foo", "bar": "bar"})

    copy_cmd = "python copy.py {} {}".format("foo", "foobar")
    stage = dvc.run(
        cmd=copy_cmd, outs=["foobar"], deps=["foo"], name="copy-file",
    )

    target = PIPELINE_FILE + ":copy-file"
    assert not dvc.reproduce(target)

    # Edit the stage definition on disk to declare the extra dependency.
    data = Dvcfile(dvc, stage.path)._load()[0]
    stage_entry = data["stages"]["copy-file"]
    stage_entry["deps"] += ["copy.py"]
    dump_yaml(stage.path, data)

    reproduced = dvc.reproduce(target)
    assert reproduced[0] == stage
def test_repro_when_cmd_changes(tmp_dir, dvc, run_copy, mocker):
    """A single-stage file is re-run when only the spacing of its cmd changes.

    After the initial run a reproduce returns nothing. The stage file's
    ``cmd`` is then rewritten with its tokens separated by two spaces, and
    the test expects:

    * ``dvc.status`` to report ``"changed checksum"`` for the stage,
    * ``dvc.reproduce`` to return the stage, and
    * ``dvc.stage.run.cmd_run`` to have been called exactly once with it.
    """
    from dvc.dvcfile import SingleStageFile

    tmp_dir.gen("foo", "foo")
    stage = run_copy("foo", "bar", single_stage=True)
    assert not dvc.reproduce(stage.addressing)

    from dvc.stage.run import cmd_run

    # Wrap (not replace) cmd_run so the command still executes while calls
    # are recorded.
    m = mocker.patch("dvc.stage.run.cmd_run", wraps=cmd_run)

    data = SingleStageFile(dvc, stage.path)._load()[0]
    # Change cmd spacing by two: join the tokens with TWO spaces. Joining
    # with a single space would reproduce a single-spaced command verbatim
    # and leave the stage file unchanged, so nothing below could be detected.
    data["cmd"] = "  ".join(stage.cmd.split())
    dump_yaml(stage.path, data)

    assert dvc.status([stage.addressing]) == {
        stage.addressing: ["changed checksum"]
    }
    assert dvc.reproduce(stage.addressing)[0] == stage
    m.assert_called_once_with(stage)
def test(self):
    """Dumping a reloaded stage must keep the md5 stored in its file.

    The stage file produced by ``dvc.add`` gets its md5 field overwritten
    with a fixed value; the stage is then loaded through ``SingleStageFile``
    and dumped again, and the md5 on disk must still be that fixed value.
    """
    added = self.dvc.add(self.FOO)
    self.assertEqual(len(added), 1)
    original = added[0]
    self.assertTrue(original is not None)

    # NOTE: checking that reloaded stage didn't change its checksum
    md5 = "11111111111111111111111111111111"
    on_disk = load_yaml(original.relpath)
    on_disk[original.PARAM_MD5] = md5
    dump_yaml(original.relpath, on_disk)

    dvcfile = SingleStageFile(self.dvc, original.relpath)
    reloaded = dvcfile.stage
    self.assertTrue(reloaded is not None)
    dvcfile.dump(reloaded)

    redumped = load_yaml(reloaded.relpath)
    self.assertEqual(redumped[reloaded.PARAM_MD5], md5)
def _test(self):
    """``status -c`` must warn about an output whose md5 is missing.

    A single-stage file is created and pushed to a default remote, the
    ``md5`` of its first output is deleted from the stage file, and the
    captured ``dvc`` log must then contain the expected warning text.
    """
    remote_url = Local.get_url()
    self.main(["remote", "add", "-d", TEST_REMOTE, remote_url])

    stage = self.dvc.run(
        outs=["bar"], cmd="echo bar > bar", single_stage=True
    )
    self.main(["push"])

    # Strip the version info of the only output from the stage file.
    stage_file = stage.relpath
    stage_content = load_yaml(stage_file)
    first_out = stage_content["outs"][0]
    del first_out["md5"]
    dump_yaml(stage_file, stage_content)

    expected_warning = (
        "Output 'bar'(stage: 'bar.dvc') is missing version info. "
        "Cache for it will not be collected. "
        "Use `dvc repro` to get your pipeline up to date."
    )

    with self._caplog.at_level(logging.WARNING, logger="dvc"):
        # Drop records captured so far so only this command's output is checked.
        self._caplog.clear()
        self.main(["status", "-c"])
        assert expected_warning in self._caplog.text
def test_repro_when_lockfile_gets_deleted(tmp_dir, dvc):
    """A stage is reproduced again after its lockfile has been deleted.

    The first reproduce runs the stage and leaves ``PIPELINE_LOCK`` on disk,
    and the second returns nothing. Once the lockfile is unlinked, reproduce
    must return the ``run-copy`` stage of ``PIPELINE_FILE`` again.
    """
    tmp_dir.gen("copy.py", COPY_SCRIPT)
    tmp_dir.gen("foo", "foo")

    src, dst = "foo", "foobar"
    run_copy_stage = {
        "cmd": "python copy.py {} {}".format(src, dst),
        "deps": [src],
        "outs": [dst],
    }
    dump_yaml(PIPELINE_FILE, {"stages": {"run-copy": run_copy_stage}})

    target = ":run-copy"
    assert dvc.reproduce(target)
    assert os.path.exists(PIPELINE_LOCK)
    assert not dvc.reproduce(target)

    os.unlink(PIPELINE_LOCK)

    stages = dvc.reproduce(target)
    assert stages
    first = stages[0]
    assert first.relpath == PIPELINE_FILE
    assert first.name == "run-copy"
def _dump_pipeline_file(self, stage):
    """Write ``stage`` into the pipeline file at ``self.path``.

    An existing file is parsed with ``parse_yaml_for_update``; otherwise an
    empty file is created first. If the file already has a truthy entry
    under ``stage.name``, the serialized stage is applied onto that entry
    with ``apply_diff``; otherwise the serialized stage is added and the
    addition is logged. The result is dumped back to ``self.path`` and the
    file is handed to ``self.repo.scm.track_file``.
    """
    if not self.exists():
        logger.info("Creating '%s'", self.relpath)
        # Create the empty file up front.
        open(self.path, "w+").close()
        data = {}
    else:
        with open(self.path) as fd:
            data = parse_yaml_for_update(fd.read(), self.path)

    # Make sure the document has a "stages" mapping to write into.
    stages = data.get("stages", {})
    data["stages"] = stages

    serialized = serialize.to_pipeline_file(stage)
    existing_entry = stages.get(stage.name)
    if existing_entry:
        # Update the entry that is already in the file in place.
        apply_diff(serialized[stage.name], existing_entry)
    else:
        stages.update(serialized)
        logger.info(
            "Adding stage '%s' to '%s'", stage.name, self.relpath,
        )

    dump_yaml(self.path, data)
    self.repo.scm.track_file(self.relpath)
def save(self, stage):
    """Commit uncached outputs of ``stage`` and store its cache entry.

    Nothing happens when ``_get_stage_hash`` yields a falsy key. Otherwise
    the outputs reported by ``self._uncached_outs`` are committed. A new
    entry is validated and dumped to the path from ``self._get_cache_path``
    only when ``self._load_cache`` found no existing entry.
    """
    key = _get_stage_hash(stage)
    if not key:
        return

    fresh_entry = to_single_stage_lockfile(stage)
    value = _get_cache_hash(fresh_entry)

    # Prefer an entry that is already stored over the freshly built one.
    stored_entry = self._load_cache(key, value)
    entry = stored_entry if stored_entry else fresh_entry

    for out in self._uncached_outs(stage, entry):
        out.commit()

    if stored_entry:
        return

    # sanity check
    COMPILED_LOCK_FILE_STAGE_SCHEMA(entry)

    entry_path = self._get_cache_path(key, value)
    makedirs(os.path.dirname(entry_path), exist_ok=True)
    dump_yaml(entry_path, entry)
def test(self):
    """Reproducing a stage file that closes a dependency cycle must fail.

    Two stages chain ``self.FOO`` -> ``bar.txt`` -> ``baz.txt``. A third
    stage file, ``cycle.dvc``, depends on ``baz.txt`` and declares
    ``self.FOO`` as its output; reproducing it is expected to raise
    ``CyclicGraphError``.
    """
    chain = (
        {
            "deps": [self.FOO],
            "outs": ["bar.txt"],
            "cmd": "echo bar > bar.txt",
            "name": "copybarbar-txt",
        },
        {
            "deps": ["bar.txt"],
            "outs": ["baz.txt"],
            "cmd": "echo baz > baz.txt",
            "name": "copybazbaz-txt",
        },
    )
    for stage_kwargs in chain:
        self._run(**stage_kwargs)

    # Hand-written stage file that feeds the end of the chain back into
    # its start.
    cycle_file = "cycle.dvc"
    dump_yaml(
        cycle_file,
        {
            "cmd": "echo baz > foo",
            "deps": [{"path": "baz.txt"}],
            "outs": [{"path": self.FOO}],
        },
    )

    with self.assertRaises(CyclicGraphError):
        self.dvc.reproduce(cycle_file)