def test_commit_no_exec_missing_out(tmp_dir, dvc):
    """Force-committing a ``no_exec`` stage fails when its output is absent.

    The stage declares ``out`` but is never executed and the file is never
    generated, so ``dvc.commit`` must raise ``OutputDoesNotExistError``.
    """
    no_exec_stage = dvc.run(
        name="my",
        cmd="mycmd",
        outs=["out"],
        no_exec=True,
    )
    clean_staging()

    # The stage has never run, so status must report something.
    assert dvc.status(no_exec_stage.path)

    with pytest.raises(OutputDoesNotExistError):
        dvc.commit(no_exec_stage.path, force=True)
def test(self):
    """Re-run a stage after its output was overwritten in the workspace.

    The stage command must be invoked exactly once and the output's
    ``checkout`` must not be called.
    """
    from dvc.stage.run import cmd_run
    from tests.utils import clean_staging

    command = f"python {self.CODE} {self.FOO} {self.BAR}"
    stage = self.dvc.run(
        cmd=command,
        deps=[self.FOO],
        outs=[self.BAR],
        single_stage=True,
    )
    clean_staging()

    # 0o644 grants the owner write permission so the output can be
    # overwritten with content that no longer matches what was produced.
    os.chmod(self.BAR, 0o644)
    with open(self.BAR, "w", encoding="utf-8") as corrupted:
        corrupted.write("corrupting the output cache")

    output = stage.outs[0]
    checkout_patch = mock.patch.object(
        output, "checkout", wraps=output.checkout
    )
    run_patch = mock.patch("dvc.stage.run.cmd_run", wraps=cmd_run)

    with self.dvc.lock, checkout_patch as checkout_spy, run_patch as run_spy:
        stage.run()

        run_spy.assert_called_once()
        checkout_spy.assert_not_called()
def test_push_wildcard_from_bare_git_repo(tmp_dir, make_tmp_dir, erepo_dir,
                                          local_cloud):
    """Only data matched by a glob push is importable via a bare git repo.

    ``dire*`` matches ``dirextra`` but not ``dir123``; importing the former
    succeeds while importing the latter raises ``PathMissingError``.
    """
    import git

    bare_repo_path = os.fspath(tmp_dir)
    git.Repo.init(bare_repo_path, bare=True)

    erepo_dir.add_remote(config=local_cloud.config)
    tracked_dirs = {
        "dir123": {"foo": "foo content"},
        "dirextra": {"extrafoo": "extra foo content"},
    }
    with erepo_dir.chdir():
        erepo_dir.dvc_gen(tracked_dirs, commit="initial")

    glob_target = os.path.join(os.fspath(erepo_dir), "dire*")
    erepo_dir.dvc.push([glob_target], glob=True)

    # Publish the git history of the external repo into the bare repo.
    erepo_dir.scm.gitpython.repo.create_remote("origin", bare_repo_path)
    erepo_dir.scm.gitpython.repo.remote("origin").push("master")

    dvc_repo = make_tmp_dir("dvc-repo", scm=True, dvc=True)
    with dvc_repo.chdir():
        dvc_repo.dvc.imp(bare_repo_path, "dirextra")
        clean_staging()

        with pytest.raises(PathMissingError):
            dvc_repo.dvc.imp(bare_repo_path, "dir123")
def test_missing_cache(tmp_dir, dvc, local_remote, caplog):
    """Push and fetch log missing cache files; cloud status does not."""
    from tests.utils import clean_staging

    tmp_dir.dvc_gen({"foo": "foo", "bar": "bar"})

    # purge cache
    remove(dvc.odb.local.cache_dir)
    clean_staging()

    header = (
        "Some of the cache files do not exist "
        "neither locally nor on remote. Missing cache files:\n"
    )
    bar_entry = "name: bar, md5: 37b51d194a7513e45b56f6524f2d51f2\n"
    foo_entry = "name: foo, md5: acbd18db4cc2f85cedef654fccc4a4d8\n"

    # Both transfer commands must log the header and both entries.
    for sync in (dvc.push, dvc.fetch):
        caplog.clear()
        sync()
        assert header in caplog.text
        assert bar_entry in caplog.text
        assert foo_entry in caplog.text

    caplog.clear()
    expected_status = {"bar": "missing", "foo": "missing"}
    assert dvc.status(cloud=True) == expected_status
    assert header not in caplog.text
    assert bar_entry not in caplog.text
    assert foo_entry not in caplog.text
def test_subrepo(dvc_top_level, erepo):
    """``Repo.ls`` lists the expected entries for a repo and its subrepo.

    The top-level repo is expected to show DVC-tracked entries only when
    ``dvc_top_level`` is true; the subrepo always shows them.
    """
    from tests.func.test_get import make_subrepo
    from tests.utils import clean_staging

    dvc_content = {"foo.txt": "foo.txt", "dvc_dir": {"lorem": "lorem"}}
    scm_content = {"bar.txt": "bar.txt", "scm_dir": {"ipsum": "ipsum"}}

    subrepo = erepo / "subrepo"
    make_subrepo(subrepo, erepo.scm)

    for repo in (erepo, subrepo):
        with repo.chdir():
            repo.scm_gen(scm_content, commit=f"scm track for top {repo}")
            if hasattr(repo, "dvc"):
                repo.dvc_gen(dvc_content, commit=f"dvc track for {repo}")
    clean_staging()

    def _list_files(repo, path=None):
        # Collect just the "path" field of every listed entry.
        return {entry["path"] for entry in Repo.ls(os.fspath(repo), path)}

    extras = {".dvcignore", ".gitignore"}
    git_tracked_outputs = {"bar.txt", "scm_dir"}
    dvc_tracked_outputs = {"dvc_dir", "foo.txt", "foo.txt.dvc", "dvc_dir.dvc"}
    common_outputs = git_tracked_outputs | extras | dvc_tracked_outputs

    if dvc_top_level:
        top_level_outputs = common_outputs
    else:
        top_level_outputs = git_tracked_outputs

    assert _list_files(erepo) == top_level_outputs
    assert _list_files(erepo, "scm_dir") == {"ipsum"}
    if dvc_top_level:
        assert _list_files(erepo, "dvc_dir") == {"lorem"}

    assert _list_files(subrepo, ".") == common_outputs
    assert _list_files(subrepo, "scm_dir") == {"ipsum"}
    assert _list_files(subrepo, "dvc_dir") == {"lorem"}
def test_imported_entries_unchanged(tmp_dir, dvc, erepo_dir):
    """A freshly imported file reports no changed entries."""
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("file", "file content", "initial commit")

    clean_staging()

    import_stage = dvc.imp(os.fspath(erepo_dir), "file")

    nothing_changed = ([], [], None)
    assert import_stage.changed_entries() == nothing_changed
def test_indexed_on_push(tmp_dir, dvc, index):
    """After a push, ``index`` holds the directory's hashes only.

    The directory hash and its entry's hash are expected in the index;
    the hash of the standalone file is not.
    """
    (file_stage, *_) = tmp_dir.dvc_gen({"foo": "foo content"})
    file_out = file_stage.outs[0]

    (dir_stage, *_) = tmp_dir.dvc_gen({"bar": {"baz": "baz content"}})
    dir_out = dir_stage.outs[0]

    # Element 1 of the trie entry for "baz" is used as its hash info.
    baz_hash = dir_out.obj.trie.get(("baz",))[1]
    clean_staging()

    dvc.push()

    dir_hash_value = dir_out.hash_info.value
    assert {dir_hash_value, baz_hash.value} == set(index.hashes())
    assert [dir_hash_value] == list(index.dir_hashes())
    assert file_out.hash_info.value not in index.hashes()
def test_get_hash_dirty_dir(tmp_dir, dvc):
    """Staging a tracked dir with an extra workspace file counts 3 files."""
    tmp_dir.dvc_gen({"dir": {"foo": "foo", "bar": "bar"}})

    # Add a file to the directory after it has been tracked.
    (tmp_dir / "dir" / "baz").write_text("baz")
    clean_staging()

    repo_fs = RepoFileSystem(repo=dvc)
    dir_path = (tmp_dir / "dir").fs_path
    _, meta, obj = stage(dvc.odb.local, dir_path, repo_fs, "md5")

    expected_hash = HashInfo("md5", "ba75a2162ca9c29acecb7957105a0bc2.dir")
    assert obj.hash_info == expected_hash
    assert meta.nfiles == 3
def clean(outs, dvc=None):
    """Remove the given paths and, when a repo is passed, reset its cache.

    Args:
        outs: list of paths to remove.
        dvc: optional repo; when truthy its local cache directory is removed
            too and then recreated empty, after which ``clean_repos()`` and
            ``clean_staging()`` are called.
    """
    from tests.utils import clean_staging

    targets = outs + [dvc.odb.local.cache_dir] if dvc else outs

    for target in targets:
        print(target)
        remove(target)

    if dvc:
        os.makedirs(dvc.odb.local.cache_dir, exist_ok=True)
        clean_repos()
        clean_staging()
def test_commit_pipeline_stage(tmp_dir, dvc, run_copy):
    """A named pipeline stage can be committed through several targets."""
    tmp_dir.gen("foo", "foo")
    copy_stage = run_copy(
        "foo", "bar", no_commit=True, name="copy-foo-bar"
    )
    clean_staging()

    address = copy_stage.addressing
    assert dvc.status(address)
    assert dvc.commit(address, force=True) == [copy_stage]
    assert not dvc.status(address)

    # just to confirm different variants work
    alternative_targets = (
        f":{address}",
        f"{PIPELINE_FILE}:{address}",
        PIPELINE_FILE,
    )
    for target in alternative_targets:
        assert dvc.commit(target) == [copy_stage]
def test_commit_no_exec(tmp_dir, dvc):
    """Force-committing a ``no_exec`` stage leaves its status empty."""
    tmp_dir.gen({"dep": "dep", "out": "out"})
    no_exec_stage = dvc.run(
        name="my",
        cmd="mycmd",
        deps=["dep"],
        outs=["out"],
        no_exec=True,
    )
    clean_staging()

    # Before the commit the stage is reported as changed.
    assert dvc.status(no_exec_stage.path)

    dvc.commit(no_exec_stage.path, force=True)

    assert dvc.status(no_exec_stage.path) == {}
def test_get_hash_mixed_dir(tmp_dir, scm, dvc):
    """Stage a directory holding one DVC-tracked and one git-tracked file."""
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})

    # "foo" goes to DVC; "bar" and the files DVC generated go to git.
    tmp_dir.dvc.add(os.path.join("dir", "foo"))
    git_tracked = [
        os.path.join("dir", name)
        for name in ("bar", ".gitignore", "foo.dvc")
    ]
    tmp_dir.scm.add(git_tracked)
    tmp_dir.scm.commit("add dir")
    clean_staging()

    repo_fs = RepoFileSystem(repo=dvc)
    dir_path = (tmp_dir / "dir").fs_path
    _, _, obj = stage(dvc.odb.local, dir_path, repo_fs, "md5")

    expected_hash = HashInfo("md5", "e1d9e8eae5374860ae025ec84cfd85c7.dir")
    assert obj.hash_info == expected_hash
def test_commit_changed_md5(tmp_dir, dvc):
    """A tampered stage-file ``md5`` blocks commit unless it is forced.

    After the forced commit the stage file no longer has an ``md5`` key.
    """
    tmp_dir.gen({"file": "file content"})
    [added_stage] = dvc.add("file", no_commit=True)

    # Write a bogus top-level checksum into the stage file.
    tampered = (tmp_dir / added_stage.path).parse()
    tampered["md5"] = "1111111111"
    (tmp_dir / added_stage.path).dump(tampered)

    clean_staging()

    with pytest.raises(StageCommitError):
        dvc.commit(added_stage.path)

    dvc.commit(added_stage.path, force=True)

    reloaded = (tmp_dir / added_stage.path).parse()
    assert "md5" not in reloaded
def test_commit_granular_output_dir(tmp_dir, dvc):
    """Committing a directory added with ``no_commit`` clears the status."""
    nested_dir = {
        "foo": "foo",
        "bar": "bar",
        "subdir": {"subfoo": "subfoo", "subbar": "subbar"},
    }
    tmp_dir.gen({"data": nested_dir})

    dvc.add("data", no_commit=True)
    clean_staging()

    dvc.commit("data")

    assert dvc.status() == {}
def test_circular_import(tmp_dir, dvc, scm, erepo_dir):
    """Importing back into the source repo raises ``CircularImportError``."""
    from dvc.exceptions import CircularImportError

    source_content = {"dir": {"foo": "foo", "bar": "bar"}}
    with erepo_dir.chdir():
        erepo_dir.dvc_gen(source_content, commit="init")

    # Import from the external repo and record the import in git.
    dvc.imp(os.fspath(erepo_dir), "dir", "dir_imported")
    scm.add("dir_imported.dvc")
    scm.commit("import")
    clean_staging()

    # Now try to import that imported directory back into its origin.
    with erepo_dir.chdir(), pytest.raises(CircularImportError):
        erepo_dir.dvc.imp(
            os.fspath(tmp_dir), "dir_imported", "circular_import"
        )
def test_commit_granular_output(tmp_dir, dvc):
    """Committing one of two outputs caches only that single object."""
    dvc.run(
        name="mystage",
        cmd=["echo foo>foo", "echo bar>bar"],
        outs=["foo", "bar"],
        no_commit=True,
    )
    clean_staging()

    cache = tmp_dir / ".dvc" / "cache"

    # The stage ran with no_commit, so the cache starts out empty.
    cached_before = list(cache.glob("*/*"))
    assert not cached_before

    dvc.commit("foo")

    cached_after = list(cache.glob("*/*"))
    assert cached_after == [cache / "d3" / "b07384d113edec49eaa6238ad5ff00"]
def test_subrepos_are_ignored(tmp_dir, erepo_dir):
    """Content of a nested subrepo is skipped when fetching and staging.

    ``dir`` holds a DVC file, a git file and a subrepo; neither the
    downloaded copy nor the rebuilt cache contains ``subrepo``.
    """
    subrepo = erepo_dir / "dir" / "subrepo"
    make_subrepo(subrepo, erepo_dir.scm)

    with erepo_dir.chdir():
        erepo_dir.dvc_gen("dir/foo", "foo", commit="foo")
        erepo_dir.scm_gen("dir/bar", "bar", commit="bar")

    with subrepo.chdir():
        subrepo.dvc_gen({"file": "file"}, commit="add files on subrepo")

    with external_repo(os.fspath(erepo_dir)) as repo:
        download_target = tmp_dir / "out"
        repo.dvcfs.get("dir", os.fspath(download_target))

        expected_files = {"foo": "foo", "bar": "bar", ".gitignore": "/foo\n"}
        assert (tmp_dir / "out").read_text() == expected_files

        # clear cache to test saving to cache
        cache_dir = tmp_dir / repo.odb.local.cache_dir
        remove(cache_dir)
        clean_staging()
        makedirs(cache_dir)

        staging, _, obj = stage(
            repo.odb.local, "dir", repo.dvcfs, "md5", ignore=repo.dvcignore
        )
        transfer(
            staging,
            repo.odb.local,
            {obj.hash_info},
            shallow=False,
            hardlink=True,
        )

        expected_objects = {
            cache_dir / "e1" / "d9e8eae5374860ae025ec84cfd85c7",
            cache_dir / "e1" / "d9e8eae5374860ae025ec84cfd85c7.dir",
            cache_dir / "37" / "b51d194a7513e45b56f6524f2d51f2",
            cache_dir / "94" / "7d2b84e5aa88170e80dff467a5bfb6",
            cache_dir / "ac" / "bd18db4cc2f85cedef654fccc4a4d8",
        }
        assert set(cache_dir.glob("??/*")) == expected_objects
def test_commit_with_deps(tmp_dir, dvc, run_copy, run_kw):
    """``commit(with_deps=True)`` also commits the stage's dependency."""
    tmp_dir.gen("foo", "foo")

    [dep_stage] = dvc.add("foo", no_commit=True)
    assert dep_stage is not None
    assert len(dep_stage.outs) == 1

    copy_stage = run_copy("foo", "file", no_commit=True, **run_kw)
    assert copy_stage is not None
    assert len(copy_stage.outs) == 1

    # Nothing has been committed yet, so both outputs lack a cache entry.
    assert dep_stage.outs[0].changed_cache()
    assert copy_stage.outs[0].changed_cache()

    clean_staging()
    dvc.commit(copy_stage.path, with_deps=True)

    assert not dep_stage.outs[0].changed_cache()
    assert not copy_stage.outs[0].changed_cache()
def test_stage_dir_optimization(tmp_dir, dvc, mocker, dry_run,
                                expected_staging_contents):
    """Re-staging an unchanged directory loads its tree instead of rebuilding.

    The first staging after a workspace change calls ``_stage_tree``; the
    second calls neither ``_stage_tree`` nor ``_build_tree`` and goes through
    ``Tree.load`` with the already known hash.
    """
    from dvc_data import stage
    from dvc_data.objects.tree import Tree

    tracked = {
        "data": {
            "foo": "bar",
            "subdir": {"subfoo": "subbar"},
        }
    }
    tmp_dir.dvc_gen(tracked)

    odb = dvc.odb.local
    known_objects = set(odb.all())
    clean_staging()

    # Modify the directory so that it has to be staged from scratch.
    tmp_dir.gen({"data": {"baz": "quz"}})

    stage_tree_spy = mocker.spy(stage, "_stage_tree")
    _, _, first_tree = stage.stage(odb, "data", odb.fs, odb.fs.PARAM_CHECKSUM)

    assert stage_tree_spy.called
    new_objects = set(odb.all()) - known_objects
    assert new_objects == {first_tree.hash_info.as_raw().value}

    stage_tree_spy.reset_mock()
    clean_staging()

    tree_load_spy = mocker.spy(Tree, "load")
    build_tree_spy = mocker.spy(stage, "_build_tree")

    staging, _, second_tree = stage.stage(
        odb,
        "data",
        odb.fs,
        odb.fs.PARAM_CHECKSUM,
        dry_run=dry_run,
    )

    assert not stage_tree_spy.called
    assert not build_tree_spy.called

    load_args, _ = tree_load_spy.call_args
    assert load_args[1].value == second_tree.hash_info.as_raw().value

    assert set(staging.all()) == expected_staging_contents
def test_get_hash_dirty_file(tmp_dir, dvc):
    """Check hashes for a tracked file that is modified and then removed."""
    from dvc.data import check
    from dvc.data.stage import get_file_hash
    from dvc.objects.errors import ObjectFormatError

    workspace_file = tmp_dir / "file"
    file_path = workspace_file.fs_path

    tmp_dir.dvc_gen("file", "file")
    cached_hash = HashInfo("md5", "8c7dd922ad47494fc02c388e12c00eac")

    workspace_file.write_text("something")
    modified_hash = HashInfo("md5", "437b930db84b8079c2dd804a71936b5f")

    clean_staging()

    # file is modified in workspace
    # get_file_hash(file) should return workspace hash, not DVC cached hash
    fs = RepoFileSystem(repo=dvc)
    assert fs.info(file_path).get("md5") is None
    staging, _, obj = stage(dvc.odb.local, file_path, fs, "md5")
    assert obj.hash_info == modified_hash
    check(staging, obj)

    # file is removed in workspace
    # any staged object referring to modified workspace obj is now invalid
    workspace_file.unlink()
    with pytest.raises(ObjectFormatError):
        check(staging, obj)

    # get_file_hash(file) should return DVC cached hash
    assert fs.info(file_path)["md5"] == cached_hash.value
    _, hash_info = get_file_hash(file_path, fs, "md5", state=dvc.state)
    assert hash_info == cached_hash

    # tmp_dir/file can be staged even though it is missing in workspace since
    # repofs will use the DVC cached hash (and refer to the local cache object)
    _, _, obj = stage(dvc.odb.local, file_path, fs, "md5")
    assert obj.hash_info == cached_hash
def test_commit_granular_dir(tmp_dir, dvc):
    """Committing a file, a subdir, then the whole dir grows the cache.

    After each granular commit the exact set of cache entries is compared
    with the expected one.
    """
    tmp_dir.gen(
        {
            "data": {
                "foo": "foo",
                "bar": "bar",
                "subdir": {"subfoo": "subfoo", "subbar": "subbar"},
            }
        }
    )
    dvc.add("data", no_commit=True)
    clean_staging()

    cache = tmp_dir / ".dvc" / "cache"

    def _cached():
        # Snapshot of every entry currently in the cache.
        return set(cache.glob("*/*"))

    expected = {cache / "1a" / "ca2c799df82929bbdd976557975546"}
    assert _cached() == expected

    dvc.commit(os.path.join("data", "foo"))
    expected = expected | {
        cache / "1a" / "ca2c799df82929bbdd976557975546.dir",
        cache / "ac" / "bd18db4cc2f85cedef654fccc4a4d8",
    }
    assert _cached() == expected
    clean_staging()

    dvc.commit(os.path.join("data", "subdir"))
    expected = expected | {
        cache / "26" / "d6b64d96a660707412f523e8184b5f",
        cache / "4c" / "e8d2a2cf314a52fa7f315ca37ca445",
        cache / "68" / "dde2c3c4e7953c2290f176bbdc9a54",
    }
    assert _cached() == expected
    clean_staging()

    dvc.commit(os.path.join("data"))
    expected = expected | {
        cache / "37" / "b51d194a7513e45b56f6524f2d51f2",
    }
    assert _cached() == expected
def test_commit_granular_output_file(tmp_dir, dvc):
    """Committing a file added with ``no_commit`` clears the status."""
    tmp_dir.gen("foo", "foo")

    dvc.add("foo", no_commit=True)
    clean_staging()

    dvc.commit("foo")

    status_after_commit = dvc.status()
    assert status_after_commit == {}