def test_walk(tmp_dir, dvc, dvcfiles, extra_expected):
    """Check that ``RepoTree.walk("dir")`` yields every expected entry once.

    ``dir`` is first populated with two subdirectories that are added via
    ``dvc.add(..., recursive=True)``; two more files are generated
    afterwards without being added.

    ``dvcfiles`` is forwarded to ``walk`` and ``extra_expected`` is appended
    to the baseline expectation.
    NOTE(review): both are presumably supplied by a parametrize decorator
    that is not visible in this block -- confirm.
    """
    tmp_dir.gen(
        {
            "dir": {
                "subdir1": {"foo1": "foo1", "bar1": "bar1"},
                "subdir2": {"foo2": "foo2"},
            }
        }
    )
    dvc.add(str(tmp_dir / "dir"), recursive=True)
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})

    base = PathInfo("dir")
    baseline = [
        base / "subdir1",
        base / "subdir2",
        base / "subdir1" / "foo1",
        base / "subdir1" / "bar1",
        base / "subdir2" / "foo2",
        base / "foo",
        base / "bar",
    ]

    tree = RepoTree(dvc)
    walked = []
    for root, dirs, files in tree.walk("dir", dvcfiles=dvcfiles):
        walked.extend(os.path.join(root, entry) for entry in dirs + files)

    wanted = [str(path) for path in baseline + extra_expected]
    # The set comparison checks content; the length comparison additionally
    # rules out duplicated entries in the walk output.
    assert set(walked) == set(wanted)
    assert len(walked) == len(wanted)
def test_walk_mixed_dir(tmp_dir, scm, dvc):
    """Walk a directory holding one dvc-added file and one git-added file.

    ``dir/foo`` is added through ``tmp_dir.dvc.add`` while ``dir/bar``,
    ``dir/.gitignore`` and ``dir/foo.dvc`` are added and committed through
    ``tmp_dir.scm``. The walk (called without ``dvcfiles``) is expected to
    yield ``foo``, ``bar`` and ``.gitignore`` exactly once each.
    """
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})
    tmp_dir.dvc.add(os.path.join("dir", "foo"))

    git_tracked = [
        os.path.join("dir", name) for name in ("bar", ".gitignore", "foo.dvc")
    ]
    tmp_dir.scm.add(git_tracked)
    tmp_dir.scm.commit("add dir")

    tree = RepoTree(dvc)
    wanted = [
        str(PathInfo("dir") / name) for name in ("foo", "bar", ".gitignore")
    ]

    walked = []
    for root, dirs, files in tree.walk("dir"):
        walked.extend(os.path.join(root, entry) for entry in dirs + files)

    # Length check guards against duplicates that the set check would hide.
    assert set(walked) == set(wanted)
    assert len(walked) == len(wanted)
def test_walk_dirty(tmp_dir, dvc):
    """Walk a dvc-generated directory after the workspace has been modified.

    After ``dvc_gen`` creates ``dir``, the test generates ``dir/bar`` and
    ``dir/subdir3/foo3`` with plain ``gen`` and unlinks ``dir/foo``. The
    walk is expected to list the new entries and to omit the removed file.
    """
    tmp_dir.dvc_gen(
        {
            "dir": {
                "foo": "foo",
                "subdir1": {"foo1": "foo1", "bar1": "bar1"},
                "subdir2": {"foo2": "foo2"},
            }
        }
    )
    tmp_dir.gen({"dir": {"bar": "bar", "subdir3": {"foo3": "foo3"}}})
    (tmp_dir / "dir" / "foo").unlink()

    base = PathInfo("dir")
    wanted = [
        str(path)
        for path in (
            base / "subdir1",
            base / "subdir2",
            base / "subdir3",
            base / "subdir1" / "foo1",
            base / "subdir1" / "bar1",
            base / "subdir2" / "foo2",
            base / "subdir3" / "foo3",
            base / "bar",
        )
    ]

    tree = RepoTree(dvc)
    walked = []
    for root, dirs, files in tree.walk("dir"):
        walked.extend(os.path.join(root, entry) for entry in dirs + files)

    # Length check guards against duplicates that the set check would hide.
    assert set(walked) == set(wanted)
    assert len(walked) == len(wanted)
def test_walk_onerror(tmp_dir, dvc):
    """Check how ``RepoTree.walk`` reports errors with and without ``onerror``.

    For each problematic path the walk must finish quietly when no callback
    is given, and must surface an ``OSError`` when the callback re-raises
    the exception it receives.
    """

    def reraise(exc):
        raise exc

    tmp_dir.dvc_gen("foo", "foo")
    tree = RepoTree(dvc)

    # "dir" does not exist; "foo" exists but is not a directory.
    for target in ("dir", "foo"):
        # No callback: iterating must not raise.
        for _ in tree.walk(target):
            pass

        # Re-raising callback: the error must propagate out of the walk.
        with pytest.raises(OSError):
            for _ in tree.walk(target, onerror=reraise):
                pass
def test_subrepo_walk(tmp_dir, scm, dvc, dvcfiles, extra_expected):
    """Walk ``dir`` with ``subrepos=True`` and expect both subrepos' contents.

    Two subrepos (``dir/repo`` and ``dir/repo2``) are created with
    ``make_subrepo`` next to a git-committed ``dir/repo.txt``, and each gets
    its own ``dvc_gen`` content.

    ``dvcfiles`` is forwarded to ``walk`` and ``extra_expected`` is appended
    to the baseline expectation.
    NOTE(review): both are presumably supplied by a parametrize decorator
    that is not visible in this block -- confirm.
    """
    tmp_dir.scm_gen(
        {"dir": {"repo.txt": "file to confuse RepoTree"}},
        commit="dir/repo.txt",
    )

    subrepo1 = tmp_dir / "dir" / "repo"
    subrepo2 = tmp_dir / "dir" / "repo2"
    for subrepo_dir in (subrepo1, subrepo2):
        make_subrepo(subrepo_dir, scm)

    subrepo1.dvc_gen({"foo": "foo", "dir1": {"bar": "bar"}}, commit="FOO")
    subrepo2.dvc_gen(
        {"lorem": "lorem", "dir2": {"ipsum": "ipsum"}}, commit="BAR"
    )

    # using tree that does not have dvcignore
    dvc.tree._reset()
    tree = RepoTree(dvc, subrepos=True, fetch=True)

    base = PathInfo("dir")
    repo = base / "repo"
    repo2 = base / "repo2"
    baseline = [
        repo,
        base / "repo.txt",
        repo2,
        repo / ".gitignore",
        repo / "foo",
        repo / "dir1",
        repo / "dir1" / "bar",
        repo2 / ".gitignore",
        repo2 / "lorem",
        repo2 / "dir2",
        repo2 / "dir2" / "ipsum",
    ]

    start = os.path.join(tree.root_dir, "dir")
    walked = []
    for root, dirs, files in tree.walk(start, dvcfiles=dvcfiles):
        walked.extend(os.path.join(root, entry) for entry in dirs + files)

    # The walk starts from a path under tree.root_dir, so the expectation is
    # anchored at the same root before comparing.
    wanted = [
        os.path.join(tree.root_dir, path)
        for path in baseline + extra_expected
    ]
    assert set(walked) == set(wanted)
    assert len(walked) == len(wanted)
def test_walk_nested_subrepos(tmp_dir, dvc, scm, traverse_subrepos):
    """Walk the repo root with nested subrepos, with and without traversal.

    Every repo (the root plus ``subrepo1``, ``subrepo2`` and
    ``subrepo1/subrepo3``) receives git-generated and dvc-generated content
    whose names carry the repo's basename as suffix. The per-directory
    expectation only includes the subrepos when ``traverse_subrepos`` is
    truthy.
    NOTE(review): ``traverse_subrepos`` is presumably parametrized outside
    this block -- confirm.
    """

    # Content generators; the suffix is the basename of the owning repo.
    def git_layout(suffix):
        return {
            f"foo-{suffix}": f"foo-{suffix}",
            f"dir-{suffix}": {f"bar-{suffix}": f"bar-{suffix}"},
        }

    def dvc_layout(suffix):
        return {
            f"lorem-{suffix}": f"lorem-{suffix}",
            f"dvc-{suffix}": {f"ipsum-{suffix}": f"ipsum-{suffix}"},
        }

    subrepos = [
        tmp_dir / rel
        for rel in ["subrepo1", "subrepo2", "subrepo1/subrepo3"]
    ]
    for repo_dir in subrepos:
        make_subrepo(repo_dir, scm)

    # these files are always there
    extras = {".dvcignore", ".gitignore"}

    expected = {}
    for repo_dir in subrepos + [tmp_dir]:
        base = os.path.basename(repo_dir)
        scm_files = git_layout(base)
        dvc_files = dvc_layout(base)

        with repo_dir.chdir():
            repo_dir.scm_gen(scm_files, commit=f"git add in {repo_dir}")
            repo_dir.dvc_gen(dvc_files, commit=f"dvc add in {repo_dir}")

        # Subrepo contents are only expected when traversal is enabled.
        if not (traverse_subrepos or repo_dir == tmp_dir):
            continue

        expected[str(repo_dir)] = set(scm_files) | set(dvc_files) | extras
        # files inside a dvc directory
        expected[str(repo_dir / f"dvc-{base}")] = {f"ipsum-{base}"}
        # files inside a git directory
        expected[str(repo_dir / f"dir-{base}")] = {f"bar-{base}"}

    if traverse_subrepos:
        # The subrepo directories themselves show up in their parents.
        expected[str(tmp_dir)].update(["subrepo1", "subrepo2"])
        expected[str(tmp_dir / "subrepo1")].add("subrepo3")

    tree = RepoTree(dvc, subrepos=traverse_subrepos)
    actual = {
        root: set(dirs + files)
        for root, dirs, files in tree.walk(str(tmp_dir))
    }
    assert expected == actual
def test_repotree_walk_fetch(tmp_dir, dvc, scm, local_remote):
    """Walking with ``fetch=True`` must restore the removed cache entries.

    The directory is pushed, then both the local cache directory and the
    workspace copy of ``dir`` are removed. After a full walk the output's
    cache path and every entry of its ``dir_cache`` must exist again.

    NOTE(review): this module defines another function with this same name
    further down; the later definition replaces this one, so pytest
    presumably never collects this version -- confirm and rename if both
    are meant to run.
    """
    stages = tmp_dir.dvc_gen({"dir": {"foo": "foo"}}, commit="init")
    out = stages[0].outs[0]

    dvc.push()
    remove(dvc.cache.local.cache_dir)
    remove(tmp_dir / "dir")

    tree = RepoTree(dvc, fetch=True)
    # Exhaust the walk; only its side effects matter here.
    for _root, _dirs, _files in tree.walk("dir"):
        pass

    assert os.path.exists(out.cache_path)

    local_tree = dvc.cache.local.tree
    for _key, hash_info in out.dir_cache.items():
        assert hash_info.name == out.tree.PARAM_CHECKSUM
        assert os.path.exists(local_tree.hash_to_path_info(hash_info.value))
def test_repotree_walk_fetch(tmp_dir, dvc, scm, local_remote):
    """Walking with ``fetch=True`` must restore the removed local cache.

    The directory is pushed and the local cache directory removed. The walk
    runs inside the ``dvc.state`` context; afterwards the output's cache
    path and the file behind every ``dir_cache`` entry must exist again.

    NOTE(review): an earlier function in this module has this same name;
    this later definition shadows it, so only this version is presumably
    collected by pytest -- confirm.
    """
    stages = tmp_dir.dvc_gen({"dir": {"foo": "foo"}}, commit="init")
    out = stages[0].outs[0]

    dvc.push()
    remove(dvc.cache.local.cache_dir)

    tree = RepoTree(dvc, fetch=True)
    with dvc.state:
        # Exhaust the walk; only its side effects matter here.
        for _root, _dirs, _files in tree.walk("dir"):
            pass

    assert os.path.exists(out.cache_path)

    checksum_key = out.tree.PARAM_CHECKSUM
    local_tree = dvc.cache.local.tree
    for entry in out.dir_cache:
        cached_path = local_tree.hash_to_path_info(entry[checksum_key])
        assert os.path.exists(cached_path)
def test_walk_dirty_cached_dir(tmp_dir, scm, dvc):
    """Walk a dvc-generated directory after one of its files was deleted.

    ``data/foo`` is unlinked from the workspace after ``dvc_gen``; walking
    ``data`` must then yield only ``data/bar``.
    """
    tmp_dir.dvc_gen(
        {"data": {"foo": "foo", "bar": "bar"}},
        commit="add data",
    )
    (tmp_dir / "data" / "foo").unlink()

    tree = RepoTree(dvc)
    data = PathInfo(tmp_dir) / "data"

    walked = []
    for root, dirs, files in tree.walk(data):
        walked.extend(os.path.join(root, entry) for entry in dirs + files)

    # List equality: exactly one entry, no duplicates.
    assert walked == [(data / "bar").fspath]
def test_repo_tree_no_subrepos(tmp_dir, dvc, scm):
    """With ``subrepos=False`` the tree must not expose the subrepo.

    A subrepo is created at ``dir/repo`` next to a git-committed
    ``dir/repo.txt``. Walking the root with ``dvcfiles=True`` is expected
    to list only the root repo's entries, and the ``isfile`` / ``isdir`` /
    ``isdvc`` / ``exists`` queries are checked against paths inside and
    outside the subrepo.
    """
    tmp_dir.scm_gen(
        {"dir": {"repo.txt": "file to confuse RepoTree"}},
        commit="dir/repo.txt",
    )
    tmp_dir.dvc_gen({"lorem": "lorem"}, commit="add foo")

    subrepo = tmp_dir / "dir" / "repo"
    make_subrepo(subrepo, scm)
    subrepo.dvc_gen({"foo": "foo", "dir1": {"bar": "bar"}}, commit="FOO")
    subrepo.scm_gen({"ipsum": "ipsum"}, commit="BAR")

    # using tree that does not have dvcignore
    dvc.tree._reset()
    tree = RepoTree(dvc, subrepos=False, fetch=True)

    wanted = [
        str(path)
        for path in (
            tmp_dir / ".dvcignore",
            tmp_dir / ".gitignore",
            tmp_dir / "lorem",
            tmp_dir / "lorem.dvc",
            tmp_dir / "dir",
            tmp_dir / "dir" / "repo.txt",
        )
    ]

    walked = []
    for root, dirs, files in tree.walk(tmp_dir, dvcfiles=True):
        walked.extend(
            os.path.normpath(os.path.join(root, entry))
            for entry in dirs + files
        )

    # Length check guards against duplicates that the set check would hide.
    assert set(walked) == set(wanted)
    assert len(walked) == len(wanted)

    nested = tmp_dir / "dir" / "repo"

    assert tree.isfile(tmp_dir / "lorem") is True
    assert tree.isfile(nested / "foo") is False

    assert tree.isdir(nested) is False
    assert tree.isdir(tmp_dir / "dir") is True

    assert tree.isdvc(tmp_dir / "lorem") is True
    assert tree.isdvc(nested / "dir1") is False

    assert tree.exists(tmp_dir / "dir" / "repo.txt") is True
    # NOTE(review): this checks tmp_dir/repo/ipsum, whereas "ipsum" was
    # generated under dir/repo -- confirm the path is the intended one.
    assert tree.exists(tmp_dir / "repo" / "ipsum") is False