def test_walk(tmp_dir, dvc, dvcfiles, extra_expected):
    """Check the set of paths produced by ``RepoTree.walk`` on a mixed dir.

    ``dvcfiles`` is forwarded to ``walk`` and ``extra_expected`` lists the
    additional paths expected for that setting; both are supplied by the
    caller (presumably through parametrization -- not visible here).
    """
    tracked_layout = {
        "dir": {
            "subdir1": {"foo1": "foo1", "bar1": "bar1"},
            "subdir2": {"foo2": "foo2"},
        }
    }
    tmp_dir.gen(tracked_layout)
    dvc.add(str(tmp_dir / "dir"), recursive=True)
    # Generated after the ``dvc.add`` call above, so this test never adds
    # these two files to DVC.
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})
    tree = RepoTree(dvc)

    base = PathInfo("dir")
    always_expected = [
        base / "subdir1",
        base / "subdir2",
        base / "subdir1" / "foo1",
        base / "subdir1" / "bar1",
        base / "subdir2" / "foo2",
        base / "foo",
        base / "bar",
    ]

    # Flatten every (root, dirs, files) triple into full entry paths.
    actual = [
        os.path.join(root, entry)
        for root, dirs, files in tree.walk("dir", dvcfiles=dvcfiles)
        for entry in dirs + files
    ]
    expected = [str(path) for path in always_expected + extra_expected]

    # Set equality checks the content; the length check catches duplicates.
    assert set(actual) == set(expected)
    assert len(actual) == len(expected)
def test_walk(tmp_dir, dvc):
    """Check the set of paths produced by ``RepoTree.walk`` on a mixed dir.

    The expected listing contains the sub-directories, the files added via
    ``dvc.add`` together with their ``.dvc`` counterparts, and the two files
    generated afterwards.
    """
    tracked_layout = {
        "dir": {
            "subdir1": {"foo1": "foo1", "bar1": "bar1"},
            "subdir2": {"foo2": "foo2"},
        }
    }
    tmp_dir.gen(tracked_layout)
    dvc.add(str(tmp_dir / "dir"), recursive=True)
    # Generated after the ``dvc.add`` call above, so this test never adds
    # these two files to DVC.
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})
    tree = RepoTree(dvc)

    join = os.path.join
    expected = [
        join("dir", "subdir1"),
        join("dir", "subdir2"),
        join("dir", "subdir1", "foo1"),
        join("dir", "subdir1", "foo1.dvc"),
        join("dir", "subdir1", "bar1"),
        join("dir", "subdir1", "bar1.dvc"),
        join("dir", "subdir2", "foo2"),
        join("dir", "subdir2", "foo2.dvc"),
        join("dir", "foo"),
        join("dir", "bar"),
    ]

    # Flatten every (root, dirs, files) triple into full entry paths.
    actual = [
        join(root, entry)
        for root, dirs, files in tree.walk("dir")
        for entry in dirs + files
    ]

    # Set equality checks the content; the length check catches duplicates.
    assert set(actual) == set(expected)
    assert len(actual) == len(expected)
def _ls(repo, path_info, recursive=None, dvc_only=False):
    """Collect listing entries for ``path_info`` inside ``repo``.

    Args:
        repo: repository object handed to ``RepoTree``.
        path_info: path to list; results are keyed relative to it.
        recursive: when falsy, only the first level yielded by the walk is
            listed (directories included); when truthy, the whole walk is
            consumed and only files are listed.
        dvc_only: when true, files are kept only if ``tree.isdvc`` reports
            them, and directories only if they exist in ``tree.dvctree``.

    Returns:
        A dict mapping each path to a dict with ``isout``, ``isdir`` and
        ``isexec`` keys. If the walk raises ``NotADirectoryError``, the
        result is a single entry keyed by ``path_info.name`` (or an empty
        dict when ``dvc_only`` filters it out). If it raises
        ``FileNotFoundError``, the result is an empty dict.
    """
    from dvc.repo.tree import RepoTree

    def _reraise(exc):
        # Surface walk errors so the except clauses below can handle them.
        raise exc

    # A dedicated RepoTree is built here rather than reusing repo.repo_tree,
    # because ls must not have fetch/stream enabled.
    tree = RepoTree(repo)

    def _describe(info, is_out, is_dir):
        # isexec is only queried on the tree when isdvc reported falsy.
        return {
            "isout": is_out,
            "isdir": is_dir,
            "isexec": False if is_out else tree.isexec(info),
        }

    listing = {}
    try:
        walker = tree.walk(
            path_info.fspath, onerror=_reraise, dvcfiles=True
        )
        for root, dirs, files in walker:
            for file_name in files:
                info = PathInfo(root) / file_name
                is_out = tree.isdvc(info)
                if dvc_only and not is_out:
                    continue
                key = str(info.relative_to(path_info))
                listing[key] = _describe(info, is_out, False)

            if recursive:
                continue

            for dir_name in dirs:
                info = PathInfo(root) / dir_name
                if dvc_only and not (
                    tree.dvctree and tree.dvctree.exists(info)
                ):
                    continue
                is_out = tree.isdvc(info)
                key = str(info.relative_to(path_info))
                listing[key] = _describe(info, is_out, True)
            # Non-recursive listing stops after the first walked level.
            break
    except NotADirectoryError:
        # The target is not a directory: describe it as a single entry.
        is_out = tree.isdvc(path_info)
        if dvc_only and not is_out:
            return {}
        return {path_info.name: _describe(path_info, is_out, False)}
    except FileNotFoundError:
        return {}

    return listing
def test_walk_onerror(tmp_dir, dvc):
    """Check ``RepoTree.walk`` error handling with and without ``onerror``.

    Without a callback, walking a bad path must complete without raising.
    With a callback that re-raises, an ``OSError`` must surface.
    """

    def reraise(exc):
        raise exc

    def exhaust(walker):
        # Consume the walk completely, discarding whatever it yields.
        for _ in walker:
            pass

    tmp_dir.dvc_gen("foo", "foo")
    tree = RepoTree(dvc)

    # "dir" does not exist; "foo" exists but is not a directory.
    for bad_path in ("dir", "foo"):
        exhaust(tree.walk(bad_path))
        with pytest.raises(OSError):
            exhaust(tree.walk(bad_path, onerror=reraise))
def test_repotree_walk_fetch(tmp_dir, dvc, scm, local_remote):
    """Check that walking a ``fetch=True`` RepoTree repopulates the cache.

    The directory is generated, pushed, and the local cache dir is removed.
    After the walk, the output's cache path and the cache file of every
    ``dir_cache`` entry must exist on disk.
    """
    stages = tmp_dir.dvc_gen({"dir": {"foo": "foo"}}, commit="init")
    out = stages[0].outs[0]
    dvc.push()
    remove(dvc.cache.local.cache_dir)

    tree = RepoTree(dvc, fetch=True)
    with dvc.state:
        # Only the side effects of walking matter; the yielded triples are
        # discarded.
        for _root, _dirs, _files in tree.walk("dir"):
            pass

    assert os.path.exists(out.cache_path)
    for entry in out.dir_cache:
        checksum = entry[out.tree.PARAM_CHECKSUM]
        cached_file = dvc.cache.local.tree.hash_to_path_info(checksum)
        assert os.path.exists(cached_file)