def _collect_plots(repo, targets=None, rev=None):
    """Map plot paths to their plot properties.

    Without ``targets`` every stage output whose ``plot`` attribute is truthy
    is returned. With ``targets`` only the requested paths are considered:

    * a target matching a plot output gets ``_plot_props`` of that output;
    * a target that is not a plot output but is a file according to
      ``RepoTree`` gets an empty properties dict;
    * any other target is reported with a warning and left out.

    Args:
        repo: object exposing ``stages``, each of which exposes ``outs``.
        targets: optional iterable of paths; each is made absolute with
            ``os.path.abspath`` before comparison.
        rev: revision label, used only in the warning message.

    Returns:
        dict keyed by path info with plot properties as values.
    """
    plot_outs = {
        out for stage in repo.stages for out in stage.outs if out.plot
    }

    def props_by_path(outs):
        return {out.path_info: _plot_props(out) for out in outs}

    if not targets:
        return props_by_path(plot_outs)

    requested = {PathInfo(os.path.abspath(target)) for target in targets}

    # Split the request into targets backed by a plot output and the rest;
    # whatever stays in `requested` afterwards has no plot output behind it.
    matched = set()
    for out in plot_outs:
        if out.path_info not in requested:
            continue
        matched.add(out)
        requested.discard(out.path_info)

    tree = RepoTree(repo)
    result = props_by_path(matched)

    for leftover in requested:
        if not tree.isfile(leftover):
            logger.warning(
                "'%s' was not found at: '%s'. It will not be plotted.",
                leftover,
                rev,
            )
            continue
        # Plain file without plot definition: plot it with no properties
        result[leftover] = {}

    return result
def _collect_metrics(repo, targets, recursive):
    """Collect the path infos of metric files.

    Args:
        repo: object exposing ``stages``, each of which exposes ``outs``.
        targets: iterable of paths to look at; when falsy, every stage
            output whose ``metric`` attribute is truthy is used instead.
        recursive: when true, targets that are directories according to
            ``RepoTree`` contribute all files found by ``walk_files``.

    Returns:
        list of path infos. With targets: the targets that are files,
        followed by the files found under directory targets. Without
        targets: the de-duplicated metric outputs, in no particular order.
    """
    if not targets:
        metric_infos = {
            out.path_info
            for stage in repo.stages
            for out in stage.outs
            if out.metric
        }
        return list(metric_infos)

    target_infos = [PathInfo(os.path.abspath(target)) for target in targets]
    tree = RepoTree(repo)

    nested_files = []
    if recursive:
        for info in target_infos:
            if tree.isdir(info):
                nested_files.extend(tree.walk_files(info))

    direct_files = [info for info in target_infos if tree.isfile(info)]
    return direct_files + nested_files
def _collect_paths(
    repo: Repo,
    targets: Iterable[str],
    recursive: bool = False,
    rev: str = None,
):
    """Resolve ``targets`` into a set of file path infos.

    Each target is made absolute with ``os.path.abspath``. A target that is a
    file according to ``RepoTree`` is included as is. When ``recursive`` is
    true, a directory target contributes every file yielded by
    ``walk_files``. A non-file target triggers a warning only when
    ``recursive`` is false.

    Args:
        repo: repository used to build the ``RepoTree``.
        targets: paths to resolve.
        recursive: whether directory targets are expanded.
        rev: revision label, used only in the warning message.

    Returns:
        set of path infos.
    """
    requested = {PathInfo(os.path.abspath(target)) for target in targets}
    tree = RepoTree(repo)

    collected = set()
    for info in requested:
        if recursive and tree.isdir(info):
            collected.update(tree.walk_files(info))

        if tree.isfile(info):
            collected.add(info)
        elif not recursive:
            logger.warning(
                "'%s' was not found at: '%s'.", info, rev,
            )
    return collected
def test_isdir_mixed(tmp_dir, dvc):
    """A directory with only one of its files passed to ``dvc.add`` is a dir."""
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})

    # Add just "foo"; "bar" stays as generated, making the directory "mixed"
    added_file = tmp_dir / "dir" / "foo"
    dvc.add(str(added_file))

    repo_tree = RepoTree(dvc)
    assert repo_tree.isdir("dir")
    assert not repo_tree.isfile("dir")
def test_repo_tree_no_subrepos(tmp_dir, dvc, scm):
    """``RepoTree(subrepos=False)`` must not expose anything inside a subrepo.

    A subrepo is created at ``dir/repo`` next to a plain ``dir/repo.txt``
    file. Walking the tree must list only the entries of the outer repo, and
    the ``isfile``/``isdir``/``isdvc``/``exists`` queries on paths inside the
    subrepo must all be negative.
    """
    tmp_dir.scm_gen(
        {"dir": {"repo.txt": "file to confuse RepoTree"}},
        commit="dir/repo.txt",
    )
    tmp_dir.dvc_gen({"lorem": "lorem"}, commit="add foo")

    subrepo = tmp_dir / "dir" / "repo"
    make_subrepo(subrepo, scm)
    subrepo.dvc_gen({"foo": "foo", "dir1": {"bar": "bar"}}, commit="FOO")
    subrepo.scm_gen({"ipsum": "ipsum"}, commit="BAR")

    # using tree that does not have dvcignore
    dvc.tree._reset()
    tree = RepoTree(dvc, subrepos=False, fetch=True)

    expected = [
        tmp_dir / ".dvcignore",
        tmp_dir / ".gitignore",
        tmp_dir / "lorem",
        tmp_dir / "lorem.dvc",
        tmp_dir / "dir",
        tmp_dir / "dir" / "repo.txt",
    ]

    actual = []
    for root, dirs, files in tree.walk(tmp_dir, dvcfiles=True):
        for entry in dirs + files:
            actual.append(os.path.normpath(os.path.join(root, entry)))

    expected = [str(path) for path in expected]
    assert set(actual) == set(expected)
    # Guards against duplicates, which the set comparison would hide
    assert len(actual) == len(expected)

    assert tree.isfile(tmp_dir / "lorem") is True
    assert tree.isfile(tmp_dir / "dir" / "repo" / "foo") is False
    assert tree.isdir(tmp_dir / "dir" / "repo") is False
    assert tree.isdir(tmp_dir / "dir") is True

    assert tree.isdvc(tmp_dir / "lorem") is True
    assert tree.isdvc(tmp_dir / "dir" / "repo" / "dir1") is False

    assert tree.exists(tmp_dir / "dir" / "repo.txt") is True
    # "ipsum" is generated inside the subrepo (tmp_dir/dir/repo). The check
    # used to target tmp_dir/repo/ipsum, a path that is never created, so it
    # passed regardless of how subrepos were handled.
    assert tree.exists(tmp_dir / "dir" / "repo" / "ipsum") is False
def test_exists_isdir_isfile_dirty(tmp_dir, dvc):
    """Check ``exists``/``isfile``/``isdir`` when the workspace has diverged.

    First the paths created through ``dvc_gen`` are deleted from disk and the
    tree is expected to still report them. Then a file is created where the
    directory was and a directory where the file was, and the tree is
    expected to report what is on disk.
    """
    tmp_dir.dvc_gen(
        {"datafile": "data", "datadir": {"foo": "foo", "bar": "bar"}}
    )

    tree = RepoTree(dvc)

    # Wipe both entries from the workspace
    shutil.rmtree(tmp_dir / "datadir")
    (tmp_dir / "datafile").unlink()

    root = PathInfo(tmp_dir)
    datafile = root / "datafile"
    datadir = root / "datadir"

    assert tree.exists(datafile)
    assert tree.exists(datadir)
    assert tree.exists(datadir / "foo")

    assert tree.isfile(datafile)
    assert not tree.isfile(datadir)
    assert tree.isfile(datadir / "foo")

    assert not tree.isdir(datafile)
    assert tree.isdir(datadir)
    assert not tree.isdir(datadir / "foo")

    # NOTE: creating file instead of dir and dir instead of file
    tmp_dir.gen({"datadir": "data", "datafile": {"foo": "foo", "bar": "bar"}})

    assert tree.exists(datafile)
    assert tree.exists(datadir)
    assert not tree.exists(datadir / "foo")
    assert tree.exists(datafile / "foo")

    assert not tree.isfile(datafile)
    assert tree.isfile(datadir)
    assert not tree.isfile(datadir / "foo")
    assert tree.isfile(datafile / "foo")

    assert tree.isdir(datafile)
    assert not tree.isdir(datadir)
    assert not tree.isdir(datadir / "foo")
    assert not tree.isdir(datafile / "foo")
def test_isdir_isfile(tmp_dir, dvc):
    """``isdir``/``isfile`` answers stay the same before and after ``dvc.add``.

    Before ``dvc.add`` neither path is reported by ``isdvc``; after adding
    them and deleting them from disk, both are reported by ``isdvc`` while
    ``isdir``/``isfile`` keep giving the same answers.
    """
    tmp_dir.gen({"datafile": "data", "datadir": {"foo": "foo", "bar": "bar"}})

    repo_tree = RepoTree(dvc)

    # Freshly generated, nothing added yet
    assert repo_tree.isdir("datadir")
    assert not repo_tree.isfile("datadir")
    assert not repo_tree.isdvc("datadir")
    assert not repo_tree.isdir("datafile")
    assert repo_tree.isfile("datafile")
    assert not repo_tree.isdvc("datafile")

    dvc.add(["datadir", "datafile"])

    # Remove the workspace copies after adding
    shutil.rmtree(tmp_dir / "datadir")
    (tmp_dir / "datafile").unlink()

    assert repo_tree.isdir("datadir")
    assert not repo_tree.isfile("datadir")
    assert repo_tree.isdvc("datadir")
    assert not repo_tree.isdir("datafile")
    assert repo_tree.isfile("datafile")
    assert repo_tree.isdvc("datafile")
def _read_metrics(repo, metrics, rev):
    """Load metric values from the given metric files.

    Entries that are not files according to ``RepoTree`` are skipped. Files
    that cannot be read (missing or corrupted YAML) are logged at debug level
    and skipped. Values for which ``_extract_metrics`` yields ``None`` or an
    empty dict are left out.

    Args:
        repo: repository used to build the ``RepoTree``.
        metrics: iterable of metric paths.
        rev: revision label, passed to ``_extract_metrics`` and used in the
            debug message.

    Returns:
        dict mapping ``str(metric)`` to its extracted value.
    """
    tree = RepoTree(repo)

    collected = {}
    for path in metrics:
        if not tree.isfile(path):
            continue

        try:
            raw = load_yaml(path, tree=tree)
        except (FileNotFoundError, YAMLFileCorruptedError):
            logger.debug(
                "failed to read '%s' on '%s'", path, rev, exc_info=True
            )
            continue

        extracted = _extract_metrics(raw, path, rev)
        if extracted in (None, {}):
            continue
        collected[str(path)] = extracted

    return collected
def test_subrepos(tmp_dir, scm, dvc):
    """With ``subrepos=True`` tree queries are answered by the owning subrepo.

    Two subrepos are created under ``dir``. For each of them ``_get_repo`` is
    wrapped so that every call asserts the returned repo has the same
    ``root_dir`` as that subrepo, and then the usual
    ``exists``/``isfile``/``isdir``/``isdvc`` queries are checked.
    """
    tmp_dir.scm_gen(
        {"dir": {"repo.txt": "file to confuse RepoTree"}},
        commit="dir/repo.txt",
    )

    subrepo1 = tmp_dir / "dir" / "repo"
    subrepo2 = tmp_dir / "dir" / "repo2"
    for subrepo in [subrepo1, subrepo2]:
        make_subrepo(subrepo, scm)

    subrepo1.dvc_gen({"foo": "foo", "dir1": {"bar": "bar"}}, commit="FOO")
    subrepo2.dvc_gen(
        {"lorem": "lorem", "dir2": {"ipsum": "ipsum"}}, commit="BAR"
    )

    dvc.tree._reset()
    tree = RepoTree(dvc, subrepos=True, fetch=True)

    def assert_tree_belongs_to_repo(ret_val):
        # Grab the real method now: this helper is evaluated before the
        # patch below is applied, so the wrapper delegates to the original.
        original = tree._get_repo

        def checked(*args, **kwargs):
            resolved = original(*args, **kwargs)
            assert resolved.root_dir == ret_val.root_dir
            return resolved

        return checked

    with mock.patch.object(
        tree,
        "_get_repo",
        side_effect=assert_tree_belongs_to_repo(subrepo1.dvc),
    ):
        assert tree.exists(subrepo1 / "foo") is True
        assert tree.exists(subrepo1 / "bar") is False

        assert tree.isfile(subrepo1 / "foo") is True
        assert tree.isfile(subrepo1 / "dir1" / "bar") is True
        assert tree.isfile(subrepo1 / "dir1") is False

        assert tree.isdir(subrepo1 / "dir1") is True
        assert tree.isdir(subrepo1 / "dir1" / "bar") is False
        assert tree.isdvc(subrepo1 / "foo") is True

    with mock.patch.object(
        tree,
        "_get_repo",
        side_effect=assert_tree_belongs_to_repo(subrepo2.dvc),
    ):
        assert tree.exists(subrepo2 / "lorem") is True
        assert tree.exists(subrepo2 / "ipsum") is False

        assert tree.isfile(subrepo2 / "lorem") is True
        assert tree.isfile(subrepo2 / "dir2" / "ipsum") is True
        assert tree.isfile(subrepo2 / "dir2") is False

        assert tree.isdir(subrepo2 / "dir2") is True
        assert tree.isdir(subrepo2 / "dir2" / "ipsum") is False
        assert tree.isdvc(subrepo2 / "lorem") is True