def test_subrepos(tmp_dir, scm, dvc):
    """Verify that a subrepo-aware RepoFileSystem resolves paths via the owning subrepo.

    Two subrepos are created under ``dir/``; every query made against a path
    inside one of them must be served by the repo whose ``root_dir`` matches
    that subrepo.
    """
    tmp_dir.scm_gen(
        {"dir": {"repo.txt": "file to confuse RepoFileSystem"}},
        commit="dir/repo.txt",
    )

    first = tmp_dir / "dir" / "repo"
    second = tmp_dir / "dir" / "repo2"
    make_subrepo(first, scm)
    make_subrepo(second, scm)

    first.dvc_gen({"foo": "foo", "dir1": {"bar": "bar"}}, commit="FOO")
    second.dvc_gen(
        {"lorem": "lorem", "dir2": {"ipsum": "ipsum"}}, commit="BAR"
    )

    dvc._reset()
    fs = RepoFileSystem(repo=dvc, subrepos=True)

    def assert_fs_belongs_to_repo(ret_val):
        # Capture the real lookup now; by the time the wrapper runs, the
        # attribute on ``fs`` has been replaced by the mock.
        real_get_repo = fs._get_repo

        def checked(*args, **kwargs):
            found = real_get_repo(*args, **kwargs)
            assert found.root_dir == ret_val.root_dir
            return found

        return checked

    first_check = assert_fs_belongs_to_repo(first.dvc)
    with mock.patch.object(fs, "_get_repo", side_effect=first_check):
        foo = first / "foo"
        dir1 = first / "dir1"
        nested_bar = dir1 / "bar"

        assert fs.exists(foo) is True
        assert fs.exists(first / "bar") is False

        assert fs.isfile(foo) is True
        assert fs.isfile(nested_bar) is True
        assert fs.isfile(dir1) is False

        assert fs.isdir(dir1) is True
        assert fs.isdir(nested_bar) is False
        assert fs.isdvc(foo) is True

    second_check = assert_fs_belongs_to_repo(second.dvc)
    with mock.patch.object(fs, "_get_repo", side_effect=second_check):
        lorem = second / "lorem"
        dir2 = second / "dir2"
        nested_ipsum = dir2 / "ipsum"

        assert fs.exists(lorem) is True
        assert fs.exists(second / "ipsum") is False

        assert fs.isfile(lorem) is True
        assert fs.isfile(nested_ipsum) is True
        assert fs.isfile(dir2) is False

        assert fs.isdir(dir2) is True
        assert fs.isdir(nested_ipsum) is False
        assert fs.isdvc(lorem) is True
def _collect_paths(
    repo: "Repo",
    targets: Iterable[str],
    recursive: bool = False,
    rev: str = None,
):
    """Turn user targets into the list of existing paths to inspect.

    Each target is made absolute and wrapped in ``PathInfo``. With
    ``recursive`` set, files found under a directory target (via
    ``repo.dvcignore.walk_files``) are added as well. Targets that do not
    exist are dropped; a warning is logged for them only when ``recursive``
    is false.
    """
    from dvc.fs.repo import RepoFileSystem

    resolved = [PathInfo(os.path.abspath(item)) for item in targets]
    fs = RepoFileSystem(repo)

    collected = []
    for candidate in resolved:
        if recursive and fs.isdir(candidate):
            collected.extend(repo.dvcignore.walk_files(fs, candidate))

        if fs.exists(candidate):
            collected.append(candidate)
            continue

        # Missing target: skip it, warning only in non-recursive mode.
        if recursive:
            continue
        if rev in ("workspace", ""):
            logger.warning(
                "'%s' was not found in current workspace.", candidate
            )
        else:
            logger.warning("'%s' was not found at: '%s'.", candidate, rev)
    return collected
def _collect_paths(
    repo: "Repo",
    targets: Iterable[str],
    recursive: bool = False,
    rev: str = None,
):
    """Turn user targets into the list of absolute paths to inspect.

    Every target is made absolute and always ends up in the result. With
    ``recursive`` set, entries returned by ``repo.dvcignore.find`` for a
    directory target are added before the directory itself. A warning
    (using the path relative to the current directory) is logged for each
    target that does not exist, but the target is still returned.
    """
    from dvc.fs.repo import RepoFileSystem
    from dvc.utils import relpath

    resolved = [os.path.abspath(item) for item in targets]
    fs = RepoFileSystem(repo)

    collected = []
    for candidate in resolved:
        if recursive and fs.isdir(candidate):
            collected.extend(repo.dvcignore.find(fs, candidate))

        missing = not fs.exists(candidate)
        if missing:
            shown = relpath(candidate)
            if rev in ("workspace", ""):
                logger.warning(
                    "'%s' was not found in current workspace.", shown
                )
            else:
                logger.warning("'%s' was not found at: '%s'.", shown, rev)

        collected.append(candidate)
    return collected
def test_isdir_mixed(tmp_dir, dvc):
    """A directory with only one DVC-added file is still a directory, not a file."""
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})

    tracked_file = tmp_dir / "dir" / "foo"
    dvc.add(str(tracked_file))

    fs = RepoFileSystem(repo=dvc)
    target = "dir"
    assert fs.isdir(target)
    assert not fs.isfile(target)
def test_repo_fs_no_subrepos(tmp_dir, dvc, scm):
    """Check that a RepoFileSystem built without ``subrepos`` does not expose subrepo content.

    A subrepo is created at ``dir/repo``; walking the outer repo must list
    only the outer repo's entries, and queries for paths inside the subrepo
    must come back negative.
    """
    tmp_dir.scm_gen(
        {"dir": {"repo.txt": "file to confuse RepoFileSystem"}},
        commit="dir/repo.txt",
    )
    tmp_dir.dvc_gen({"lorem": "lorem"}, commit="add foo")

    nested = tmp_dir / "dir" / "repo"
    make_subrepo(nested, scm)
    with nested.chdir():
        nested.dvc_gen({"foo": "foo", "dir1": {"bar": "bar"}}, commit="FOO")
        nested.scm_gen({"ipsum": "ipsum"}, commit="BAR")

    # using fs that does not have dvcignore
    dvc._reset()
    fs = RepoFileSystem(repo=dvc)

    outer_dir = tmp_dir / "dir"
    expected = [
        str(tmp_dir / ".dvcignore"),
        str(tmp_dir / ".gitignore"),
        str(tmp_dir / "lorem"),
        str(tmp_dir / "lorem.dvc"),
        str(outer_dir),
        str(outer_dir / "repo.txt"),
    ]

    actual = [
        os.path.normpath(os.path.join(root, entry))
        for root, dirs, files in fs.walk(tmp_dir.fs_path, dvcfiles=True)
        for entry in dirs + files
    ]

    # Compare as sets for order-independence, then by length to catch
    # duplicates that the set comparison would hide.
    assert set(actual) == set(expected)
    assert len(actual) == len(expected)

    assert fs.isfile(tmp_dir / "lorem") is True
    assert fs.isfile(nested / "foo") is False
    assert fs.isdir(nested) is False
    assert fs.isdir(outer_dir) is True

    assert fs.isdvc(tmp_dir / "lorem") is True
    assert fs.isdvc(nested / "dir1") is False

    assert fs.exists(outer_dir / "repo.txt") is True
    # NOTE(review): this probes <tmp_dir>/repo/ipsum, whereas the subrepo
    # was created at <tmp_dir>/dir/repo -- confirm the path is intended.
    assert fs.exists(tmp_dir / "repo" / "ipsum") is False
def test_exists_isdir_isfile_dirty(tmp_dir, dvc):
    """Exercise exists/isfile/isdir when the workspace diverges from tracked data.

    First the tracked file and directory are deleted from the workspace;
    then they are recreated with swapped kinds (a file where the directory
    was, and a directory where the file was).
    """
    tmp_dir.dvc_gen(
        {"datafile": "data", "datadir": {"foo": "foo", "bar": "bar"}}
    )

    fs = RepoFileSystem(repo=dvc)

    shutil.rmtree(tmp_dir / "datadir")
    (tmp_dir / "datafile").unlink()

    root = PathInfo(tmp_dir)
    datafile = root / "datafile"
    datadir = root / "datadir"
    datadir_foo = datadir / "foo"
    datafile_foo = datafile / "foo"

    # Workspace copies are gone, yet the entries are still reported.
    assert fs.exists(datafile)
    assert fs.exists(datadir)
    assert fs.exists(datadir_foo)
    assert fs.isfile(datafile)
    assert not fs.isfile(datadir)
    assert fs.isfile(datadir_foo)
    assert not fs.isdir(datafile)
    assert fs.isdir(datadir)
    assert not fs.isdir(datadir_foo)

    # NOTE: creating file instead of dir and dir instead of file
    tmp_dir.gen({"datadir": "data", "datafile": {"foo": "foo", "bar": "bar"}})

    assert fs.exists(datafile)
    assert fs.exists(datadir)
    assert not fs.exists(datadir_foo)
    assert fs.exists(datafile_foo)
    assert not fs.isfile(datafile)
    assert fs.isfile(datadir)
    assert not fs.isfile(datadir_foo)
    assert fs.isfile(datafile_foo)
    assert fs.isdir(datafile)
    assert not fs.isdir(datadir)
    assert not fs.isdir(datadir_foo)
    assert not fs.isdir(datafile_foo)
def collect(
    self,
    targets: List[str] = None,
    revs: List[str] = None,
    recursive: bool = False,
) -> Dict[str, Dict]:
    """Gather props and raw file contents for plots, grouped by revision.

    The result is shaped like:
        {rev: {plots.csv: {
            props: {x: ..., "header": ..., ...},
            data: "...data as a string...",
        }}}

    File contents are stored unparsed, because parsing depends on props.
    The "data" key is absent for files that could not be opened.
    """
    from dvc.fs.repo import RepoFileSystem
    from dvc.utils.collections import ensure_list

    targets = ensure_list(targets)
    collected: Dict[str, Dict] = {}
    for rev in self.repo.brancher(revs=revs):
        # .brancher() adds unwanted workspace
        if revs is not None and rev not in revs:
            continue
        rev = rev or "workspace"

        fs = RepoFileSystem(self.repo)
        plots = _collect_plots(self.repo, targets, rev, recursive)
        for path_info, props in plots.items():
            # Created lazily so revisions without plots stay out of the result.
            rev_entries = collected.setdefault(rev, {})

            if fs.isdir(path_info):
                plot_files = [
                    (pi, relpath(pi, self.repo.root_dir))
                    for pi in fs.walk_files(path_info)
                ]
            else:
                plot_files = [
                    (path_info, relpath(path_info, self.repo.root_dir))
                ]

            for path, repo_path in plot_files:
                entry = {"props": props}
                rev_entries[repo_path] = entry

                # Load data from git or dvc cache
                try:
                    with fs.open(path) as fd:
                        entry["data"] = fd.read()
                except FileNotFoundError:
                    # This might happen simply because cache is absent
                    pass
    return collected
def test_isdir_isfile(tmp_dir, dvc):
    """Check isdir/isfile/isdvc before ``dvc add`` and after the workspace copies are deleted."""
    tmp_dir.gen({"datafile": "data", "datadir": {"foo": "foo", "bar": "bar"}})

    fs = RepoFileSystem(repo=dvc)
    a_dir = "datadir"
    a_file = "datafile"

    # Plain workspace entries: kinds are reported, nothing is DVC-tracked.
    assert fs.isdir(a_dir)
    assert not fs.isfile(a_dir)
    assert not fs.isdvc(a_dir)
    assert not fs.isdir(a_file)
    assert fs.isfile(a_file)
    assert not fs.isdvc(a_file)

    dvc.add([a_dir, a_file])
    shutil.rmtree(tmp_dir / a_dir)
    (tmp_dir / a_file).unlink()

    # Same kinds after removal from the workspace, now reported as tracked.
    assert fs.isdir(a_dir)
    assert not fs.isfile(a_dir)
    assert fs.isdvc(a_dir)
    assert not fs.isdir(a_file)
    assert fs.isfile(a_file)
    assert fs.isdvc(a_file)
def _collect_from_revision(
    self,
    targets: Optional[List[str]] = None,
    revision: Optional[str] = None,
    recursive: bool = False,
    onerror: Optional[Callable] = None,
    props: Optional[Dict] = None,
):
    """Collect plot definitions for one revision with deferred data loading.

    Returns a dict keyed by repo-relative path. For every plot file the
    value holds "props" (the collected props overlaid with ``props``) and
    "data_source", a ``partial`` over ``parse`` that loads the file when
    called. For a directory whose unpacking result has no "data" key, that
    result is stored as-is under the directory's repo-relative path.
    """
    from dvc.fs.repo import RepoFileSystem

    fs = RepoFileSystem(self.repo)
    plots = _collect_plots(self.repo, targets, revision, recursive)
    overrides = props or {}

    res: Dict[str, Any] = {}
    for fs_path, rev_props in plots.items():
        if not fs.isdir(fs_path):
            plot_files = [(fs_path, relpath(fs_path, self.repo.root_dir))]
        else:
            plot_files = []
            unpacked = _unpack_dir_files(fs, fs_path, onerror=onerror)
            if "data" in unpacked:
                plot_files = [
                    (pi, relpath(pi, self.repo.root_dir))
                    for pi in unpacked.get("data")  # pylint: disable=E1101
                ]
            else:
                # No file list: keep the unpacking result itself as the
                # entry for this directory.
                res[relpath(fs_path, self.repo.root_dir)] = unpacked

        for path, repo_path in plot_files:
            # Caller-supplied props take precedence over collected ones.
            joined_props = {**rev_props, **overrides}
            res[repo_path] = {
                "props": joined_props,
                "data_source": partial(
                    parse,
                    fs,
                    path,
                    props=joined_props,
                    onerror=onerror,
                ),
            }
    return res
def test_isdir_isfile(tmp_dir, dvc):
    """Check isdir/isfile/isdvc for top-level and nested entries around ``dvc add``.

    Covers a plain directory (``subdir``) that contains a DVC-added file and
    a DVC-added directory, after all added entries were deleted from the
    workspace.
    """
    tmp_dir.gen(
        {
            "datafile": "data",
            "datadir": {"foo": "foo", "bar": "bar"},
            "subdir": {
                "baz": "baz",
                "data": {"abc": "abc", "xyz": "xyz"},
            },
        },
    )

    fs = RepoFileSystem(repo=dvc)
    a_dir = "datadir"
    a_file = "datafile"
    nested_file = os.path.join("subdir", "baz")
    nested_dir = os.path.join("subdir", "data")

    # Before adding: kinds are reported, nothing is DVC-tracked.
    assert fs.isdir(a_dir)
    assert not fs.isfile(a_dir)
    assert not fs.isdvc(a_dir)
    assert not fs.isdir(a_file)
    assert fs.isfile(a_file)
    assert not fs.isdvc(a_file)

    dvc.add([a_dir, a_file, nested_file, nested_dir])

    shutil.rmtree(tmp_dir / "datadir")
    shutil.rmtree(tmp_dir / "subdir" / "data")
    (tmp_dir / "datafile").unlink()
    (tmp_dir / "subdir" / "baz").unlink()

    # After removal from the workspace the added entries keep their kinds.
    assert fs.isdir(a_dir)
    assert not fs.isfile(a_dir)
    assert fs.isdvc(a_dir)
    assert not fs.isdir(a_file)
    assert fs.isfile(a_file)
    assert fs.isdvc(a_file)

    # The parent directory was not added itself.
    assert fs.isdir("subdir")
    assert not fs.isfile("subdir")
    assert not fs.isdvc("subdir")
    assert fs.isfile(nested_file)
    assert fs.isdir(nested_dir)
def _collect_from_revision(
    self,
    targets: Optional[List[str]] = None,
    revision: Optional[str] = None,
    recursive: bool = False,
    onerror: Optional[Callable] = None,
    props: Optional[Dict] = None,
):
    """Collect plot definitions for one revision and parse their data eagerly.

    Returns a dict keyed by repo-relative path. Each value holds "props"
    (the collected props overlaid with ``props``) updated with whatever
    ``parse`` returns for that file. Directory plots are expanded into the
    files yielded by ``fs.walk_files``.
    """
    from dvc.fs.repo import RepoFileSystem

    fs = RepoFileSystem(self.repo)
    plots = _collect_plots(self.repo, targets, revision, recursive)
    overrides = props or {}

    res = {}
    for path_info, rev_props in plots.items():
        if fs.isdir(path_info):
            plot_files = [
                (pi, relpath(pi, self.repo.root_dir))
                for pi in fs.walk_files(path_info)
            ]
        else:
            plot_files = [(path_info, relpath(path_info, self.repo.root_dir))]

        for path, repo_path in plot_files:
            # Caller-supplied props take precedence over collected ones.
            joined_props = {**rev_props, **overrides}
            entry = {"props": joined_props}
            res[repo_path] = entry
            entry.update(
                parse(fs, path, props=joined_props, onerror=onerror)
            )
    return res