Ejemplo n.º 1
0
def test_subrepos(tmp_dir, scm, dvc):
    """Path queries on a subrepo-aware fs resolve to the owning subrepo.

    Two subrepos are created under ``dir/`` next to a git-tracked
    ``dir/repo.txt``. While querying paths of one subrepo, ``_get_repo``
    is wrapped so that every repo it returns is asserted to have that
    subrepo's ``root_dir``.
    """
    tmp_dir.scm_gen(
        {"dir": {"repo.txt": "file to confuse RepoFileSystem"}},
        commit="dir/repo.txt",
    )

    first = tmp_dir / "dir" / "repo"
    second = tmp_dir / "dir" / "repo2"
    make_subrepo(first, scm)
    make_subrepo(second, scm)

    first.dvc_gen({"foo": "foo", "dir1": {"bar": "bar"}}, commit="FOO")
    second.dvc_gen(
        {"lorem": "lorem", "dir2": {"ipsum": "ipsum"}}, commit="BAR"
    )

    dvc._reset()
    fs = RepoFileSystem(repo=dvc, subrepos=True)

    def checked_get_repo(owner):
        # Grab the real method now; this runs while building the patch
        # arguments, i.e. before the patch replaces the attribute.
        real_get_repo = fs._get_repo

        def wrapper(*args, **kwargs):
            found = real_get_repo(*args, **kwargs)
            assert found.root_dir == owner.root_dir
            return found

        return wrapper

    # (subrepo, top-level file, directory, file inside that directory)
    layouts = [
        (first, "foo", "dir1", "bar"),
        (second, "lorem", "dir2", "ipsum"),
    ]
    for subrepo, top_file, subdir, nested_file in layouts:
        with mock.patch.object(
            fs, "_get_repo", side_effect=checked_get_repo(subrepo.dvc)
        ):
            assert fs.exists(subrepo / top_file) is True
            # The nested file does not exist at the subrepo's top level.
            assert fs.exists(subrepo / nested_file) is False

            assert fs.isfile(subrepo / top_file) is True
            assert fs.isfile(subrepo / subdir / nested_file) is True
            assert fs.isfile(subrepo / subdir) is False

            assert fs.isdir(subrepo / subdir) is True
            assert fs.isdir(subrepo / subdir / nested_file) is False
            assert fs.isdvc(subrepo / top_file) is True
Ejemplo n.º 2
0
def _collect_paths(
    repo: "Repo",
    targets: Iterable[str],
    recursive: bool = False,
    rev: str = None,
):
    """Turn ``targets`` into absolute path infos that exist in ``repo``.

    Each target is made absolute and wrapped in ``PathInfo``. With
    ``recursive`` set, a directory target additionally contributes the
    files yielded by ``repo.dvcignore.walk_files``. A target that does
    not exist is dropped; a warning is logged for it only when
    ``recursive`` is false. ``rev`` is used solely for the wording of
    that warning.

    Returns:
        list of collected path infos.
    """
    from dvc.fs.repo import RepoFileSystem

    candidates = [PathInfo(os.path.abspath(target)) for target in targets]
    fs = RepoFileSystem(repo)

    found = []
    for candidate in candidates:
        if recursive and fs.isdir(candidate):
            found.extend(repo.dvcignore.walk_files(fs, candidate))

        if fs.exists(candidate):
            found.append(candidate)
        elif not recursive:
            if rev in ("workspace", ""):
                logger.warning(
                    "'%s' was not found in current workspace.", candidate
                )
            else:
                logger.warning(
                    "'%s' was not found at: '%s'.", candidate, rev
                )
    return found
Ejemplo n.º 3
0
def _collect_paths(
    repo: "Repo",
    targets: Iterable[str],
    recursive: bool = False,
    rev: str = None,
):
    """Turn ``targets`` into a list of absolute filesystem paths.

    With ``recursive`` set, a directory target additionally contributes
    the paths yielded by ``repo.dvcignore.find``. A target that does not
    exist triggers a warning (worded according to ``rev``) but is still
    included in the result.

    Returns:
        list of collected absolute paths.
    """
    from dvc.fs.repo import RepoFileSystem
    from dvc.utils import relpath

    absolute_targets = [os.path.abspath(target) for target in targets]
    fs = RepoFileSystem(repo)

    collected = []
    for candidate in absolute_targets:
        if recursive and fs.isdir(candidate):
            collected.extend(repo.dvcignore.find(fs, candidate))

        if not fs.exists(candidate):
            shown = relpath(candidate)
            if rev in ("workspace", ""):
                logger.warning(
                    "'%s' was not found in current workspace.", shown
                )
            else:
                logger.warning("'%s' was not found at: '%s'.", shown, rev)

        # Missing targets are kept on purpose: only a warning is emitted.
        collected.append(candidate)
    return collected
Ejemplo n.º 4
0
def test_isdir_mixed(tmp_dir, dvc):
    """A directory with only one of its files added to DVC is a dir."""
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})

    # Only "foo" is added; "bar" stays a plain workspace file.
    tracked_file = tmp_dir / "dir" / "foo"
    dvc.add(str(tracked_file))

    repo_fs = RepoFileSystem(repo=dvc)
    assert repo_fs.isdir("dir")
    assert not repo_fs.isfile("dir")
Ejemplo n.º 5
0
def test_repo_fs_no_subrepos(tmp_dir, dvc, scm):
    """Without ``subrepos=True`` the fs does not expose subrepo contents.

    A subrepo is created at ``dir/repo``; walking the main repo must list
    only the main repo's own entries, and path queries that point into
    the subrepo must come back negative.
    """
    tmp_dir.scm_gen(
        {"dir": {"repo.txt": "file to confuse RepoFileSystem"}},
        commit="dir/repo.txt",
    )
    tmp_dir.dvc_gen({"lorem": "lorem"}, commit="add foo")

    subrepo = tmp_dir / "dir" / "repo"
    make_subrepo(subrepo, scm)
    with subrepo.chdir():
        subrepo.dvc_gen({"foo": "foo", "dir1": {"bar": "bar"}}, commit="FOO")
        subrepo.scm_gen({"ipsum": "ipsum"}, commit="BAR")

    # using fs that does not have dvcignore
    dvc._reset()
    fs = RepoFileSystem(repo=dvc)
    expected = [
        str(path)
        for path in (
            tmp_dir / ".dvcignore",
            tmp_dir / ".gitignore",
            tmp_dir / "lorem",
            tmp_dir / "lorem.dvc",
            tmp_dir / "dir",
            tmp_dir / "dir" / "repo.txt",
        )
    ]

    actual = []
    for root, dirs, files in fs.walk(tmp_dir.fs_path, dvcfiles=True):
        actual.extend(
            os.path.normpath(os.path.join(root, entry))
            for entry in dirs + files
        )

    # Same members and same count, i.e. no entry is reported twice.
    assert set(actual) == set(expected)
    assert len(actual) == len(expected)

    assert fs.isfile(tmp_dir / "lorem") is True
    assert fs.isfile(tmp_dir / "dir" / "repo" / "foo") is False
    assert fs.isdir(tmp_dir / "dir" / "repo") is False
    assert fs.isdir(tmp_dir / "dir") is True

    assert fs.isdvc(tmp_dir / "lorem") is True
    assert fs.isdvc(tmp_dir / "dir" / "repo" / "dir1") is False

    assert fs.exists(tmp_dir / "dir" / "repo.txt") is True
    # NOTE(review): "ipsum" was generated under dir/repo, so this path was
    # never created; the check may have been meant for
    # tmp_dir / "dir" / "repo" / "ipsum" - confirm.
    assert fs.exists(tmp_dir / "repo" / "ipsum") is False
Ejemplo n.º 6
0
def test_exists_isdir_isfile_dirty(tmp_dir, dvc):
    """Query results when the workspace differs from what was added.

    First the tracked file and directory are deleted from disk and must
    still be reported with their tracked kinds. Then a file is created
    under the directory's name and a directory under the file's name,
    and the reported kinds must follow what is now on disk.
    """
    tmp_dir.dvc_gen(
        {"datafile": "data", "datadir": {"foo": "foo", "bar": "bar"}}
    )

    fs = RepoFileSystem(repo=dvc)
    shutil.rmtree(tmp_dir / "datadir")
    (tmp_dir / "datafile").unlink()

    root = PathInfo(tmp_dir)
    datafile = root / "datafile"
    datadir = root / "datadir"
    datadir_foo = datadir / "foo"
    datafile_foo = datafile / "foo"

    assert fs.exists(datafile)
    assert fs.exists(datadir)
    assert fs.exists(datadir_foo)
    assert fs.isfile(datafile)
    assert not fs.isfile(datadir)
    assert fs.isfile(datadir_foo)
    assert not fs.isdir(datafile)
    assert fs.isdir(datadir)
    assert not fs.isdir(datadir_foo)

    # NOTE: creating file instead of dir and dir instead of file
    tmp_dir.gen({"datadir": "data", "datafile": {"foo": "foo", "bar": "bar"}})
    assert fs.exists(datafile)
    assert fs.exists(datadir)
    assert not fs.exists(datadir_foo)
    assert fs.exists(datafile_foo)
    assert not fs.isfile(datafile)
    assert fs.isfile(datadir)
    assert not fs.isfile(datadir_foo)
    assert fs.isfile(datafile_foo)
    assert fs.isdir(datafile)
    assert not fs.isdir(datadir)
    assert not fs.isdir(datadir_foo)
    assert not fs.isdir(datafile_foo)
Ejemplo n.º 7
0
    def collect(
        self,
        targets: List[str] = None,
        revs: List[str] = None,
        recursive: bool = False,
    ) -> Dict[str, Dict]:
        """Gather props and raw data of plots for the given revisions.

        The result is shaped like:
            {rev: {plots.csv: {
                props: {x: ..., "header": ..., ...},
                data: "...data as a string...",
            }}}
        The data is left unparsed because parsing depends on props.
        A plot whose file cannot be opened keeps its props but gets no
        "data" key.
        """
        from dvc.fs.repo import RepoFileSystem
        from dvc.utils.collections import ensure_list

        targets = ensure_list(targets)
        data: Dict[str, Dict] = {}
        for rev in self.repo.brancher(revs=revs):
            # .brancher() also yields the workspace; skip anything that
            # was not explicitly requested.
            if revs is not None and rev not in revs:
                continue
            rev = rev or "workspace"

            fs = RepoFileSystem(self.repo)
            plots = _collect_plots(self.repo, targets, rev, recursive)
            for path_info, props in plots.items():
                # Created lazily so revisions without plots get no key.
                rev_data = data.setdefault(rev, {})

                if fs.isdir(path_info):
                    plot_files = [
                        (pi, relpath(pi, self.repo.root_dir))
                        for pi in fs.walk_files(path_info)
                    ]
                else:
                    plot_files = [
                        (path_info, relpath(path_info, self.repo.root_dir))
                    ]

                for path, repo_path in plot_files:
                    entry = {"props": props}
                    rev_data[repo_path] = entry

                    # Load data from git or dvc cache
                    try:
                        with fs.open(path) as fd:
                            entry["data"] = fd.read()
                    except FileNotFoundError:
                        # This might happen simply because cache is absent
                        pass

        return data
Ejemplo n.º 8
0
def test_isdir_isfile(tmp_dir, dvc):
    """isdir/isfile/isdvc answers before and after adding data to DVC.

    After ``dvc.add`` the workspace copies are deleted, and the kinds
    must still be reported, now together with a positive ``isdvc``.
    """
    tmp_dir.gen({"datafile": "data", "datadir": {"foo": "foo", "bar": "bar"}})

    fs = RepoFileSystem(repo=dvc)

    def kinds(path):
        # (isdir, isfile, isdvc), queried in that order.
        return (
            bool(fs.isdir(path)),
            bool(fs.isfile(path)),
            bool(fs.isdvc(path)),
        )

    assert kinds("datadir") == (True, False, False)
    assert kinds("datafile") == (False, True, False)

    dvc.add(["datadir", "datafile"])
    shutil.rmtree(tmp_dir / "datadir")
    (tmp_dir / "datafile").unlink()

    assert kinds("datadir") == (True, False, True)
    assert kinds("datafile") == (False, True, True)
Ejemplo n.º 9
0
    def _collect_from_revision(
        self,
        targets: Optional[List[str]] = None,
        revision: Optional[str] = None,
        recursive: bool = False,
        onerror: Optional[Callable] = None,
        props: Optional[Dict] = None,
    ):
        """Build lazily-parsed plot entries for one revision.

        Args:
            targets: forwarded to ``_collect_plots``.
            revision: forwarded to ``_collect_plots``.
            recursive: forwarded to ``_collect_plots``.
            onerror: passed to ``_unpack_dir_files`` and bound into each
                ``data_source`` callable.
            props: extra props; they override the props collected for
                each plot.

        Returns:
            A dict keyed by path relative to the repo root. Each plot file
            maps to ``{"props": ..., "data_source": ...}`` where
            ``data_source`` is a ``partial`` of ``parse``. A directory
            whose unpacking result has no "data" key maps to that
            unpacking result instead.
        """
        from dvc.fs.repo import RepoFileSystem

        fs = RepoFileSystem(self.repo)
        plots = _collect_plots(self.repo, targets, revision, recursive)
        # Normalised once; the value is the same for every plot.
        props = props or {}

        result: Dict[str, Any] = {}
        for fs_path, rev_props in plots.items():
            if not fs.isdir(fs_path):
                plot_files = [(fs_path, relpath(fs_path, self.repo.root_dir))]
            else:
                unpacked = _unpack_dir_files(fs, fs_path, onerror=onerror)
                if "data" in unpacked:
                    plot_files = [
                        (entry, relpath(entry, self.repo.root_dir))
                        for entry in unpacked.get(  # pylint: disable=E1101
                            "data"
                        )
                    ]
                else:
                    # No file list: store the unpacking result itself
                    # under the directory's path.
                    plot_files = []
                    result[relpath(fs_path, self.repo.root_dir)] = unpacked

            for path, repo_path in plot_files:
                joined_props = {**rev_props, **props}
                result[repo_path] = {
                    "props": joined_props,
                    "data_source": partial(
                        parse,
                        fs,
                        path,
                        props=joined_props,
                        onerror=onerror,
                    ),
                }
        return result
Ejemplo n.º 10
0
def test_isdir_isfile(tmp_dir, dvc):
    """isdir/isfile/isdvc answers before and after adding data to DVC.

    Covers top-level outputs as well as outputs nested in an otherwise
    untracked ``subdir``. After ``dvc.add`` the workspace copies are
    deleted and the kinds must still be reported.
    """
    subdir_baz = os.path.join("subdir", "baz")
    subdir_data = os.path.join("subdir", "data")

    tmp_dir.gen(
        {
            "datafile": "data",
            "datadir": {"foo": "foo", "bar": "bar"},
            "subdir": {
                "baz": "baz",
                "data": {"abc": "abc", "xyz": "xyz"},
            },
        },
    )

    fs = RepoFileSystem(repo=dvc)

    def kinds(path):
        # (isdir, isfile, isdvc), queried in that order.
        return (
            bool(fs.isdir(path)),
            bool(fs.isfile(path)),
            bool(fs.isdvc(path)),
        )

    assert kinds("datadir") == (True, False, False)
    assert kinds("datafile") == (False, True, False)

    dvc.add(["datadir", "datafile", subdir_baz, subdir_data])
    shutil.rmtree(tmp_dir / "datadir")
    shutil.rmtree(tmp_dir / "subdir" / "data")
    (tmp_dir / "datafile").unlink()
    (tmp_dir / "subdir" / "baz").unlink()

    assert kinds("datadir") == (True, False, True)
    assert kinds("datafile") == (False, True, True)

    # "subdir" itself was never added, only entries inside it.
    assert kinds("subdir") == (True, False, False)
    assert fs.isfile(subdir_baz)
    assert fs.isdir(subdir_data)
Ejemplo n.º 11
0
    def _collect_from_revision(
        self,
        targets: Optional[List[str]] = None,
        revision: Optional[str] = None,
        recursive: bool = False,
        onerror: Optional[Callable] = None,
        props: Optional[Dict] = None,
    ):
        """Collect and parse plots for one revision.

        Args:
            targets: forwarded to ``_collect_plots``.
            revision: forwarded to ``_collect_plots``.
            recursive: forwarded to ``_collect_plots``.
            onerror: forwarded to ``parse``.
            props: extra props; they override the props collected for
                each plot.

        Returns:
            A dict keyed by path relative to the repo root. Each value
            starts as ``{"props": ...}`` and is then updated with
            whatever ``parse`` returns for that file.
        """
        from dvc.fs.repo import RepoFileSystem

        fs = RepoFileSystem(self.repo)
        plots = _collect_plots(self.repo, targets, revision, recursive)
        # Normalised once; the value is the same for every plot.
        props = props or {}

        result = {}
        for path_info, rev_props in plots.items():
            if fs.isdir(path_info):
                # A directory contributes every file found beneath it.
                plot_files = [
                    (pi, relpath(pi, self.repo.root_dir))
                    for pi in fs.walk_files(path_info)
                ]
            else:
                plot_files = [
                    (path_info, relpath(path_info, self.repo.root_dir))
                ]

            for path, repo_path in plot_files:
                joined_props = {**rev_props, **props}
                entry = result[repo_path] = {"props": joined_props}
                entry.update(
                    parse(
                        fs,
                        path,
                        props=joined_props,
                        onerror=onerror,
                    )
                )
        return result