Example #1
0
def test_open_dirty_no_hash(tmp_dir, dvc):
    """A file listed in a .dvc file without any hash is read from the workspace."""
    tmp_dir.gen("file", "file")
    dvcfile = tmp_dir / "file.dvc"
    dvcfile.write_text("outs:\n- path: file\n")

    repo_tree = RepoTree(dvc)
    with repo_tree.open("file", "r") as stream:
        content = stream.read()
    assert content == "file"
Example #2
0
def test_repotree_cache_save(tmp_dir, dvc, scm, erepo_dir, setup_remote):
    """Saving a directory through a RepoTree puts every file into dvc's cache."""
    layout = {"dir": {"subdir": {"foo": "foo"}, "bar": "bar"}}
    with erepo_dir.chdir():
        erepo_dir.gen(layout)
        erepo_dir.dvc_add("dir/subdir", commit="subdir")
        erepo_dir.scm_add("dir", commit="dir")
        setup_remote(erepo_dir.dvc)
        erepo_dir.dvc.push()

    # Either fetch or stream has to be set so that DVC dirs get walked; which
    # one is irrelevant here.
    #
    # In this test every file object is open()ed and copied from the tree
    # into dvc.cache; nothing is fetched or streamed from a remote.
    tree = RepoTree(erepo_dir.dvc, stream=True)
    expected = []
    for relative in ("dir/bar", "dir/subdir/foo"):
        expected.append(tree.get_file_checksum(erepo_dir / relative))

    with erepo_dir.dvc.state:
        cache = dvc.cache.local
        with cache.state:
            dir_info = PathInfo(erepo_dir / "dir")
            cache.save(dir_info, None, tree=tree)
    for checksum in expected:
        cached_path = cache.checksum_to_path_info(checksum)
        assert os.path.exists(cached_path)
Example #3
0
def test_open_dirty_hash(tmp_dir, dvc):
    """A tracked file modified in the workspace is read with its new content."""
    tmp_dir.dvc_gen("file", "file")
    workspace_file = tmp_dir / "file"
    workspace_file.write_text("something")

    repo_tree = RepoTree(dvc)
    with repo_tree.open("file", "r") as stream:
        content = stream.read()
    assert content == "something"
Example #4
0
def test_exists(tmp_dir, dvc):
    """A DVC-added file still exists in the RepoTree after workspace removal."""
    tmp_dir.gen("foo", "foo")
    dvc.add("foo")
    workspace_copy = tmp_dir / "foo"
    workspace_copy.unlink()

    repo_tree = RepoTree(dvc)
    found = repo_tree.exists("foo")
    assert found
Example #5
0
def test_repotree_cache_save(tmp_dir, dvc, scm, erepo_dir, local_cloud):
    """Saving a directory through a RepoTree puts every file into dvc's cache."""
    layout = {"dir": {"subdir": {"foo": "foo"}, "bar": "bar"}}
    with erepo_dir.chdir():
        erepo_dir.gen(layout)
        erepo_dir.dvc_add("dir/subdir", commit="subdir")
        erepo_dir.scm_add("dir", commit="dir")
        erepo_dir.add_remote(config=local_cloud.config)
        erepo_dir.dvc.push()

    # Either fetch or stream has to be set so that DVC dirs get walked; which
    # one is irrelevant here.
    #
    # In this test every file object is open()ed and copied from the tree
    # into dvc.cache; nothing is fetched or streamed from a remote.
    tree = RepoTree(erepo_dir.dvc, stream=True)
    expected = []
    for relative in ("dir/bar", "dir/subdir/foo"):
        expected.append(tree.get_file_hash(PathInfo(erepo_dir / relative)))

    with erepo_dir.dvc.state:
        cache = dvc.cache.local
        with cache.tree.state:
            dir_info = PathInfo(erepo_dir / "dir")
            saved_info = cache.tree.save_info(dir_info)
            cache.save(dir_info, tree, saved_info)

    for hash_ in expected:
        cached_path = cache.tree.hash_to_path_info(hash_)
        assert os.path.exists(cached_path)
Example #6
0
def test_walk(tmp_dir, dvc, dvcfiles, extra_expected):
    """Walking "dir" yields both DVC-tracked and plain workspace entries.

    ``dvcfiles`` is forwarded to ``RepoTree.walk`` and ``extra_expected``
    lists the additional paths expected for that setting.
    """
    tracked_layout = {
        "dir": {
            "subdir1": {"foo1": "foo1", "bar1": "bar1"},
            "subdir2": {"foo2": "foo2"},
        }
    }
    tmp_dir.gen(tracked_layout)
    dvc.add(str(tmp_dir / "dir"), recursive=True)
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})
    tree = RepoTree(dvc)

    base = PathInfo("dir")
    expected = [
        base / "subdir1",
        base / "subdir2",
        base / "subdir1" / "foo1",
        base / "subdir1" / "bar1",
        base / "subdir2" / "foo2",
        base / "foo",
        base / "bar",
    ]

    actual = [
        os.path.join(root, entry)
        for root, dirs, files in tree.walk("dir", dvcfiles=dvcfiles)
        for entry in dirs + files
    ]

    expected = [str(path) for path in expected + extra_expected]
    # Compare as sets, then by length, so duplicates are caught as well.
    assert set(actual) == set(expected)
    assert len(actual) == len(expected)
Example #7
0
    def collect(self, targets=None, revs=None):
        """Gather props and raw data of plots for every requested revision.

        The result is shaped like:
            {rev: {plots.csv: {
                props: {x: ..., "header": ..., ...},
                data: "...data as a string...",
            }}}
        The data is kept unparsed because parsing depends on the props.
        """
        if isinstance(targets, str):
            targets = [targets]
        else:
            targets = targets or []

        collected = {}
        for rev in self.repo.brancher(revs=revs):
            # .brancher() adds unwanted workspace
            if revs is not None and rev not in revs:
                continue
            rev = rev or "workspace"

            tree = RepoTree(self.repo)
            plots = _collect_plots(self.repo, targets, rev)
            for path_info, props in plots.items():
                datafile = relpath(path_info, self.repo.root_dir)
                entry = {"props": props}
                collected.setdefault(rev, {})[datafile] = entry

                # Load data from git or dvc cache
                try:
                    with tree.open(path_info) as fd:
                        entry["data"] = fd.read()
                except FileNotFoundError:
                    # This might happen simply because cache is absent
                    pass

        return collected
Example #8
0
def test_walk(tmp_dir, dvc):
    """Walking "dir" yields tracked files, their .dvc files and plain files."""
    tracked_layout = {
        "dir": {
            "subdir1": {"foo1": "foo1", "bar1": "bar1"},
            "subdir2": {"foo2": "foo2"},
        }
    }
    tmp_dir.gen(tracked_layout)
    dvc.add(str(tmp_dir / "dir"), recursive=True)
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})
    tree = RepoTree(dvc)

    expected_parts = [
        ("dir", "subdir1"),
        ("dir", "subdir2"),
        ("dir", "subdir1", "foo1"),
        ("dir", "subdir1", "foo1.dvc"),
        ("dir", "subdir1", "bar1"),
        ("dir", "subdir1", "bar1.dvc"),
        ("dir", "subdir2", "foo2"),
        ("dir", "subdir2", "foo2.dvc"),
        ("dir", "foo"),
        ("dir", "bar"),
    ]
    expected = [os.path.join(*parts) for parts in expected_parts]

    actual = [
        os.path.join(root, entry)
        for root, dirs, files in tree.walk("dir")
        for entry in dirs + files
    ]

    # Compare as sets, then by length, so duplicates are caught as well.
    assert set(actual) == set(expected)
    assert len(actual) == len(expected)
Example #9
0
def test_isdir_mixed(tmp_dir, dvc):
    """A directory with only one file DVC-tracked is still a directory."""
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})

    tracked_file = tmp_dir / "dir" / "foo"
    dvc.add(str(tracked_file))

    repo_tree = RepoTree(dvc)
    assert repo_tree.isdir("dir")
    assert not repo_tree.isfile("dir")
Example #10
0
def test_open(tmp_dir, dvc):
    """A DVC-added file can be opened after its workspace copy is removed."""
    tmp_dir.gen("foo", "foo")
    dvc.add("foo")
    workspace_copy = tmp_dir / "foo"
    workspace_copy.unlink()

    repo_tree = RepoTree(dvc)
    with repo_tree.open("foo", "r") as stream:
        content = stream.read()
    assert content == "foo"
Example #11
0
 def open_by_relpath(self, path, mode="r", encoding=None, **kwargs):
     """Opens a specified resource as a file object.

     Generator that yields the file object obtained from
     ``RepoTree(self).open(path, ...)``; ``mode``, ``encoding`` and any
     extra keyword arguments are forwarded to that call.

     Raises:
         PathMissingError: if a FileNotFoundError is raised while the file
             is being opened or while the yielded object is in use. The
             original error is attached as ``__cause__``.
     """
     tree = RepoTree(self)
     try:
         with tree.open(path, mode=mode, encoding=encoding,
                        **kwargs) as fobj:
             yield fobj
     except FileNotFoundError as exc:
         # Chain explicitly so the traceback presents the low-level error as
         # the direct cause rather than as an error raised during handling.
         raise PathMissingError(path, self.url) from exc
Example #12
0
def _load_from_revision(repo, datafile, revision):
    """Read ``datafile`` through a RepoTree of ``repo`` and wrap it as plot data.

    Returns:
        The result of ``plot_data(datafile, revision, content)``, where
        ``content`` is everything read from the opened file.

    Raises:
        NoMetricOnRevisionError: if opening or reading the file raises
            FileNotFoundError or PathMissingError. The original error is
            attached as ``__cause__``.
    """
    from dvc.repo.tree import RepoTree

    tree = RepoTree(repo)

    try:
        with tree.open(datafile) as fobj:
            datafile_content = fobj.read()

    except (FileNotFoundError, PathMissingError) as exc:
        # Chain explicitly so the underlying lookup failure stays visible as
        # the direct cause of the domain-level error.
        raise NoMetricOnRevisionError(datafile, revision) from exc

    return plot_data(datafile, revision, datafile_content)
Example #13
0
def test_repotree_walk_fetch(tmp_dir, dvc, scm, local_remote):
    """Walking with fetch=True brings a pushed directory back into the cache."""
    stages = tmp_dir.dvc_gen({"dir": {"foo": "foo"}}, commit="init")
    out = stages[0].outs[0]
    dvc.push()
    remove(dvc.cache.local.cache_dir)

    tree = RepoTree(dvc, fetch=True)
    with dvc.state:
        # Only the side effects of walking matter, not the yielded values.
        for _root, _dirs, _files in tree.walk("dir"):
            continue

    assert os.path.exists(out.cache_path)
    for entry in out.dir_cache:
        hash_ = entry[out.tree.PARAM_CHECKSUM]
        cached_path = dvc.cache.local.tree.hash_to_path_info(hash_)
        assert os.path.exists(cached_path)
Example #14
0
def _ls(repo, path_info, recursive=None, dvc_only=False):
    """List the entries under ``path_info`` as seen by a RepoTree of ``repo``.

    Returns a dict mapping each entry's path (relative to ``path_info``) to
    ``{"isout": ..., "isdir": ..., "isexec": ...}``.

    * With a falsy ``recursive`` only the first level produced by the walk is
      listed, directories included.
    * With a truthy ``recursive`` the whole walk is consumed and only files
      are listed.
    * With ``dvc_only`` a file is kept only when ``tree.isdvc`` is truthy for
      it, and a directory only when the DVC tree exists and contains it.

    If the walk raises NotADirectoryError, ``path_info`` itself is reported
    as a single file entry keyed by its name (or ``{}`` when it is filtered
    out by ``dvc_only``). If it raises FileNotFoundError, ``{}`` is returned.
    """
    from dvc.repo.tree import RepoTree

    def reraise(exc):
        raise exc

    # use our own RepoTree instance instead of repo.repo_tree since we do not
    # want fetch/stream enabled for ls
    tree = RepoTree(repo)

    def describe(info, is_out, is_dir):
        # isexec is only asked of the tree for entries that are not DVC outs
        return {
            "isout": is_out,
            "isdir": is_dir,
            "isexec": False if is_out else tree.isexec(info),
        }

    listing = {}
    try:
        walker = tree.walk(path_info.fspath, onerror=reraise, dvcfiles=True)
        for root, dirs, files in walker:
            for fname in files:
                info = PathInfo(root) / fname
                is_out = tree.isdvc(info)
                if dvc_only and not is_out:
                    continue
                key = str(info.relative_to(path_info))
                listing[key] = describe(info, is_out, False)

            if recursive:
                continue

            for dname in dirs:
                info = PathInfo(root) / dname
                if dvc_only and not (tree.dvctree
                                     and tree.dvctree.exists(info)):
                    continue
                is_out = tree.isdvc(info)
                key = str(info.relative_to(path_info))
                listing[key] = describe(info, is_out, True)
            # non-recursive listing stops after the first level
            break
    except NotADirectoryError:
        is_out = tree.isdvc(path_info)
        if dvc_only and not is_out:
            return {}
        return {path_info.name: describe(path_info, is_out, False)}
    except FileNotFoundError:
        return {}

    return listing
Example #15
0
    def _get_checksum(self, locked=True):
        """Return the hash of ``self.def_path`` inside the repo from ``_make_repo``.

        The "md5" recorded on the matching DVC output is used when one is
        found. Otherwise the hash is computed through a streaming RepoTree:
        via the local cache of ``self.repo`` for a directory, or via the
        tree itself for a single file.
        """
        from dvc.repo.tree import RepoTree

        with self._make_repo(locked=locked) as repo:
            try:
                out = repo.find_out_by_relpath(self.def_path)
                return out.info["md5"]
            except OutputNotFoundError:
                full_path = os.path.join(repo.root_dir, self.def_path)
                path = PathInfo(full_path)

                # we want stream but not fetch, so DVC out directories are
                # walked, but dir contents is not fetched
                tree = RepoTree(repo, stream=True)

                if not tree.isdir(path):
                    return tree.get_file_hash(path)

                # We are polluting our repo cache with some dir listing here
                local_cache = self.repo.cache.local
                return local_cache.get_hash(path, tree=tree)
Example #16
0
    def open_by_relpath(self, path, remote=None, mode="r", encoding=None):
        """Opens a specified resource as a file descriptor.

        Generator that joins ``path`` with ``self.root_dir``, opens the
        result through a streaming RepoTree (forwarding ``mode``,
        ``encoding`` and ``remote``) and yields the file object.

        Raises:
            FileMissingError: if a FileNotFoundError is raised; it carries
                the joined path.
            DvcIsADirectoryError: if an IsADirectoryError is raised.
        """

        tree = RepoTree(self, stream=True)
        # Join with the repo root exactly once. Joining the already-joined
        # path again is a no-op for an absolute root_dir, but repeats the
        # root prefix when root_dir is relative.
        path = os.path.join(self.root_dir, path)
        try:
            with tree.open(
                    path,
                    mode=mode,
                    encoding=encoding,
                    remote=remote,
            ) as fobj:
                yield fobj
        except FileNotFoundError as exc:
            raise FileMissingError(path) from exc
        except IsADirectoryError as exc:
            raise DvcIsADirectoryError from exc
Example #17
0
def test_open_in_history(tmp_dir, scm, dvc):
    """Opening "foo" while branching to HEAD~1 returns the older content."""
    # Two commits: the first stores "foo", the second "foofoo"; each commit
    # message equals the content written.
    for content in ("foo", "foofoo"):
        tmp_dir.gen("foo", content)
        dvc.add("foo")
        dvc.scm.add(["foo.dvc", ".gitignore"])
        dvc.scm.commit(content)

    for rev in dvc.brancher(revs=["HEAD~1"]):
        if rev != "working tree":
            tree = RepoTree(dvc)
            with tree.open("foo", "r") as stream:
                assert stream.read() == "foo"
Example #18
0
def test_isdvc(tmp_dir, dvc):
    """isdvc is truthy for added paths, and for nested ones only on request."""
    tmp_dir.gen({"foo": "foo", "bar": "bar", "dir": {"baz": "baz"}})
    for target in ("foo", "dir"):
        dvc.add(target)
    tree = RepoTree(dvc)

    # (path, extra keyword arguments, expected truthiness), checked in order
    cases = [
        ("foo", {}, True),
        ("bar", {}, False),
        ("dir", {}, True),
        ("dir/baz", {}, False),
        ("dir/baz", {"recursive": True, "strict": False}, True),
    ]
    for path, options, expected in cases:
        assert bool(tree.isdvc(path, **options)) is expected
Example #19
0
def _read_metrics(repo, metrics, rev):
    """Load and extract the given metric files through a RepoTree of ``repo``.

    Metrics that do not exist in the tree, fail to parse as YAML, or yield a
    falsy extraction result are left out. ``rev`` is only used in the debug
    log message for parse failures.

    Returns a dict mapping ``str(metric)`` to its extracted value.
    """
    tree = RepoTree(repo)

    collected = {}
    for metric in metrics:
        if not tree.exists(fspath_py35(metric)):
            continue

        with tree.open(fspath_py35(metric), "r") as fobj:
            try:
                # NOTE this also supports JSON
                loaded = yaml.safe_load(fobj)
            except yaml.YAMLError:
                logger.debug(
                    "failed to read '%s' on '%s'", metric, rev, exc_info=True
                )
            else:
                extracted = _extract_metrics(loaded)
                if extracted:
                    collected[str(metric)] = extracted

    return collected
Example #20
0
def test_walk_onerror(tmp_dir, dvc):
    """walk() stays silent on bad paths unless onerror re-raises the error."""

    def reraise(exc):
        raise exc

    tmp_dir.dvc_gen("foo", "foo")
    tree = RepoTree(dvc)

    # "dir" does not exist; "foo" exists but is not a directory
    for target in ("dir", "foo"):
        for _ in tree.walk(target):
            pass
        with pytest.raises(OSError):
            for _ in tree.walk(target, onerror=reraise):
                pass
Example #21
0
def test_isdvc(tmp_dir, dvc):
    """isdvc is truthy for an added file and falsy for an untracked one."""
    tmp_dir.gen({"foo": "foo", "bar": "bar"})
    dvc.add("foo")
    repo_tree = RepoTree(dvc)

    for path, expected in (("foo", True), ("bar", False)):
        assert bool(repo_tree.isdvc(path)) is expected
Example #22
0
 def repo_tree(self):
     """Build a RepoTree over this object with ``fetch=True``."""
     tree = RepoTree(self, fetch=True)
     return tree
Example #23
0
def test_isdir_isfile(tmp_dir, dvc):
    """isdir/isfile answers are unchanged by DVC-adding and deleting paths."""
    tmp_dir.gen({"datafile": "data", "datadir": {"foo": "foo", "bar": "bar"}})

    tree = RepoTree(dvc)

    def check(tracked):
        # Same query order for both phases; only the isdvc answer differs.
        assert tree.isdir("datadir")
        assert not tree.isfile("datadir")
        assert bool(tree.isdvc("datadir")) is tracked
        assert not tree.isdir("datafile")
        assert tree.isfile("datafile")
        assert bool(tree.isdvc("datafile")) is tracked

    check(tracked=False)

    dvc.add(["datadir", "datafile"])
    shutil.rmtree(tmp_dir / "datadir")
    (tmp_dir / "datafile").unlink()

    check(tracked=True)