Ejemplo n.º 1
0
def test_repotree_cache_save(tmp_dir, dvc, scm, erepo_dir, local_cloud):
    with erepo_dir.chdir():
        erepo_dir.gen({"dir": {"subdir": {"foo": "foo"}, "bar": "bar"}})
        erepo_dir.dvc_add("dir/subdir", commit="subdir")
        erepo_dir.scm_add("dir", commit="dir")
        erepo_dir.add_remote(config=local_cloud.config)
        erepo_dir.dvc.push()

    # test only cares that either fetch or stream are set so that DVC dirs are
    # walked.
    #
    # for this test, all file objects are being opened() and copied from tree
    # into dvc.cache, not fetched or streamed from a remote
    tree = RepoTree(erepo_dir.dvc, stream=True)
    expected = [
        tree.get_file_hash(PathInfo(erepo_dir / path))
        for path in ("dir/bar", "dir/subdir/foo")
    ]

    with erepo_dir.dvc.state:
        cache = dvc.cache.local
        with cache.tree.state:
            path_info = PathInfo(erepo_dir / "dir")
            hash_info = cache.tree.save_info(path_info)
            cache.save(path_info, tree, hash_info)

    for hash_ in expected:
        assert os.path.exists(cache.tree.hash_to_path_info(hash_))
Ejemplo n.º 2
0
    def _get_checksum(self, locked=True):
        from dvc.repo.tree import RepoTree

        with self._make_repo(locked=locked) as repo:
            try:
                return repo.find_out_by_relpath(self.def_path).info["md5"]
            except OutputNotFoundError:
                path = PathInfo(os.path.join(repo.root_dir, self.def_path))

                # we want stream but not fetch, so DVC out directories are
                # walked, but dir contents is not fetched
                tree = RepoTree(repo, stream=True)

                # We are polluting our repo cache with some dir listing here
                if tree.isdir(path):
                    return self.repo.cache.local.get_hash(path, tree=tree)
                return tree.get_file_hash(path)