Example #1
0
def test_dir_hash_should_be_key_order_agnostic(tmp_dir, dvc):
    """Check that a directory hash ignores key order inside entry dicts.

    ``dvc.oid._collect_dir`` is patched twice, each time returning a
    ``DirInfo`` built from the same two entries; the only difference is
    whether ``relpath`` or ``md5`` comes first in each dict. Both runs of
    ``get_hash`` must produce equal results.
    """
    from dvc.oid import get_hash

    tmp_dir.gen({"data": {"1": "1 content", "2": "2 content"}})

    path_info = PathInfo("data")

    relpath_first = [
        {"relpath": "1", "md5": "1"},
        {"relpath": "2", "md5": "2"},
    ]
    md5_first = [
        {"md5": "1", "relpath": "1"},
        {"md5": "2", "relpath": "2"},
    ]

    hashes = []
    for entries in (relpath_first, md5_first):
        fake_dir_info = DirInfo.from_list(entries)
        # Replace directory collection so only the entry dicts vary.
        with patch("dvc.oid._collect_dir", return_value=fake_dir_info):
            hashes.append(get_hash(path_info, dvc.cache.local.fs, "md5"))

    hash1, hash2 = hashes
    assert hash1 == hash2
Example #2
0
def test_get_hash_dirty_file(tmp_dir, dvc):
    """Exercise ``get_hash`` through ``RepoFileSystem`` on a modified file.

    The file is added with ``dvc_gen`` and then overwritten. While the
    modified copy exists, ``fs.info`` is expected to carry no ``md5`` and
    ``get_hash`` to return the first hash below. Once the workspace copy
    is removed, both ``fs.info`` and ``get_hash`` are expected to yield
    the second hash.
    """
    tmp_dir.dvc_gen("file", "file")
    workspace_file = tmp_dir / "file"
    workspace_file.write_text("something")

    fs = RepoFileSystem(dvc)
    file_path = PathInfo(tmp_dir) / "file"

    # Phase 1: overwritten copy is present in the workspace.
    modified_md5 = "437b930db84b8079c2dd804a71936b5f"
    assert fs.info(file_path).get("md5") is None
    assert get_hash(file_path, fs, "md5") == HashInfo("md5", modified_md5)

    # Phase 2: workspace copy removed.
    workspace_file.unlink()
    remaining_md5 = "8c7dd922ad47494fc02c388e12c00eac"
    assert fs.info(file_path)["md5"] == remaining_md5
    assert get_hash(file_path, fs, "md5") == HashInfo("md5", remaining_md5)
Example #3
0
def test_get_hash_dirty_dir(tmp_dir, dvc):
    """Check the hash ``DvcFileSystem`` yields for a dir with an extra file.

    A directory is added with ``dvc_gen`` and a further file ``baz`` is
    then written into it. Both ``fs.info`` and ``get_hash`` are expected
    to report the same fixed ``.dir`` hash.
    """
    tmp_dir.dvc_gen({"dir": {"foo": "foo", "bar": "bar"}})
    (tmp_dir / "dir" / "baz").write_text("baz")

    fs = DvcFileSystem(dvc)
    dir_path = PathInfo(tmp_dir) / "dir"
    dir_md5 = "5ea40360f5b4ec688df672a4db9c17d1.dir"

    reported = fs.info(dir_path).get("md5")
    assert reported == dir_md5

    computed = get_hash(dir_path, fs, "md5")
    assert computed == HashInfo("md5", dir_md5)
Example #4
0
def test_get_hash_dirty_file(tmp_dir, dvc):
    """Check the hash ``DvcFileSystem`` yields for an overwritten file.

    The file is added with ``dvc_gen`` and then rewritten with different
    text. Both ``fs.info`` and ``get_hash`` are expected to report the
    same fixed md5.
    """
    tmp_dir.dvc_gen("file", "file")
    (tmp_dir / "file").write_text("something")

    fs = DvcFileSystem(dvc)
    file_path = PathInfo(tmp_dir) / "file"
    file_md5 = "8c7dd922ad47494fc02c388e12c00eac"

    reported = fs.info(file_path).get("md5")
    assert reported == file_md5

    computed = get_hash(file_path, fs, "md5")
    assert computed == HashInfo("md5", file_md5)
Example #5
0
def test_get_hash_dirty_dir(tmp_dir, dvc):
    """Check ``get_hash`` via ``RepoFileSystem`` on a dir with an extra file.

    A two-file directory is added with ``dvc_gen`` and a third file is
    then written into it. The result must equal the fixed ``.dir`` hash
    below and its ``dir_info`` must count three files.
    """
    tmp_dir.dvc_gen({"dir": {"foo": "foo", "bar": "bar"}})
    extra_file = tmp_dir / "dir" / "baz"
    extra_file.write_text("baz")

    fs = RepoFileSystem(dvc)
    result = get_hash(PathInfo(tmp_dir) / "dir", fs, "md5")

    assert result == HashInfo("md5", "ba75a2162ca9c29acecb7957105a0bc2.dir")
    assert result.dir_info.nfiles == 3
Example #6
0
    def _get_hash(self, locked=True):
        """Return the md5 hash of ``self.def_path`` inside the made repo.

        The repo comes from ``self._make_repo(locked=locked)``; the path is
        resolved against that repo's ``root_dir`` and hashed through its
        ``repo_fs`` with ``follow_subrepos=False``.
        """
        from dvc.oid import get_hash

        with self._make_repo(locked=locked) as repo:
            return get_hash(
                PathInfo(repo.root_dir) / self.def_path,
                repo.repo_fs,
                "md5",
                follow_subrepos=False,
            )
Example #7
0
def test_get_hash_cached_file(tmp_dir, dvc, mocker):
    """Check hashes ``RepoFileSystem`` reports for an unmodified added file.

    While the workspace copy exists, ``fs.info`` is expected to carry no
    ``md5`` and ``get_hash`` to return the expected hash. After the copy
    is removed, ``fs.info`` is expected to expose that same md5.
    """
    tmp_dir.dvc_gen({"foo": "foo"})
    fs = RepoFileSystem(dvc)

    foo_path = PathInfo(tmp_dir) / "foo"
    foo_md5 = "acbd18db4cc2f85cedef654fccc4a4d8"

    assert fs.info(foo_path).get("md5") is None
    assert get_hash(foo_path, fs, "md5") == HashInfo("md5", foo_md5)

    (tmp_dir / "foo").unlink()
    assert fs.info(foo_path)["md5"] == foo_md5
Example #8
0
def test_get_hash_granular(tmp_dir, dvc):
    """Check ``get_hash`` on paths nested inside an added directory.

    Uses ``DvcFileSystem``. For the nested ``subdir``, ``fs.info`` is
    expected to carry no ``md5`` while ``get_hash`` returns a ``.dir``
    hash. For the file ``subdir/data``, ``fs.info`` and ``get_hash`` are
    expected to agree on the same md5.
    """
    tree = {"dir": {"foo": "foo", "bar": "bar", "subdir": {"data": "data"}}}
    tmp_dir.dvc_gen(tree)

    fs = DvcFileSystem(dvc)
    subdir = PathInfo(tmp_dir) / "dir" / "subdir"
    data_file = subdir / "data"

    subdir_md5 = "af314506f1622d107e0ed3f14ec1a3b5.dir"
    assert fs.info(subdir).get("md5") is None
    assert get_hash(subdir, fs, "md5") == HashInfo("md5", subdir_md5)

    data_md5 = "8d777f385d3dfec8815d20f7496026dc"
    assert fs.info(data_file)["md5"] == data_md5
    assert get_hash(data_file, fs, "md5") == HashInfo("md5", data_md5)
Example #9
0
def test_get_hash_mixed_dir(tmp_dir, scm, dvc):
    """Check ``get_hash`` on a directory holding DVC- and Git-added files.

    ``dir/foo`` is added through DVC; ``dir/bar`` plus the ``.gitignore``
    and ``foo.dvc`` files under ``dir`` are added to Git and committed.
    Hashing ``dir`` through ``RepoFileSystem`` must give the fixed
    ``.dir`` hash below.
    """
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})
    tmp_dir.dvc.add(os.path.join("dir", "foo"))

    git_paths = [
        os.path.join("dir", name) for name in ("bar", ".gitignore", "foo.dvc")
    ]
    tmp_dir.scm.add(git_paths)
    tmp_dir.scm.commit("add dir")

    fs = RepoFileSystem(dvc)
    result = get_hash(PathInfo(tmp_dir) / "dir", fs, "md5")
    assert result == HashInfo("md5", "e1d9e8eae5374860ae025ec84cfd85c7.dir")
Example #10
0
def test_get_hash_cached_dir(tmp_dir, dvc, mocker):
    """Check hashes ``RepoFileSystem`` reports for an unmodified added dir.

    While the workspace copy exists, ``fs.info`` is expected to carry no
    ``md5`` and ``get_hash`` to return the expected ``.dir`` hash. After
    the directory is removed from the workspace, both ``fs.info`` and
    ``get_hash`` are expected to yield that same hash.
    """
    tree = {"dir": {"foo": "foo", "bar": "bar", "subdir": {"data": "data"}}}
    tmp_dir.dvc_gen(tree)

    fs = RepoFileSystem(dvc)
    dir_path = PathInfo(tmp_dir) / "dir"
    dir_md5 = "8761c4e9acad696bee718615e23e22db.dir"

    # Workspace copy still present.
    assert fs.info(dir_path).get("md5") is None
    assert get_hash(dir_path, fs, "md5") == HashInfo("md5", dir_md5)

    # Workspace copy removed.
    shutil.rmtree(tmp_dir / "dir")
    assert fs.info(dir_path)["md5"] == dir_md5
    assert get_hash(dir_path, fs, "md5") == HashInfo("md5", dir_md5)
Example #11
0
def get_url(path, repo=None, rev=None, remote=None):
    """
    Return the storage URL of a file or directory tracked in a DVC repo.

    For Git repos, HEAD is used unless `rev` is supplied. The default
    remote is tried unless `remote` is supplied.

    Raises PathMissingError if the metadata lookup for the path fails with
    FileNotFoundError, and OutputNotFoundError if the path is not tracked
    by DVC.

    NOTE: The actual existence of the file or directory in the remote
    storage is not checked.
    """
    with Repo.open(repo, rev=rev, subrepos=True, uninitialized=True) as opened:
        target = PathInfo(opened.root_dir) / path

        # Turn a missing path into the project-level error type.
        with reraise(FileNotFoundError, PathMissingError(path, repo)):
            meta = opened.repo_fs.metadata(target)

        if not meta.is_dvc:
            raise OutputNotFoundError(path, repo)

        # The cloud comes from the repo the metadata refers to, which
        # presumably differs from `opened` when the path is in a subrepo.
        storage = meta.repo.cloud
        checksum = get_hash(target, opened.repo_fs, "md5").value
        return storage.get_url_for(remote, checksum=checksum)
Example #12
0
 def get_hash(self):
     """Return the hash info for ``self.path_info``.

     With ``use_cache`` set, the path is staged into ``self.cache`` via
     ``objects.stage`` and the staged object's ``hash_info`` is returned.
     Otherwise the module-level ``get_hash`` is called with the
     filesystem's ``PARAM_CHECKSUM`` as the hash name.
     """
     if self.use_cache:
         staged = objects.stage(self.cache, self.path_info, self.fs)
         return staged.hash_info
     return get_hash(self.path_info, self.fs, self.fs.PARAM_CHECKSUM)
Example #13
0
 def _to_checksum(output):
     """Return the checksum value for ``output``.

     When ``on_working_fs`` (from the enclosing scope) is truthy, the md5
     is computed from ``output.path_info`` on the repo's local cache
     filesystem; otherwise the output's stored ``hash_info`` value is used.
     """
     return (
         get_hash(output.path_info, repo.cache.local.fs, "md5").value
         if on_working_fs
         else output.hash_info.value
     )