def test_dir_hash_should_be_key_order_agnostic(tmp_dir, dvc):
    """The directory hash must not depend on key order inside entry dicts.

    ``dvc.oid._collect_dir`` is patched to return a ``DirInfo`` built from
    the same two entries, once with ``relpath`` listed first and once with
    ``md5`` listed first; both runs of ``get_hash`` must agree.
    """
    from dvc.oid import get_hash

    tmp_dir.gen({"data": {"1": "1 content", "2": "2 content"}})
    path_info = PathInfo("data")

    # Same entries, differing only in the order of keys within each dict.
    entry_orderings = (
        [{"relpath": "1", "md5": "1"}, {"relpath": "2", "md5": "2"}],
        [{"md5": "1", "relpath": "1"}, {"md5": "2", "relpath": "2"}],
    )

    computed = []
    for entries in entry_orderings:
        dir_info = DirInfo.from_list(entries)
        with patch("dvc.oid._collect_dir", return_value=dir_info):
            computed.append(get_hash(path_info, dvc.cache.local.fs, "md5"))

    hash1, hash2 = computed
    assert hash1 == hash2
def test_get_hash_dirty_file(tmp_dir, dvc):
    """Check ``get_hash``/``fs.info`` on a tracked file changed in the workspace.

    While the rewritten file exists, ``fs.info`` reports no ``md5`` and
    ``get_hash`` yields one hash; after the workspace copy is deleted, both
    report the other hash (presumably the one recorded by ``dvc_gen``).
    """
    tmp_dir.dvc_gen("file", "file")
    workspace_file = tmp_dir / "file"
    workspace_file.write_text("something")

    fs = RepoFileSystem(dvc)
    target = PathInfo(tmp_dir) / "file"

    # Modified copy is present in the workspace.
    assert fs.info(target).get("md5") is None
    assert get_hash(target, fs, "md5") == HashInfo(
        "md5", "437b930db84b8079c2dd804a71936b5f"
    )

    # Workspace copy removed: info and get_hash now agree on one md5.
    workspace_file.unlink()
    tracked_md5 = "8c7dd922ad47494fc02c388e12c00eac"
    assert fs.info(target)["md5"] == tracked_md5
    assert get_hash(target, fs, "md5") == HashInfo("md5", tracked_md5)
def test_get_hash_dirty_dir(tmp_dir, dvc):
    """``DvcFileSystem`` reports the same dir hash after a file is added.

    An extra ``baz`` file is written into the tracked directory; both
    ``fs.info`` and ``get_hash`` are still expected to return the same
    ``.dir`` hash value.
    """
    tmp_dir.dvc_gen({"dir": {"foo": "foo", "bar": "bar"}})
    extra_file = tmp_dir / "dir" / "baz"
    extra_file.write_text("baz")

    fs = DvcFileSystem(dvc)
    dir_md5 = "5ea40360f5b4ec688df672a4db9c17d1.dir"
    dir_path = PathInfo(tmp_dir) / "dir"

    assert fs.info(dir_path).get("md5") == dir_md5
    assert get_hash(dir_path, fs, "md5") == HashInfo("md5", dir_md5)
def test_get_hash_dirty_file(tmp_dir, dvc):
    """``DvcFileSystem`` reports the same file hash after a workspace rewrite.

    The tracked file is overwritten in the workspace; ``fs.info`` and
    ``get_hash`` are both expected to return the same md5 value.
    """
    tmp_dir.dvc_gen("file", "file")
    workspace_file = tmp_dir / "file"
    workspace_file.write_text("something")

    fs = DvcFileSystem(dvc)
    file_md5 = "8c7dd922ad47494fc02c388e12c00eac"
    file_path = PathInfo(tmp_dir) / "file"

    assert fs.info(file_path).get("md5") == file_md5
    assert get_hash(file_path, fs, "md5") == HashInfo("md5", file_md5)
def test_get_hash_dirty_dir(tmp_dir, dvc):
    """``RepoFileSystem`` hashing of a tracked dir with an extra file.

    After ``baz`` is added next to the two tracked files, ``get_hash`` must
    return the expected ``.dir`` hash and its ``dir_info`` must count three
    files.
    """
    tmp_dir.dvc_gen({"dir": {"foo": "foo", "bar": "bar"}})
    extra_file = tmp_dir / "dir" / "baz"
    extra_file.write_text("baz")

    fs = RepoFileSystem(dvc)
    dir_hash = get_hash(PathInfo(tmp_dir) / "dir", fs, "md5")

    assert dir_hash == HashInfo("md5", "ba75a2162ca9c29acecb7957105a0bc2.dir")
    assert dir_hash.dir_info.nfiles == 3
def _get_hash(self, locked=True):
    """Return the md5 hash info for ``self.def_path`` in the repo.

    The repo is obtained from ``self._make_repo(locked=locked)`` and the
    hash is computed over ``repo.repo_fs`` with ``follow_subrepos=False``.
    """
    # Imported locally, as in the original; presumably to avoid an import
    # cycle or import-time cost -- TODO confirm.
    from dvc.oid import get_hash

    with self._make_repo(locked=locked) as repo:
        target = PathInfo(repo.root_dir) / self.def_path
        return get_hash(
            target,
            repo.repo_fs,
            "md5",
            follow_subrepos=False,
        )
def test_get_hash_cached_file(tmp_dir, dvc, mocker):
    """Check ``get_hash``/``fs.info`` for an unmodified tracked file.

    While the workspace copy exists, ``fs.info`` has no ``md5`` but
    ``get_hash`` returns the expected value; once the copy is removed,
    ``fs.info`` reports that same md5.
    """
    tmp_dir.dvc_gen({"foo": "foo"})
    fs = RepoFileSystem(dvc)
    foo_md5 = "acbd18db4cc2f85cedef654fccc4a4d8"
    foo_path = PathInfo(tmp_dir) / "foo"

    assert fs.info(foo_path).get("md5") is None
    assert get_hash(foo_path, fs, "md5") == HashInfo("md5", foo_md5)

    # Drop the workspace copy and query the same path again.
    (tmp_dir / "foo").unlink()
    assert fs.info(foo_path)["md5"] == foo_md5
def test_get_hash_granular(tmp_dir, dvc):
    """Hash a subdirectory and a nested file inside a tracked directory.

    ``fs.info`` is expected to carry no ``md5`` for the subdirectory but to
    carry one for the file inside it; ``get_hash`` must return the expected
    hash for both.
    """
    tree = {
        "dir": {
            "foo": "foo",
            "bar": "bar",
            "subdir": {"data": "data"},
        }
    }
    tmp_dir.dvc_gen(tree)

    fs = DvcFileSystem(dvc)
    subdir = PathInfo(tmp_dir) / "dir" / "subdir"

    # Subdirectory of the tracked directory.
    assert fs.info(subdir).get("md5") is None
    assert get_hash(subdir, fs, "md5") == HashInfo(
        "md5", "af314506f1622d107e0ed3f14ec1a3b5.dir"
    )

    # Single file inside that subdirectory.
    data_md5 = "8d777f385d3dfec8815d20f7496026dc"
    assert fs.info(subdir / "data")["md5"] == data_md5
    assert get_hash(subdir / "data", fs, "md5") == HashInfo("md5", data_md5)
def test_get_hash_mixed_dir(tmp_dir, scm, dvc):
    """Hash a directory whose files are split between DVC and Git.

    ``dir/foo`` is added with DVC, while ``dir/bar`` and the ``.gitignore``
    and ``foo.dvc`` paths under ``dir`` are added to Git and committed. The
    directory hash from ``RepoFileSystem`` must match the expected value.
    """
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})
    tmp_dir.dvc.add(os.path.join("dir", "foo"))

    git_tracked = [
        os.path.join("dir", name) for name in ("bar", ".gitignore", "foo.dvc")
    ]
    tmp_dir.scm.add(git_tracked)
    tmp_dir.scm.commit("add dir")

    fs = RepoFileSystem(dvc)
    dir_hash = get_hash(PathInfo(tmp_dir) / "dir", fs, "md5")
    assert dir_hash == HashInfo("md5", "e1d9e8eae5374860ae025ec84cfd85c7.dir")
def test_get_hash_cached_dir(tmp_dir, dvc, mocker):
    """Check ``get_hash``/``fs.info`` for an unmodified tracked directory.

    With the directory present in the workspace, ``fs.info`` has no ``md5``
    while ``get_hash`` returns the expected ``.dir`` hash. After the
    workspace directory is removed, both report that hash.
    """
    tree = {
        "dir": {
            "foo": "foo",
            "bar": "bar",
            "subdir": {"data": "data"},
        }
    }
    tmp_dir.dvc_gen(tree)

    fs = RepoFileSystem(dvc)
    dir_md5 = "8761c4e9acad696bee718615e23e22db.dir"
    dir_path = PathInfo(tmp_dir) / "dir"

    assert fs.info(dir_path).get("md5") is None
    assert get_hash(dir_path, fs, "md5") == HashInfo("md5", dir_md5)

    # Remove the workspace copy and query the same path again.
    shutil.rmtree(tmp_dir / "dir")
    assert fs.info(dir_path)["md5"] == dir_md5
    assert get_hash(dir_path, fs, "md5") == HashInfo("md5", dir_md5)
def get_url(path, repo=None, rev=None, remote=None):
    """
    Return the storage URL of a data file or directory tracked in a DVC repo.

    For Git repos, HEAD is used unless a rev argument is supplied. The
    default remote is tried unless a remote argument is supplied.

    The metadata lookup is wrapped with
    ``reraise(FileNotFoundError, PathMissingError(path, repo))``.
    Raises OutputNotFoundError if the file is not tracked by DVC.

    NOTE: This function does not check for the actual existence of the file
    or directory in the remote storage.
    """
    with Repo.open(
        repo, rev=rev, subrepos=True, uninitialized=True
    ) as opened_repo:
        target = PathInfo(opened_repo.root_dir) / path

        # Only the metadata lookup is covered by the reraise wrapper.
        with reraise(FileNotFoundError, PathMissingError(path, repo)):
            target_meta = opened_repo.repo_fs.metadata(target)

        if not target_meta.is_dvc:
            raise OutputNotFoundError(path, repo)

        # The cloud is taken from the repo recorded in the metadata, not
        # from the repo opened above (subrepos=True is passed to Repo.open).
        owning_cloud = target_meta.repo.cloud
        checksum = get_hash(target, opened_repo.repo_fs, "md5").value
        return owning_cloud.get_url_for(remote, checksum=checksum)
def get_hash(self):
    """Return the hash info for ``self.path_info``.

    When ``self.use_cache`` is truthy, the path is staged through
    ``objects.stage`` and the resulting object's ``hash_info`` is returned;
    otherwise the hash is computed by the ``get_hash`` function using the
    filesystem's ``PARAM_CHECKSUM``.
    """
    if self.use_cache:
        staged = objects.stage(self.cache, self.path_info, self.fs)
        return staged.hash_info

    checksum_name = self.fs.PARAM_CHECKSUM
    return get_hash(self.path_info, self.fs, checksum_name)
def _to_checksum(output):
    """Return the checksum value for ``output``.

    Uses the hash already stored on ``output`` unless ``on_working_fs``
    (taken from the enclosing scope) is truthy, in which case the md5 is
    computed through ``repo.cache.local.fs``.
    """
    if not on_working_fs:
        return output.hash_info.value

    computed = get_hash(output.path_info, repo.cache.local.fs, "md5")
    return computed.value