Ejemplo n.º 1
0
def test_dir_hash_should_be_key_order_agnostic(tmp_dir, dvc):
    """Check that a directory hash ignores key order inside entry dicts.

    ``_collect_dir`` is patched to return a ``DirInfo`` built from the same
    two entries, once with ``relpath`` listed first and once with ``md5``
    listed first; both runs of ``get_hash`` must produce equal results.
    """
    from dvc.objects.stage import get_hash

    tmp_dir.gen({"data": {"1": "1 content", "2": "2 content"}})

    target = PathInfo("data")

    relpath_first = [
        {"relpath": "1", "md5": "1"},
        {"relpath": "2", "md5": "2"},
    ]
    md5_first = [
        {"md5": "1", "relpath": "1"},
        {"md5": "2", "relpath": "2"},
    ]

    hashes = []
    for entries in (relpath_first, md5_first):
        listing = DirInfo.from_list(entries)
        with patch("dvc.objects.stage._collect_dir", return_value=listing):
            hashes.append(get_hash(target, dvc.odb.local.fs, "md5"))

    first, second = hashes
    assert first == second
Ejemplo n.º 2
0
def _changed(path_info, fs, obj, cache):
    """Report whether ``path_info`` no longer matches ``obj``.

    Args:
        path_info: location whose current hash is compared against ``obj``.
        fs: filesystem handed to ``get_hash`` to read ``path_info``.
        obj: object whose ``hash_info`` is the expected hash.
        cache: passed to ``check`` together with ``obj``.

    Returns:
        True when ``check(cache, obj)`` raises ``FileNotFoundError`` or
        ``ObjectFormatError``, when ``get_hash`` raises
        ``FileNotFoundError``, or when the computed hash differs from
        ``obj.hash_info``; False otherwise.
    """
    logger.trace("checking if '%s'('%s') has changed.", path_info, obj)

    try:
        check(cache, obj)
    except (FileNotFoundError, ObjectFormatError):
        logger.debug(
            "cache for '%s'('%s') has changed.", path_info, obj.hash_info
        )
        return True

    try:
        actual = get_hash(path_info, fs, obj.hash_info.name)
    except FileNotFoundError:
        logger.debug("'%s' doesn't exist.", path_info)
        return True

    if obj.hash_info != actual:
        # Argument order must follow the placeholders: expected hash, then
        # the path, then the hash actually computed.
        logger.debug(
            "hash value '%s' for '%s' has changed (actual '%s').",
            obj.hash_info,
            path_info,
            actual,
        )
        return True

    logger.trace("'%s' hasn't changed.", path_info)
    return False
Ejemplo n.º 3
0
def test_get_hash_cached_dir(tmp_dir, dvc, mocker):
    """Check ``get_hash`` on a ``dvc_gen``-created directory via RepoFileSystem.

    While the directory is present in the workspace ``fs.info`` carries no
    ``md5``; after the directory is removed it reports the expected value.
    ``get_hash`` returns the same ``HashInfo`` in both situations.
    """
    tmp_dir.dvc_gen(
        {"dir": {"foo": "foo", "bar": "bar", "subdir": {"data": "data"}}}
    )
    fs = RepoFileSystem(dvc)
    expected = "8761c4e9acad696bee718615e23e22db.dir"
    dir_path = PathInfo(tmp_dir) / "dir"

    assert fs.info(dir_path).get("md5") is None
    assert get_hash(dir_path, fs, "md5") == HashInfo("md5", expected)

    shutil.rmtree(tmp_dir / "dir")

    assert fs.info(dir_path)["md5"] == expected
    assert get_hash(dir_path, fs, "md5") == HashInfo("md5", expected)
Ejemplo n.º 4
0
    def _get_hash(self, locked=True):
        """Return the md5 hash of ``self.def_path`` inside the made repo.

        The repo is obtained from ``self._make_repo(locked=locked)`` and the
        hash is computed through its ``repo_fs`` with
        ``follow_subrepos=False``.
        """
        from dvc.objects.stage import get_hash

        with self._make_repo(locked=locked) as repo:
            target = PathInfo(repo.root_dir) / self.def_path
            hash_info = get_hash(
                target, repo.repo_fs, "md5", follow_subrepos=False
            )
            return hash_info
Ejemplo n.º 5
0
Archivo: base.py Proyecto: pyanezs/dvc
 def get_hash(self):
     """Return the hash for ``self.path_info``.

     With ``use_cache`` set, the object is staged via ``ostage`` against
     ``self.odb`` and its ``hash_info`` is returned. Otherwise the
     module-level ``get_hash`` is called with the filesystem's
     ``PARAM_CHECKSUM`` and the repo's local odb.
     """
     if self.use_cache:
         staged = ostage(self.odb, self.path_info, self.fs)
         return staged.hash_info

     return get_hash(
         self.path_info,
         self.fs,
         self.fs.PARAM_CHECKSUM,
         self.repo.odb.local,
     )
Ejemplo n.º 6
0
def test_get_hash_dirty_dir(tmp_dir, dvc):
    """Check the DvcFileSystem hash of a directory that gained an extra file.

    After ``baz`` is written into the ``dvc_gen``-created directory, both
    ``fs.info`` and ``get_hash`` must report the pinned ``.dir`` hash.
    """
    tmp_dir.dvc_gen({"dir": {"foo": "foo", "bar": "bar"}})
    (tmp_dir / "dir" / "baz").write_text("baz")

    fs = DvcFileSystem(dvc)
    dir_path = PathInfo(tmp_dir) / "dir"
    expected = "5ea40360f5b4ec688df672a4db9c17d1.dir"

    reported = fs.info(dir_path).get("md5")
    assert reported == expected

    computed = get_hash(dir_path, fs, "md5")
    assert computed == HashInfo("md5", expected)
Ejemplo n.º 7
0
def test_get_hash_dirty_file(tmp_dir, dvc):
    """Check the DvcFileSystem hash of a file overwritten after ``dvc_gen``.

    Both ``fs.info`` and ``get_hash`` must report the pinned md5 even though
    the workspace copy now contains different text.
    """
    tmp_dir.dvc_gen("file", "file")
    (tmp_dir / "file").write_text("something")

    fs = DvcFileSystem(dvc)
    file_path = PathInfo(tmp_dir) / "file"
    expected = "8c7dd922ad47494fc02c388e12c00eac"

    reported = fs.info(file_path).get("md5")
    assert reported == expected

    computed = get_hash(file_path, fs, "md5")
    assert computed == HashInfo("md5", expected)
Ejemplo n.º 8
0
def test_get_hash_dirty_dir(tmp_dir, dvc):
    """Check the RepoFileSystem hash of a directory that gained an extra file.

    After ``baz`` is written next to the two ``dvc_gen``-created files, the
    hash must equal the pinned value and its ``dir_info`` must count three
    files.
    """
    tmp_dir.dvc_gen({"dir": {"foo": "foo", "bar": "bar"}})
    (tmp_dir / "dir" / "baz").write_text("baz")

    fs = RepoFileSystem(dvc)
    result = get_hash(PathInfo(tmp_dir) / "dir", fs, "md5")

    assert result == HashInfo("md5", "ba75a2162ca9c29acecb7957105a0bc2.dir")
    assert result.dir_info.nfiles == 3
Ejemplo n.º 9
0
def test_get_hash_dirty_file(tmp_dir, dvc):
    """Check the RepoFileSystem hash of a file before and after it is unlinked.

    With the overwritten workspace copy present, ``fs.info`` has no ``md5``
    and ``get_hash`` yields one pinned value; once the copy is unlinked,
    both ``fs.info`` and ``get_hash`` yield a second pinned value.
    """
    tmp_dir.dvc_gen("file", "file")
    (tmp_dir / "file").write_text("something")

    fs = RepoFileSystem(dvc)
    file_path = PathInfo(tmp_dir) / "file"
    md5_after_unlink = "8c7dd922ad47494fc02c388e12c00eac"

    # Overwritten copy still in the workspace.
    assert fs.info(file_path).get("md5") is None
    assert get_hash(file_path, fs, "md5") == HashInfo(
        "md5", "437b930db84b8079c2dd804a71936b5f"
    )

    # Workspace copy gone.
    (tmp_dir / "file").unlink()
    assert fs.info(file_path)["md5"] == md5_after_unlink
    assert get_hash(file_path, fs, "md5") == HashInfo(
        "md5", md5_after_unlink
    )
Ejemplo n.º 10
0
def test_get_hash_cached_granular(tmp_dir, dvc, mocker):
    """Check hashes for a nested subdirectory and a file inside it.

    While present in the workspace, neither ``subdir`` nor ``subdir/data``
    has an ``md5`` in ``fs.info``, yet ``get_hash`` returns the pinned
    values. After ``data`` is unlinked, ``fs.info`` reports its md5.
    """
    tmp_dir.dvc_gen(
        {"dir": {"foo": "foo", "bar": "bar", "subdir": {"data": "data"}}}
    )
    fs = RepoFileSystem(dvc)
    subdir = PathInfo(tmp_dir) / "dir" / "subdir"
    data_file = subdir / "data"
    data_md5 = "8d777f385d3dfec8815d20f7496026dc"

    assert fs.info(subdir).get("md5") is None
    assert get_hash(subdir, fs, "md5") == HashInfo(
        "md5", "af314506f1622d107e0ed3f14ec1a3b5.dir"
    )

    assert fs.info(data_file).get("md5") is None
    assert get_hash(data_file, fs, "md5") == HashInfo("md5", data_md5)

    (tmp_dir / "dir" / "subdir" / "data").unlink()
    assert fs.info(data_file)["md5"] == data_md5
Ejemplo n.º 11
0
def test_get_hash_cached_file(tmp_dir, dvc, mocker):
    """Check the RepoFileSystem hash of a ``dvc_gen``-created file.

    With the file present, ``fs.info`` has no ``md5`` while ``get_hash``
    returns the expected value; after the file is unlinked, ``fs.info``
    reports that same md5.
    """
    tmp_dir.dvc_gen({"foo": "foo"})
    fs = RepoFileSystem(dvc)
    foo_path = PathInfo(tmp_dir) / "foo"
    expected = "acbd18db4cc2f85cedef654fccc4a4d8"

    assert fs.info(foo_path).get("md5") is None
    assert get_hash(foo_path, fs, "md5") == HashInfo("md5", expected)

    (tmp_dir / "foo").unlink()
    assert fs.info(foo_path)["md5"] == expected
Ejemplo n.º 12
0
def test_get_hash_mixed_dir(tmp_dir, scm, dvc):
    """Check the hash of a directory holding both DVC- and SCM-added files.

    ``dir/foo`` is added through DVC; ``dir/bar``, ``dir/.gitignore`` and
    ``dir/foo.dvc`` are added and committed through SCM. The directory hash
    from RepoFileSystem must equal the pinned value.
    """
    tmp_dir.gen({"dir": {"foo": "foo", "bar": "bar"}})
    tmp_dir.dvc.add(os.path.join("dir", "foo"))

    scm_tracked = [
        os.path.join("dir", name) for name in ("bar", ".gitignore", "foo.dvc")
    ]
    tmp_dir.scm.add(scm_tracked)
    tmp_dir.scm.commit("add dir")

    fs = RepoFileSystem(dvc)
    result = get_hash(PathInfo(tmp_dir) / "dir", fs, "md5")
    assert result == HashInfo("md5", "e1d9e8eae5374860ae025ec84cfd85c7.dir")
Ejemplo n.º 13
0
def _remove(path_info, fs, cache, force=False):
    """Remove ``path_info`` from ``fs``, confirming first when needed.

    Nothing happens if ``path_info`` does not exist. With ``force`` set the
    path is removed right away. Otherwise its current hash is computed and
    the matching object is loaded from ``cache`` and checked; if either
    step raises ``FileNotFoundError`` or ``ObjectFormatError`` the user is
    prompted before removal.

    Raises:
        ConfirmRemoveError: when the confirmation prompt is declined.
    """
    if not fs.exists(path_info):
        return

    if not force:
        current = get_hash(path_info, fs, fs.PARAM_CHECKSUM)
        try:
            check(cache, load(cache, current))
        except (FileNotFoundError, ObjectFormatError):
            question = (
                f"file/directory '{path_info}' is going to be removed. "
                "Are you sure you want to proceed?"
            )
            confirmed = prompt.confirm(question)
            if not confirmed:
                raise ConfirmRemoveError(str(path_info))

    fs.remove(path_info)
0
def get_url(path, repo=None, rev=None, remote=None):
    """
    Return the storage URL of a file or directory tracked in a DVC repo.

    For Git repos HEAD is used unless `rev` is given; the default remote is
    tried unless `remote` is given.

    Raises PathMissingError if the path cannot be found in the repo, and
    OutputNotFoundError if the path is not tracked by DVC.

    NOTE: The existence of the file or directory in the remote storage is
    not verified by this function.
    """
    with Repo.open(repo, rev=rev, subrepos=True, uninitialized=True) as _repo:
        target = PathInfo(_repo.root_dir) / path

        # Translate a missing path into the project's own error type.
        with reraise(FileNotFoundError, PathMissingError(path, repo)):
            meta = _repo.repo_fs.metadata(target)

        if not meta.is_dvc:
            raise OutputNotFoundError(path, repo)

        storage = meta.repo.cloud
        checksum = get_hash(target, _repo.repo_fs, "md5").value
        return storage.get_url_for(remote, checksum=checksum)
Ejemplo n.º 15
0
 def _to_checksum(output):
     """Return the checksum value for ``output``.

     Uses the hash already stored on ``output`` unless ``on_working_fs`` is
     set, in which case the md5 is computed from ``output.path_info`` on
     the repo's local odb filesystem.
     """
     if not on_working_fs:
         return output.hash_info.value

     computed = get_hash(output.path_info, repo.odb.local.fs, "md5")
     return computed.value