Example #1
def _changed(path_info, fs, obj, cache):
    """Return True if the cached obj is invalid or path_info has changed."""
    logger.trace("checking if '%s'('%s') has changed.", path_info, obj)

    try:
        check(cache, obj)
    except (FileNotFoundError, ObjectFormatError):
        logger.debug("cache for '%s'('%s') has changed.", path_info,
                     obj.hash_info)
        return True

    try:
        actual = stage(cache, path_info, fs, obj.hash_info.name).hash_info
    except FileNotFoundError:
        logger.debug("'%s' doesn't exist.", path_info)
        return True

    if obj.hash_info != actual:
        logger.debug(
            "hash value '%s' for '%s' has changed (actual '%s').",
            obj.hash_info,
            path_info,
            actual,
        )
        return True

    logger.trace("'%s' hasn't changed.", path_info)
    return False
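
All of these examples share the same contract: objects.check(odb, obj) raises FileNotFoundError when the object is missing from the ODB and ObjectFormatError when it is present but corrupt, and otherwise succeeds silently. The following is a minimal sketch of that pattern, illustrative only and not code from the DVC repository; the odb and obj names are placeholders.

from dvc.objects import check
from dvc.objects.errors import ObjectFormatError


def cache_is_usable(odb, obj):
    """Return True only if obj exists in odb and passes format validation."""
    try:
        check(odb, obj)
    except FileNotFoundError:
        return False  # object missing from the ODB
    except ObjectFormatError:
        return False  # object present but corrupt or truncated
    return True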
Example #2
    def _collect_used_dir_cache(self,
                                remote=None,
                                force=False,
                                jobs=None,
                                filter_info=None) -> Optional["Tree"]:
        """Fetch dir cache and return used object IDs for this out."""

        try:
            self.get_dir_cache(jobs=jobs, remote=remote)
        except DvcException:
            logger.debug(f"failed to pull cache for '{self}'")

        try:
            objects.check(self.odb, self.odb.get(self.hash_info))
        except FileNotFoundError:
            msg = ("Missing cache for directory '{}'. "
                   "Cache for files inside will be lost. "
                   "Would you like to continue? Use '-f' to force.")
            if not force and not prompt.confirm(msg.format(self.path_info)):
                raise CollectCacheError(
                    "unable to fully collect used cache"
                    " without cache for directory '{}'".format(self))
            return None

        obj = self.get_obj()
        if filter_info and filter_info != self.path_info:
            prefix = filter_info.relative_to(self.path_info).parts
            obj = obj.filter(prefix)
        return obj
Example #3
    def collect_used_dir_cache(
            self,
            remote=None,
            force=False,
            jobs=None,
            filter_info=None) -> Dict[Optional["ObjectDB"], Set["HashFile"]]:
        """Fetch dir cache and return used objects for this out."""

        try:
            self.get_dir_cache(jobs=jobs, remote=remote)
        except DvcException:
            logger.debug(f"failed to pull cache for '{self}'")

        try:
            objects.check(self.odb, self.odb.get(self.hash_info))
        except (FileNotFoundError, ObjectFormatError):
            msg = ("Missing cache for directory '{}'. "
                   "Cache for files inside will be lost. "
                   "Would you like to continue? Use '-f' to force.")
            if not force and not prompt.confirm(msg.format(self.path_info)):
                raise CollectCacheError(
                    "unable to fully collect used cache"
                    " without cache for directory '{}'".format(self))
            return {}

        obj = self.get_obj(filter_info=filter_info, copy=True)
        self._set_obj_names(obj)
        return {None: {obj}}
Example #4
    def collect_used_dir_cache(
        self, remote=None, force=False, jobs=None, filter_info=None
    ):
        """Get a list of `info`s related to the given directory.

        - Pull the directory entry from the remote cache if it was changed.

        Example:

            Given the following commands:

            $ echo "foo" > directory/foo
            $ echo "bar" > directory/bar
            $ dvc add directory

            It will return a NamedCache like:

            nc = NamedCache()
            nc.add(self.scheme, 'c157a79031e1', 'directory/foo')
            nc.add(self.scheme, 'd3b07384d113', 'directory/bar')
        """

        cache = NamedCache()

        try:
            self.get_dir_cache(jobs=jobs, remote=remote)
        except DvcException:
            logger.debug(f"failed to pull cache for '{self}'")

        try:
            objects.check(self.odb, self.odb.get(self.hash_info))
        except (FileNotFoundError, ObjectFormatError):
            msg = (
                "Missing cache for directory '{}'. "
                "Cache for files inside will be lost. "
                "Would you like to continue? Use '-f' to force."
            )
            if not force and not prompt.confirm(msg.format(self.path_info)):
                raise CollectCacheError(
                    "unable to fully collect used cache"
                    " without cache for directory '{}'".format(self)
                )
            return cache

        path = str(self.path_info)
        filter_path = str(filter_info) if filter_info else None
        for entry_key, entry_obj in self.obj:
            entry_path = os.path.join(path, *entry_key)
            if (
                not filter_path
                or entry_path == filter_path
                or entry_path.startswith(filter_path + os.sep)
            ):
                cache.add(self.scheme, entry_obj.hash_info.value, entry_path)

        return cache
Example #5
File: checkout.py  Project: pyanezs/dvc
def checkout(
    path_info,
    fs,
    obj,
    cache,
    force=False,
    progress_callback=None,
    relink=False,
    quiet=False,
):
    if path_info.scheme not in ["local", cache.fs.scheme]:
        raise NotImplementedError

    failed = None
    skip = False
    if not obj:
        if not quiet:
            logger.warning(
                "No file hash info found for '%s'. It won't be created.",
                path_info,
            )
        _remove(path_info, fs, cache, force=force)
        failed = path_info

    elif not relink and not _changed(path_info, fs, obj, cache):
        logger.trace("Data '%s' didn't change.", path_info)
        skip = True
    else:
        try:
            check(cache, obj)
        except (FileNotFoundError, ObjectFormatError):
            if not quiet:
                logger.warning(
                    "Cache '%s' not found. File '%s' won't be created.",
                    obj.hash_info,
                    path_info,
                )
            _remove(path_info, fs, cache, force=force)
            failed = path_info

    if failed or skip:
        if progress_callback and obj:
            progress_callback(
                str(path_info), len(obj),
            )
        if failed:
            raise CheckoutError([failed])
        return

    logger.debug("Checking out '%s' with cache '%s'.", path_info, obj)

    return _checkout(
        path_info, fs, obj, cache, force, progress_callback, relink,
    )
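
A hedged sketch of how the checkout() entry point above might be driven. The out object and its attribute names are assumptions made for illustration; this is not the actual call site in the DVC repository.

# Hypothetical caller, continuing in the same module as checkout() above.
# `out` stands in for a DVC output; its attribute names are assumed here.
def restore_output(out, force=False):
    checkout(
        out.path_info,   # workspace destination for the data
        out.fs,          # filesystem wrapping that destination
        out.get_obj(),   # cached object to materialize (may be None)
        out.odb,         # object database holding the cache
        force=force,
        relink=False,
        progress_callback=None,
    )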
Example #6
    def changed_cache(self, filter_info=None):
        if not self.use_cache or not self.hash_info:
            return True

        obj = self.get_obj(filter_info=filter_info)
        if not obj:
            return True

        try:
            objects.check(self.odb, obj)
            return False
        except (FileNotFoundError, ObjectFormatError):
            return True
Example #7
def test_staging_file(tmp_dir, dvc):
    import pytest

    # LocalFileSystem import path assumed for the DVC version these examples target.
    from dvc.fs.local import LocalFileSystem
    from dvc.objects import check
    from dvc.objects.stage import stage
    from dvc.objects.transfer import transfer

    tmp_dir.gen("foo", "foo")
    fs = LocalFileSystem()

    local_odb = dvc.odb.local
    staging_odb, obj = stage(local_odb, tmp_dir / "foo", fs, "md5")

    assert not local_odb.exists(obj.hash_info)
    assert staging_odb.exists(obj.hash_info)

    with pytest.raises(FileNotFoundError):
        check(local_odb, obj)
    check(staging_odb, obj)

    transfer(staging_odb, local_odb, {obj.hash_info}, move=True)
    check(local_odb, obj)
    with pytest.raises(FileNotFoundError):
        check(staging_odb, obj)

    path_info = local_odb.hash_to_path_info(obj.hash_info.value)
    assert fs.exists(path_info)
Example #8
    def get_dir_cache(self, **kwargs):
        if not self.is_dir_checksum:
            raise DvcException("cannot get dir cache for file checksum")

        obj = self.odb.get(self.hash_info)
        try:
            objects.check(self.odb, obj)
        except FileNotFoundError:
            self.repo.cloud.pull([obj], **kwargs)

        try:
            self.obj = objects.load(self.odb, self.hash_info)
        except (FileNotFoundError, ObjectFormatError):
            self.obj = None

        return self.obj
Example #9
def _remove(path_info, fs, cache, force=False):
    if not fs.exists(path_info):
        return

    if force:
        fs.remove(path_info)
        return

    # Hash the current contents; only prompt if they are not already stored in the cache.
    current = stage(cache, path_info, fs, fs.PARAM_CHECKSUM).hash_info
    try:
        obj = load(cache, current)
        check(cache, obj)
    except (FileNotFoundError, ObjectFormatError):
        msg = (f"file/directory '{path_info}' is going to be removed. "
               "Are you sure you want to proceed?")

        if not prompt.confirm(msg):
            raise ConfirmRemoveError(str(path_info))

    fs.remove(path_info)
Example #10
    def get_dir_cache(self, **kwargs):

        if not self.is_dir_checksum:
            raise DvcException("cannot get dir cache for file checksum")

        try:
            objects.check(self.odb, self.odb.get(self.hash_info))
        except (FileNotFoundError, ObjectFormatError):
            self.repo.cloud.pull(
                NamedCache.make("local", self.hash_info.value, str(self)),
                show_checksums=False,
                **kwargs,
            )

        try:
            self.obj = objects.load(self.odb, self.hash_info)
        except (FileNotFoundError, ObjectFormatError):
            self.obj = None

        return self.obj
Example #11
def test_get_hash_dirty_file(tmp_dir, dvc):
    from dvc.objects import check
    from dvc.objects.errors import ObjectFormatError
    from dvc.objects.stage import get_file_hash, stage

    # HashInfo, PathInfo, RepoFileSystem and clean_staging come from
    # module-level imports/fixtures in the original test file.

    tmp_dir.dvc_gen("file", "file")
    file_hash_info = HashInfo("md5", "8c7dd922ad47494fc02c388e12c00eac")

    (tmp_dir / "file").write_text("something")
    something_hash_info = HashInfo("md5", "437b930db84b8079c2dd804a71936b5f")

    clean_staging()

    # file is modified in workspace
    # get_file_hash(file) should return workspace hash, not DVC cached hash
    fs = RepoFileSystem(repo=dvc)
    assert fs.info(PathInfo(tmp_dir) / "file").get("md5") is None
    staging, _, obj = stage(dvc.odb.local,
                            PathInfo(tmp_dir) / "file", fs, "md5")
    assert obj.hash_info == something_hash_info
    check(staging, obj)

    # file is removed in workspace
    # any staged object referring to modified workspace obj is now invalid
    (tmp_dir / "file").unlink()
    with pytest.raises(ObjectFormatError):
        check(staging, obj)

    # get_file_hash(file) should return DVC cached hash
    assert fs.info(PathInfo(tmp_dir) / "file")["md5"] == file_hash_info.value
    _, hash_info = get_file_hash(PathInfo(tmp_dir) / "file",
                                 fs,
                                 "md5",
                                 state=dvc.state)
    assert hash_info == file_hash_info

    # tmp_dir/file can be staged even though it is missing in workspace since
    # repofs will use the DVC cached hash (and refer to the local cache object)
    _, _, obj = stage(dvc.odb.local, PathInfo(tmp_dir) / "file", fs, "md5")
    assert obj.hash_info == file_hash_info