Example #1
0
def test_used_cache(tmp_dir, dvc, path):
    """Check that ``dvc.used_cache`` collects the expected entries for a dir."""
    from dvc.objects.db import NamedCache

    tmp_dir.dvc_gen({"dir": {"subdir": {"file": "file"}, "other": "other"}})

    # Expected cache: the .dir entry itself plus one child file entry.
    want = NamedCache.make(
        "local", "70922d6bf66eb073053a82f77d58c536.dir", "dir"
    )
    child = NamedCache.make(
        "local",
        "8c7dd922ad47494fc02c388e12c00eac",
        os.path.join("dir", "subdir", "file"),
    )
    want.add_child_cache("70922d6bf66eb073053a82f77d58c536.dir", child)

    collected = dvc.used_cache([path])
    assert (
        collected._items == want._items
        and collected.external == want.external
    )
Example #2
0
    def get_used_cache(self, **kwargs):
        """Get a dumpd of the given `out`, with an entry including the branch.

        The `used_cache` of an output is no more than its `info`.

        In case that the given output is a directory, it will also
        include the `info` of its files.
        """
        # Outputs that bypass the cache contribute nothing.
        if not self.use_cache:
            return NamedCache()

        # Repo imports are tracked by their (repo, path) pair rather than
        # by a checksum entry.
        if self.stage.is_repo_import:
            cache = NamedCache()
            (dep,) = self.stage.deps
            cache.external[dep.repo_pair].add(dep.def_path)
            return cache

        # Without version info there is nothing reliable to collect; warn
        # the user and return an empty cache instead of raising.
        if not self.hash_info:
            msg = (
                "Output '{}'({}) is missing version info. "
                "Cache for it will not be collected. "
                "Use `dvc repro` to get your pipeline up to date.".format(
                    self, self.stage
                )
            )
            if self.exists:
                msg += (
                    "\n"
                    "You can also use `dvc commit {stage.addressing}` "
                    "to associate existing '{out}' with {stage}.".format(
                        out=self, stage=self.stage
                    )
                )
            logger.warning(msg)
            return NamedCache()

        cache = NamedCache.make(self.scheme, self.hash_info.value, str(self))

        # Directories additionally pull in the cache info of their files.
        if self.is_dir_checksum:
            cache.add_child_cache(
                self.hash_info.value, self.collect_used_dir_cache(**kwargs)
            )

        return cache
Example #3
0
    def get_dir_cache(self, **kwargs):
        """Load the directory object for this output, pulling it if missing.

        Sets ``self.obj`` to the loaded object, or ``None`` when it cannot
        be loaded even after attempting a pull, and returns it.
        """
        if not self.is_dir_checksum:
            raise DvcException("cannot get dir cache for file checksum")

        try:
            objects.check(self.odb, self.odb.get(self.hash_info))
        except (FileNotFoundError, ObjectFormatError):
            # The .dir object is absent or corrupted locally — try to
            # fetch it from the remote before loading.
            named = NamedCache.make("local", self.hash_info.value, str(self))
            self.repo.cloud.pull(named, show_checksums=False, **kwargs)

        try:
            self.obj = objects.load(self.odb, self.hash_info)
        except (FileNotFoundError, ObjectFormatError):
            self.obj = None

        return self.obj
Example #4
0
def test_cloud(tmp_dir, dvc, remote):  # pylint:disable=unused-argument
    """End-to-end status/push/pull round-trip for a single file and a dir."""

    def check_status(info_obj, checksum, status):
        # Query the cloud status for one cache entry and compare it against
        # the expected per-checksum state.
        actual = dvc.cloud.status(info_obj, show_checksums=True)
        assert actual == {checksum: {"name": checksum, "status": status}}

    (stage,) = tmp_dir.dvc_gen("foo", "foo")
    out = stage.outs[0]
    cache = out.cache_path
    md5 = out.hash_info.value
    info = out.get_used_cache()

    (stage_dir,) = tmp_dir.dvc_gen(
        {
            "data_dir": {
                "data_sub_dir": {"data_sub": "data_sub"},
                "data": "data",
                "empty": "",
            }
        }
    )
    out_dir = stage_dir.outs[0]
    cache_dir = out_dir.cache_path
    name_dir = str(out_dir)
    md5_dir = out_dir.hash_info.value
    info_dir = NamedCache.make(out_dir.scheme, md5_dir, name_dir)

    # Check status
    check_status(info, md5, STATUS_NEW)
    check_status(info_dir, md5_dir, STATUS_NEW)

    # Move cache and check status
    # See issue https://github.com/iterative/dvc/issues/4383 for details
    backup_dir = dvc.odb.local.cache_dir + ".backup"
    move(dvc.odb.local.cache_dir, backup_dir)
    check_status(info, md5, STATUS_MISSING)
    check_status(info_dir, md5_dir, STATUS_MISSING)

    # Restore original cache:
    remove(dvc.odb.local.cache_dir)
    move(backup_dir, dvc.odb.local.cache_dir)

    # Push and check status
    dvc.cloud.push(info)
    assert os.path.exists(cache)
    assert os.path.isfile(cache)

    dvc.cloud.push(info_dir)
    assert os.path.isfile(cache_dir)

    check_status(info, md5, STATUS_OK)
    check_status(info_dir, md5_dir, STATUS_OK)

    # Remove and check status
    remove(dvc.odb.local.cache_dir)
    check_status(info, md5, STATUS_DELETED)
    check_status(info_dir, md5_dir, STATUS_DELETED)

    # Pull and check status
    dvc.cloud.pull(info)
    assert os.path.exists(cache)
    assert os.path.isfile(cache)
    with open(cache) as fd:
        assert fd.read() == "foo"

    dvc.cloud.pull(info_dir)
    assert os.path.isfile(cache_dir)

    check_status(info, md5, STATUS_OK)
    check_status(info_dir, md5_dir, STATUS_OK)