Example #1
    def get_used_cache(self, *args, **kwargs):
        from dvc.objects.db import NamedCache

        # Merge the used-cache entries of every output of this stage,
        # optionally narrowed to a sub-path via `filter_info`.
        cache = NamedCache()
        for out in self.filter_outs(kwargs.get("filter_info")):
            cache.update(out.get_used_cache(*args, **kwargs))

        return cache
Example #2
    def get_used_cache(self, used_run_cache, *args, **kwargs):
        from dvc.objects.db import NamedCache

        cache = NamedCache()

        # `used_run_cache` is an iterable of (key, value) pairs identifying
        # run-cache entries; rebuild a stage from each one and merge its cache.
        for key, value in used_run_cache:
            entry = self._load_cache(key, value)
            if not entry:
                continue
            stage = self._create_stage(entry)
            cache.update(stage.get_used_cache(*args, **kwargs))
        return cache
Example #3
    def used_cache(
        self,
        targets=None,
        all_branches=False,
        with_deps=False,
        all_tags=False,
        all_commits=False,
        all_experiments=False,
        remote=None,
        force=False,
        jobs=None,
        recursive=False,
        used_run_cache=None,
        revs=None,
    ):
        """Get the stages related to the given target and collect
        the `info` of its outputs.

        This is useful to know what files from the cache are _in use_
        (namely, a file described as an output on a stage).

        The scope is, by default, the working directory, but you can use
        `all_branches`/`all_tags`/`all_commits`/`all_experiments` to expand
        the scope.

        Returns:
            A dictionary with Schemes (representing output's location) mapped
            to items containing the output's `dumpd` names and the output's
            children (if the given output is a directory).
        """
        from funcy import cat  # flattens the per-target stage lists below
        from dvc.objects.db import NamedCache

        cache = NamedCache()

        for branch in self.brancher(
                revs=revs,
                all_branches=all_branches,
                all_tags=all_tags,
                all_commits=all_commits,
                all_experiments=all_experiments,
        ):
            targets = targets or [None]

            # Collect (stage, filter_info) pairs for every requested target;
            # funcy's cat() flattens the per-target lists into one iterable.
            pairs = cat(
                self.stage.collect_granular(
                    target, recursive=recursive, with_deps=with_deps)
                for target in targets)

            suffix = f"({branch})" if branch else ""
            for stage, filter_info in pairs:
                used_cache = stage.get_used_cache(
                    remote=remote,
                    force=force,
                    jobs=jobs,
                    filter_info=filter_info,
                )
                cache.update(used_cache, suffix=suffix)

        if used_run_cache:
            used_cache = self.stage_cache.get_used_cache(
                used_run_cache,
                remote=remote,
                force=force,
                jobs=jobs,
            )
            cache.update(used_cache)

        return cache
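For reference, here is a minimal usage sketch of `used_cache`. The project location, the use of `repo.lock`/`repo.state` (mirroring how Example #4 guards extra repos), and the `"local"` scheme literal are assumptions for illustration, not part of the listed source.

from dvc.repo import Repo

repo = Repo(".")  # assumes a DVC project in the current directory
with repo.lock, repo.state:  # same guards Example #4 applies to extra repos
    # Collect cache entries referenced by outputs in the workspace and in
    # every branch and tag.
    used = repo.used_cache(all_branches=True, all_tags=True)

# NamedCache groups entries by scheme; "local" is assumed to match
# Schemes.LOCAL for the on-disk cache.
for checksum in used.scheme_keys("local"):
    print(checksum)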
Example #4
def gc(
    self,
    all_branches=False,
    cloud=False,
    remote=None,
    with_deps=False,
    all_tags=False,
    all_commits=False,
    all_experiments=False,
    force=False,
    jobs=None,
    repos=None,
    workspace=False,
):
    # These names live at module level in the original file; they are inlined
    # here so the snippet is self-contained (import paths follow the other
    # examples above / upstream DVC).
    import logging

    from dvc.objects.db import NamedCache
    from dvc.scheme import Schemes

    logger = logging.getLogger(__name__)

    # At least one scope flag must be set: `workspace` on its own, or any of
    # `all_tags`, `all_commits`, `all_experiments`, `all_branches` (each of
    # which implies the workspace). `_raise_error_if_all_disabled` is a helper
    # defined alongside this function; it raises when every flag is False.
    _raise_error_if_all_disabled(
        workspace=workspace,
        all_tags=all_tags,
        all_commits=all_commits,
        all_branches=all_branches,
        all_experiments=all_experiments,
    )

    from contextlib import ExitStack

    from dvc.repo import Repo

    if not repos:
        repos = []
    all_repos = [Repo(path) for path in repos]

    with ExitStack() as stack:
        for repo in all_repos:
            stack.enter_context(repo.lock)
            stack.enter_context(repo.state)

        used = NamedCache()
        for repo in all_repos + [self]:
            used.update(
                repo.used_cache(
                    all_branches=all_branches,
                    with_deps=with_deps,
                    all_tags=all_tags,
                    all_commits=all_commits,
                    all_experiments=all_experiments,
                    remote=remote,
                    force=force,
                    jobs=jobs,
                ))

    for scheme, odb in self.odb.by_scheme():
        if not odb:
            continue

        removed = odb.gc(set(used.scheme_keys(scheme)), jobs=jobs)
        if not removed:
            logger.info(f"No unused '{scheme}' cache to remove.")

    if not cloud:
        return

    remote = self.cloud.get_remote(remote, "gc -c")
    removed = remote.gc(set(used.scheme_keys(Schemes.LOCAL)), jobs=jobs)
    if not removed:
        logger.info("No unused cache to remove from remote.")
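And a hedged usage sketch for `gc`, assuming the function is bound as a `Repo` method (as the `self` parameter suggests and as upstream DVC does) and that a project exists in the current directory; it is not part of the listed source.

from dvc.repo import Repo

repo = Repo(".")  # assumes a DVC project in the current directory

# Keep only cache entries referenced by the current workspace; at least one
# scope flag is required, otherwise _raise_error_if_all_disabled() raises.
repo.gc(workspace=True)

# Widen the scope to all branches and tags, and also prune the default remote.
repo.gc(all_branches=True, all_tags=True, cloud=True)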