Beispiel #1
0
    def status(
        self,
        objs: Iterable["HashInfo"],
        jobs: Optional[int] = None,
        remote: Optional[str] = None,
        odb: Optional["ObjectDB"] = None,
        log_missing: bool = True,
    ):
        """Check status of data items in a cloud-agnostic way.

        Args:
            objs: objects to check status for.
            jobs: number of jobs that can be running simultaneously.
            remote: optional remote to compare
                cache to. By default remote from core.remote config option
                is used.
            odb: optional ODB to check status from. Overrides remote.
            log_missing: log warning messages if file doesn't exist
                neither in cache, neither in cloud.
        """
        from dvc.data.status import compare_status

        if not odb:
            odb = self.get_remote_odb(remote, "status")
        return compare_status(
            self.repo.odb.local,
            odb,
            objs,
            jobs=jobs,
            log_missing=log_missing,
            dest_index=get_index(odb),
            cache_odb=self.repo.odb.local,
        )
Beispiel #2
0
    def pull(
        self,
        objs: Iterable["HashInfo"],
        jobs: Optional[int] = None,
        remote: Optional[str] = None,
        odb: Optional["ObjectDB"] = None,
    ):
        """Pull data items in a cloud-agnostic way.

        Args:
            objs: objects to pull from the cloud.
            jobs: number of jobs that can be running simultaneously.
            remote: optional name of remote to pull from.
                By default remote from core.remote config option is used.
            odb: optional ODB to pull from. Overrides remote.
        """
        from dvc.data.transfer import transfer

        if not odb:
            odb = self.get_remote_odb(remote, "pull")
        return transfer(
            odb,
            self.repo.odb.local,
            objs,
            jobs=jobs,
            src_index=get_index(odb),
            cache_odb=self.repo.odb.local,
            verify=odb.verify,
        )
Beispiel #3
0
def test_remote_index_dir_config(make_tmp_dir, dvc):
    index_dir = str(make_tmp_dir("tmp_index"))
    with dvc.config.edit() as conf:
        conf["index"]["dir"] = index_dir
        conf["remote"]["s3"] = {"url": "s3://bucket/name"}

    dvc.root_dir = "/usr/local/test_repo"

    assert os.path.dirname(
        get_index(
            dvc.cloud.get_remote_odb(name="s3")).index_dir) == os.path.join(
                index_dir, ".dvc", "test_repo-a473718", "index")
Beispiel #4
0
def gc(
    self,
    all_branches=False,
    cloud=False,
    remote=None,
    with_deps=False,
    all_tags=False,
    all_commits=False,
    all_experiments=False,
    force=False,
    jobs=None,
    repos=None,
    workspace=False,
):

    # require `workspace` to be true to come into effect.
    # assume `workspace` to be enabled if any of `all_tags`, `all_commits`,
    # `all_experiments` or `all_branches` are enabled.
    _raise_error_if_all_disabled(
        workspace=workspace,
        all_tags=all_tags,
        all_commits=all_commits,
        all_branches=all_branches,
        all_experiments=all_experiments,
    )

    from contextlib import ExitStack

    from dvc.data.db import get_index
    from dvc.data.gc import gc as ogc
    from dvc.repo import Repo

    if not repos:
        repos = []
    all_repos = [Repo(path) for path in repos]

    used_obj_ids = set()
    with ExitStack() as stack:
        for repo in all_repos:
            stack.enter_context(repo.lock)

        for repo in all_repos + [self]:
            for obj_ids in repo.used_objs(
                    all_branches=all_branches,
                    with_deps=with_deps,
                    all_tags=all_tags,
                    all_commits=all_commits,
                    all_experiments=all_experiments,
                    remote=remote,
                    force=force,
                    jobs=jobs,
            ).values():
                used_obj_ids.update(obj_ids)

    for scheme, odb in self.odb.by_scheme():
        if not odb:
            continue

        removed = ogc(odb, used_obj_ids, jobs=jobs)
        if not removed:
            logger.info(f"No unused '{scheme}' cache to remove.")

    if not cloud:
        return

    odb = self.cloud.get_remote_odb(remote, "gc -c")
    removed = ogc(odb, used_obj_ids, jobs=jobs)
    if removed:
        get_index(odb).clear()
    else:
        logger.info("No unused cache to remove from remote.")
Beispiel #5
0
def index(dvc, local_remote, mocker):
    odb = dvc.cloud.get_remote_odb("upstream")
    return get_index(odb)