def status(
    self,
    objs: Iterable["HashInfo"],
    jobs: Optional[int] = None,
    remote: Optional[str] = None,
    odb: Optional["ObjectDB"] = None,
    log_missing: bool = True,
):
    """Check the status of data items in a cloud-agnostic way.

    Args:
        objs: objects to check status for.
        jobs: number of jobs that can be running simultaneously.
        remote: optional name of the remote to compare the cache to.
            By default the remote from the core.remote config option
            is used.
        odb: optional ODB to check status against. Overrides remote.
        log_missing: log warning messages if a file exists in neither
            the cache nor the cloud.

    Returns:
        Whatever ``compare_status`` returns for the local ODB versus
        the selected remote ODB.
    """
    from dvc.data.status import compare_status

    # An explicitly passed (truthy) ODB wins; otherwise resolve it by name.
    remote_odb = odb or self.get_remote_odb(remote, "status")
    local_odb = self.repo.odb.local

    return compare_status(
        local_odb,
        remote_odb,
        objs,
        jobs=jobs,
        log_missing=log_missing,
        dest_index=get_index(remote_odb),
        cache_odb=local_odb,
    )
def pull(
    self,
    objs: Iterable["HashInfo"],
    jobs: Optional[int] = None,
    remote: Optional[str] = None,
    odb: Optional["ObjectDB"] = None,
):
    """Pull data items in a cloud-agnostic way.

    Args:
        objs: objects to pull from the cloud.
        jobs: number of jobs that can be running simultaneously.
        remote: optional name of the remote to pull from. By default
            the remote from the core.remote config option is used.
        odb: optional ODB to pull from. Overrides remote.

    Returns:
        Whatever ``transfer`` returns for the source ODB -> local ODB
        transfer.
    """
    from dvc.data.transfer import transfer

    # An explicitly passed (truthy) ODB wins; otherwise resolve it by name.
    source_odb = odb or self.get_remote_odb(remote, "pull")
    local_odb = self.repo.odb.local

    return transfer(
        source_odb,
        local_odb,
        objs,
        jobs=jobs,
        src_index=get_index(source_odb),
        cache_odb=local_odb,
        # Verification is driven by the source ODB's own ``verify`` attribute.
        verify=source_odb.verify,
    )
def test_remote_index_dir_config(make_tmp_dir, dvc):
    """The configured ``index.dir`` must be the base of a remote's index.

    Sets ``index.dir`` in the repo config, registers an ``s3`` remote,
    overrides the repo root, and checks that the parent directory of the
    remote index's ``index_dir`` is
    ``<index_dir>/.dvc/test_repo-a473718/index``.
    """
    index_dir = str(make_tmp_dir("tmp_index"))

    with dvc.config.edit() as conf:
        conf["index"]["dir"] = index_dir
        conf["remote"]["s3"] = {"url": "s3://bucket/name"}

    # The expected path component "test_repo-a473718" is tied to this
    # root_dir value (presumably name + hash of the path - TODO confirm).
    dvc.root_dir = "/usr/local/test_repo"

    remote_odb = dvc.cloud.get_remote_odb(name="s3")
    actual = os.path.dirname(get_index(remote_odb).index_dir)
    expected = os.path.join(index_dir, ".dvc", "test_repo-a473718", "index")

    assert actual == expected
def gc(
    self,
    all_branches=False,
    cloud=False,
    remote=None,
    with_deps=False,
    all_tags=False,
    all_commits=False,
    all_experiments=False,
    force=False,
    jobs=None,
    repos=None,
    workspace=False,
):
    """Garbage-collect objects that are not in use.

    Collects the object ids reported by ``used_objs`` for this repo and
    for every repo listed in ``repos``, then runs the object-level gc
    over each ODB returned by ``self.odb.by_scheme()`` and, when
    ``cloud`` is truthy, over the remote ODB as well.

    Args:
        all_branches, all_tags, all_commits, all_experiments, with_deps,
        remote, force: forwarded unchanged to ``used_objs`` to select
            which objects count as used. ``remote`` also names the
            remote ODB collected when ``cloud`` is set.
        cloud: when truthy, also collect the remote ODB; its index is
            cleared if that collection reports removals.
        jobs: forwarded to ``used_objs`` and to the object-level gc.
        repos: optional paths of additional repos whose used objects
            must also be kept. Their locks are held while used objects
            are collected.
        workspace: checked together with the ``all_*`` flags by
            ``_raise_error_if_all_disabled`` before any work is done.

    Returns:
        None.
    """
    # `workspace` has to be true for gc to take effect; it is assumed to be
    # enabled whenever any of `all_tags`, `all_commits`, `all_experiments`
    # or `all_branches` is enabled.
    _raise_error_if_all_disabled(
        workspace=workspace,
        all_tags=all_tags,
        all_commits=all_commits,
        all_branches=all_branches,
        all_experiments=all_experiments,
    )

    from contextlib import ExitStack

    from dvc.data.db import get_index
    from dvc.data.gc import gc as ogc
    from dvc.repo import Repo

    all_repos = [Repo(path) for path in repos or []]

    # Same selection criteria are applied to every repo.
    used_objs_kwargs = {
        "all_branches": all_branches,
        "with_deps": with_deps,
        "all_tags": all_tags,
        "all_commits": all_commits,
        "all_experiments": all_experiments,
        "remote": remote,
        "force": force,
        "jobs": jobs,
    }

    used_obj_ids = set()
    with ExitStack() as stack:
        # Only the extra repos are locked here; no lock on `self` is taken
        # inside this function.
        for extra_repo in all_repos:
            stack.enter_context(extra_repo.lock)

        for repo in [*all_repos, self]:
            used_obj_ids.update(*repo.used_objs(**used_objs_kwargs).values())

    for scheme, scheme_odb in self.odb.by_scheme():
        # Skip falsy ODBs; log only when the gc run reports nothing removed.
        if scheme_odb and not ogc(scheme_odb, used_obj_ids, jobs=jobs):
            logger.info(f"No unused '{scheme}' cache to remove.")

    if not cloud:
        return

    remote_odb = self.cloud.get_remote_odb(remote, "gc -c")
    if ogc(remote_odb, used_obj_ids, jobs=jobs):
        # Clear the remote's index once the gc run reports removals.
        get_index(remote_odb).clear()
    else:
        logger.info("No unused cache to remove from remote.")
def index(dvc, local_remote, mocker):
    """Return the index for the ``upstream`` remote ODB of ``dvc``.

    ``local_remote`` and ``mocker`` are not used in the body; presumably
    they are requested as fixtures for their setup side effects - TODO
    confirm against the fixture definitions.
    """
    return get_index(dvc.cloud.get_remote_odb("upstream"))