def test_staging_file(tmp_dir, dvc):
    """A staged object lives only in the staging ODB until it is moved
    into the local cache via ``transfer(..., move=True)``."""
    from dvc.objects import check
    from dvc.objects.stage import stage
    from dvc.objects.transfer import transfer

    tmp_dir.gen("foo", "foo")
    local_fs = LocalFileSystem()
    cache_odb = dvc.odb.local

    # Staging computes the hash but must not populate the local cache yet.
    staging_odb, obj = stage(cache_odb, tmp_dir / "foo", local_fs, "md5")
    assert not cache_odb.exists(obj.hash_info)
    assert staging_odb.exists(obj.hash_info)

    # check() raises for the ODB that does not hold the object.
    with pytest.raises(FileNotFoundError):
        check(cache_odb, obj)
    check(staging_odb, obj)

    # Moving the object flips which ODB owns it.
    transfer(staging_odb, cache_odb, {obj.hash_info}, move=True)
    check(cache_odb, obj)
    with pytest.raises(FileNotFoundError):
        check(staging_odb, obj)

    cached_path = cache_odb.hash_to_path_info(obj.hash_info.value)
    assert local_fs.exists(cached_path)
def pull(
    self,
    objs: Iterable["HashInfo"],
    jobs: Optional[int] = None,
    remote: Optional[str] = None,
    odb: Optional["ObjectDB"] = None,
):
    """Pull data items in a cloud-agnostic way.

    Args:
        objs: objects to pull from the cloud.
        jobs: number of jobs that can be running simultaneously.
        remote: optional name of remote to pull from. By default
            remote from core.remote config option is used.
        odb: optional ODB to pull from. Overrides remote.
    """
    from dvc.objects.transfer import transfer

    # An explicitly supplied ODB wins; otherwise resolve one from the
    # remote name (or the configured default remote).
    odb = odb or self.get_remote_odb(remote, "pull")
    return transfer(
        odb,
        self.repo.odb.local,
        objs,
        jobs=jobs,
        src_index=get_index(odb),
        cache_odb=self.repo.odb.local,
        verify=odb.verify,
    )
def _process_stages(
    repo, sub_targets, stages, no_commit, pbar, to_remote, to_cache, **kwargs
):
    """Save/commit the given stages, returning stages whose cache link failed.

    When ``to_remote`` or ``to_cache`` is set, there is exactly one stage
    (already validated by ``add()``): its single output is transferred
    straight to the remote or cache and the dvcfile is dumped immediately.
    """
    from dvc.dvcfile import Dvcfile

    from ..output.base import OutputDoesNotExistError

    failed_links = []

    if to_remote or to_cache:
        # Already verified in the add()
        (single_stage,) = stages
        (target,) = sub_targets
        (out,) = single_stage.outs

        if to_remote:
            out.hash_info = repo.cloud.transfer(
                target,
                jobs=kwargs.get("jobs"),
                remote=kwargs.get("remote"),
                command="add",
            )
        else:
            from dvc.fs import get_cloud_fs
            from dvc.objects.transfer import transfer

            from_fs = get_cloud_fs(repo, url=target)
            out.hash_info = transfer(
                out.odb,
                from_fs,
                from_fs.path_info,
                jobs=kwargs.get("jobs"),
            )
            out.checkout()

        Dvcfile(repo, single_stage.path).dump(single_stage)
        return failed_links

    with Tqdm(
        total=len(stages),
        desc="Processing",
        unit="file",
        disable=len(stages) == 1,
    ) as pbar_stages:
        for current in stages:
            try:
                current.save()
            except OutputDoesNotExistError:
                # Roll back the outer progress bar before propagating.
                pbar.n -= 1
                raise

            try:
                if not no_commit:
                    current.commit()
            except CacheLinkError:
                # Best-effort: remember the failure, keep processing.
                failed_links.append(current)

            Dvcfile(repo, current.path).dump(current)
            pbar_stages.update()

    return failed_links
def test_subrepos_are_ignored(tmp_dir, erepo_dir):
    """Downloading with ``follow_subrepos=False`` must skip nested subrepo
    contents, and staging/transfer must cache only the parent repo's files."""
    subrepo = erepo_dir / "dir" / "subrepo"
    make_subrepo(subrepo, erepo_dir.scm)

    with erepo_dir.chdir():
        erepo_dir.dvc_gen("dir/foo", "foo", commit="foo")
        erepo_dir.scm_gen("dir/bar", "bar", commit="bar")

    with subrepo.chdir():
        subrepo.dvc_gen({"file": "file"}, commit="add files on subrepo")

    with external_repo(os.fspath(erepo_dir)) as repo:
        repo.repo_fs.download(
            PathInfo(repo.root_dir) / "dir",
            PathInfo(tmp_dir / "out"),
            follow_subrepos=False,
        )
        # Only the parent repo's files should have been downloaded.
        expected_files = {"foo": "foo", "bar": "bar", ".gitignore": "/foo\n"}
        assert (tmp_dir / "out").read_text() == expected_files

        # clear cache to test saving to cache
        cache_dir = tmp_dir / repo.odb.local.cache_dir
        remove(cache_dir)
        clean_staging()
        makedirs(cache_dir)

        staging_odb, _, obj = stage(
            repo.odb.local,
            PathInfo(repo.root_dir) / "dir",
            repo.repo_fs,
            "md5",
            dvcignore=repo.dvcignore,
        )
        transfer(
            staging_odb,
            repo.odb.local,
            {obj.hash_info},
            shallow=False,
            move=True,
        )
        # Exactly the parent repo's objects (and the dir manifest) are cached.
        assert set(cache_dir.glob("??/*")) == {
            cache_dir / "e1" / "d9e8eae5374860ae025ec84cfd85c7.dir",
            cache_dir / "37" / "b51d194a7513e45b56f6524f2d51f2",
            cache_dir / "94" / "7d2b84e5aa88170e80dff467a5bfb6",
            cache_dir / "ac" / "bd18db4cc2f85cedef654fccc4a4d8",
        }
def transfer(self, from_fs, from_info, jobs=None, no_progress_bar=False):
    """Transfer the object at ``from_info`` on ``from_fs`` into this ODB.

    Thin wrapper that delegates to :func:`dvc.objects.transfer.transfer`,
    forwarding the job count and progress-bar flag unchanged.
    """
    from dvc.objects.transfer import transfer

    return transfer(
        self.odb,
        from_fs,
        from_info,
        jobs=jobs,
        no_progress_bar=no_progress_bar,
    )