Example #1
# Module-level imports assumed from the original test file (import paths may
# differ between DVC versions):
import pytest

from dvc.fs.local import LocalFileSystem


def test_staging_file(tmp_dir, dvc):
    from dvc.objects import check
    from dvc.objects.stage import stage
    from dvc.objects.transfer import transfer

    tmp_dir.gen("foo", "foo")
    fs = LocalFileSystem()

    local_odb = dvc.odb.local
    # Hash the file and store the result in a temporary staging ODB.
    staging_odb, obj = stage(local_odb, tmp_dir / "foo", fs, "md5")

    # The object exists only in staging, not yet in the local cache.
    assert not local_odb.exists(obj.hash_info)
    assert staging_odb.exists(obj.hash_info)

    with pytest.raises(FileNotFoundError):
        check(local_odb, obj)
    check(staging_odb, obj)

    # Moving the object transfers it from staging into the local cache.
    transfer(staging_odb, local_odb, {obj.hash_info}, move=True)
    check(local_odb, obj)
    with pytest.raises(FileNotFoundError):
        check(staging_odb, obj)

    path_info = local_odb.hash_to_path_info(obj.hash_info.value)
    assert fs.exists(path_info)
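
Outside of a test, the same stage-then-transfer flow can be driven directly. The following is a minimal sketch only: the repository, the file name "data.csv", and the dvc.fs.local/dvc.path_info import paths are assumptions for illustration.

from dvc.fs.local import LocalFileSystem  # assumed import path
from dvc.objects.stage import stage
from dvc.objects.transfer import transfer
from dvc.path_info import PathInfo  # assumed import path
from dvc.repo import Repo

repo = Repo(".")  # assumed: an already-initialized DVC repository
fs = LocalFileSystem()
local_odb = repo.odb.local

# Hash the file into a temporary staging ODB, as in Example #1.
staging_odb, obj = stage(local_odb, PathInfo("data.csv"), fs, "md5")

# Move the staged object from staging into the local cache.
transfer(staging_odb, local_odb, {obj.hash_info}, move=True)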
Example #2
    # Note: Iterable, Optional, HashInfo, ObjectDB and get_index are imported
    # at module level in the original source file.
    def pull(
        self,
        objs: Iterable["HashInfo"],
        jobs: Optional[int] = None,
        remote: Optional[str] = None,
        odb: Optional["ObjectDB"] = None,
    ):
        """Pull data items in a cloud-agnostic way.

        Args:
            objs: objects to pull from the cloud.
            jobs: number of jobs that can be running simultaneously.
            remote: optional name of remote to pull from.
                By default remote from core.remote config option is used.
            odb: optional ODB to pull from. Overrides remote.
        """
        from dvc.objects.transfer import transfer

        if not odb:
            odb = self.get_remote_odb(remote, "pull")
        return transfer(
            odb,
            self.repo.odb.local,
            objs,
            jobs=jobs,
            src_index=get_index(odb),
            cache_odb=self.repo.odb.local,
            verify=odb.verify,
        )
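
A minimal usage sketch for the method above, assuming it is the pull method of an opened repository's cloud attribute and that the repository's outputs already carry hash information; the variable names and jobs=4 are illustrative.

from dvc.repo import Repo

repo = Repo(".")
# Collect the hashes of the repo's committed outputs (illustrative only).
hash_infos = {
    out.hash_info
    for stage in repo.stages
    for out in stage.outs
    if out.hash_info
}
# Pull them from the default remote into the local ODB.
repo.cloud.pull(hash_infos, jobs=4)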
Example #3
# Note: Tqdm and CacheLinkError are imported at module level in the original
# source file.
def _process_stages(repo, sub_targets, stages, no_commit, pbar, to_remote,
                    to_cache, **kwargs):
    link_failures = []
    from dvc.dvcfile import Dvcfile

    from ..output.base import OutputDoesNotExistError

    if to_remote or to_cache:
        # Already verified in the add()
        (stage,) = stages
        (target,) = sub_targets
        (out,) = stage.outs

        if to_remote:
            # Push the target straight to the configured remote.
            out.hash_info = repo.cloud.transfer(
                target,
                jobs=kwargs.get("jobs"),
                remote=kwargs.get("remote"),
                command="add",
            )
        else:
            # to_cache: import the external target into the local cache.
            from dvc.fs import get_cloud_fs
            from dvc.objects.transfer import transfer

            from_fs = get_cloud_fs(repo, url=target)
            out.hash_info = transfer(
                out.odb,
                from_fs,
                from_fs.path_info,
                jobs=kwargs.get("jobs"),
            )
            out.checkout()

        Dvcfile(repo, stage.path).dump(stage)
        return link_failures

    with Tqdm(
            total=len(stages),
            desc="Processing",
            unit="file",
            disable=len(stages) == 1,
    ) as pbar_stages:
        for stage in stages:
            try:
                stage.save()
            except OutputDoesNotExistError:
                pbar.n -= 1
                raise

            try:
                if not no_commit:
                    stage.commit()
            except CacheLinkError:
                link_failures.append(stage)

            Dvcfile(repo, stage.path).dump(stage)
            pbar_stages.update()

    return link_failures
Example #4
# Note: helpers such as make_subrepo, external_repo, clean_staging, stage,
# transfer, PathInfo, remove, makedirs and os are imported at module level in
# the original test file.
def test_subrepos_are_ignored(tmp_dir, erepo_dir):
    subrepo = erepo_dir / "dir" / "subrepo"
    make_subrepo(subrepo, erepo_dir.scm)
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("dir/foo", "foo", commit="foo")
        erepo_dir.scm_gen("dir/bar", "bar", commit="bar")

    with subrepo.chdir():
        subrepo.dvc_gen({"file": "file"}, commit="add files on subrepo")

    with external_repo(os.fspath(erepo_dir)) as repo:
        repo.repo_fs.download(
            PathInfo(repo.root_dir) / "dir",
            PathInfo(tmp_dir / "out"),
            follow_subrepos=False,
        )
        expected_files = {"foo": "foo", "bar": "bar", ".gitignore": "/foo\n"}
        assert (tmp_dir / "out").read_text() == expected_files

        # clear cache to test saving to cache
        cache_dir = tmp_dir / repo.odb.local.cache_dir
        remove(cache_dir)
        clean_staging()
        makedirs(cache_dir)

        # Re-stage the directory and move it into the now-empty local cache.
        staging, _, obj = stage(
            repo.odb.local,
            PathInfo(repo.root_dir) / "dir",
            repo.repo_fs,
            "md5",
            dvcignore=repo.dvcignore,
        )
        transfer(
            staging,
            repo.odb.local,
            {obj.hash_info},
            shallow=False,
            move=True,
        )
        assert set(cache_dir.glob("??/*")) == {
            cache_dir / "e1" / "d9e8eae5374860ae025ec84cfd85c7.dir",
            cache_dir / "37" / "b51d194a7513e45b56f6524f2d51f2",
            cache_dir / "94" / "7d2b84e5aa88170e80dff467a5bfb6",
            cache_dir / "ac" / "bd18db4cc2f85cedef654fccc4a4d8",
        }
Example #5
    def transfer(self, from_fs, from_info, jobs=None, no_progress_bar=False):
        """Transfer the object at from_info on from_fs into self.odb."""
        from dvc.objects.transfer import transfer

        return transfer(
            self.odb,
            from_fs,
            from_info,
            jobs=jobs,
            no_progress_bar=no_progress_bar,
        )
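
A minimal sketch of calling the wrapper above, assuming it lives on an output-like object that exposes an odb and that get_cloud_fs behaves as in Example #3; the URL, the output lookup, and jobs=4 are illustrative.

from dvc.fs import get_cloud_fs
from dvc.repo import Repo

repo = Repo(".")  # assumed: an already-initialized DVC repository
out = repo.stages[0].outs[0]  # illustrative: some tracked output

# Build a cloud filesystem for an external URL, as in Example #3.
from_fs = get_cloud_fs(repo, url="s3://bucket/data.csv")

# Transfer the external file into the output's ODB via the wrapper above.
out.transfer(from_fs, from_fs.path_info, jobs=4)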