Esempio n. 1
0
    def save(self):
        """Validate the output and record its hash/object in the cache.

        Raises:
            DoesNotExistError: the output path does not exist.
            IsNotFileOrDirError: the path is neither a regular file nor a
                directory.
        """
        if not self.exists:
            raise self.DoesNotExistError(self)

        if not self.isfile and not self.isdir:
            raise self.IsNotFileOrDirError(self)

        if self.is_empty:
            logger.warning(f"'{self}' is empty.")

        # Make sure the output is gitignored before we record anything.
        self.ignore()

        if self.metric or self.plot:
            self.verify_metric()

        # Outputs that bypass the cache only get their hash recorded.
        if not self.use_cache:
            self.hash_info = self.get_hash()
            if not self.IS_DEPENDENCY:
                logger.debug("Output '%s' doesn't use cache. Skipping saving.",
                             self)
            return

        assert not self.IS_DEPENDENCY

        if not self.changed():
            logger.debug("Output '%s' didn't change. Skipping saving.", self)
            return

        self.obj = objects.stage(self.cache, self.path_info, self.fs)
        self.hash_info = self.obj.hash_info
        # BUG FIX: `isfile` is accessed as a property everywhere else in this
        # method (see the file/dir check above); calling it as `self.isfile()`
        # would raise TypeError ('bool' object is not callable).
        self.isexec = self.isfile and self.fs.isexec(self.path_info)
Esempio n. 2
0
def test_fetch_external_repo_jobs(tmp_dir, scm, mocker, dvc, local_remote):
    """`jobs` passed to stage/save must be forwarded to the cloud pull."""
    # A four-file directory, tracked and committed so external_repo sees it.
    tmp_dir.dvc_gen(
        {"dir1": {f"file{i}": f"file{i}" for i in range(1, 5)}},
        commit="init",
    )

    dvc.push()

    with external_repo(str(tmp_dir)) as repo:
        spy = mocker.spy(repo.cloud, "pull")

        obj = stage(
            dvc.cache.local,
            PathInfo(repo.root_dir) / "dir1",
            repo.repo_fs,
            follow_subrepos=False,
            jobs=3,
        )
        save(dvc.cache.local, obj, jobs=3)

        # A mock call unpacks as (args, kwargs); inspect the first pull call.
        _, pull_kwargs = tuple(spy.call_args_list[0])
        assert pull_kwargs.get("jobs") == 3
Esempio n. 3
0
def _fetch_external(self, repo_url, repo_rev, files, jobs):
    """Fetch `files` from an external repo into the local cache.

    Returns:
        tuple: (total number of results reported by pull/save callbacks,
        number of repos that failed to clone).
    """
    from dvc.external_repo import external_repo
    from dvc.objects import save, stage
    from dvc.path_info import PathInfo
    from dvc.scm.base import CloneError

    failed = 0

    results = []

    def cb(result):
        results.append(result)

    cache = self.cache.local
    try:
        with external_repo(repo_url, repo_rev,
                           cache_dir=cache.cache_dir) as repo:
            root = PathInfo(repo.root_dir)
            for path in files:
                path_info = root / path
                try:
                    used = repo.used_cache(
                        [os.fspath(path_info)],
                        force=True,
                        jobs=jobs,
                        recursive=True,
                    )
                    # CONSISTENCY FIX: pass `jobs` by keyword, matching every
                    # other `jobs=` call in this function (callers that spy on
                    # pull read it from the call's kwargs).
                    cb(repo.cloud.pull(used, jobs=jobs))
                except (NoOutputOrStageError, NoRemoteError):
                    # Best-effort: the path may not be dvc-tracked or there
                    # may be no remote; fall through and stage it directly.
                    pass
                obj = stage(
                    cache,
                    path_info,
                    repo.repo_fs,
                    jobs=jobs,
                    follow_subrepos=False,
                )
                save(
                    cache,
                    obj,
                    jobs=jobs,
                    download_callback=cb,
                )
    except CloneError:
        failed += 1
        # Lazy %-formatting: the message is only rendered if emitted.
        logger.exception(
            "failed to fetch data for '%s'", ", ".join(files)
        )

    return sum(results), failed
Esempio n. 4
0
    def commit(self, filter_info=None):
        """Commit the output's (or a filtered subpath's) data to the cache.

        Args:
            filter_info: optional path narrowing the commit to a subpath of
                this output; defaults to the whole output path.

        Raises:
            DoesNotExistError: the output path does not exist.
        """
        if not self.exists:
            raise self.DoesNotExistError(self)

        assert self.hash_info

        if not self.use_cache:
            return

        target = filter_info or self.path_info
        staged = objects.stage(self.cache, target, self.fs)
        objects.save(self.cache, staged)
        # Relink the workspace copy to the freshly cached object.
        checkout(
            target,
            self.fs,
            staged,
            self.cache,
            relink=True,
        )
        self.set_exec()
Esempio n. 5
0
    def download(self, to, jobs=None):
        """Stage and cache this dependency's data, then check it out at `to`.

        Args:
            to: destination output (provides `path_info` and `fs`).
            jobs: parallelism forwarded to staging and saving.
        """
        from dvc.checkout import checkout
        from dvc.objects import save, stage

        local_cache = self.repo.cache.local

        with self._make_repo(cache_dir=local_cache.cache_dir) as repo:
            # Pin the revision lock the first time this repo is resolved.
            if self.def_repo.get(self.PARAM_REV_LOCK) is None:
                self.def_repo[self.PARAM_REV_LOCK] = repo.get_rev()

            src = PathInfo(repo.root_dir) / self.def_path
            staged = stage(
                local_cache,
                src,
                repo.repo_fs,
                jobs=jobs,
                follow_subrepos=False,
            )
            save(local_cache, staged, jobs=jobs)

        checkout(to.path_info, to.fs, staged, local_cache)
Esempio n. 6
0
def test_subrepos_are_ignored(tmp_dir, erepo_dir):
    """With follow_subrepos=False, nested repos are excluded from downloads
    and from cache saves."""
    subrepo = erepo_dir / "dir" / "subrepo"
    make_subrepo(subrepo, erepo_dir.scm)
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("dir/foo", "foo", commit="foo")
        erepo_dir.scm_gen("dir/bar", "bar", commit="bar")

    with subrepo.chdir():
        subrepo.dvc_gen({"file": "file"}, commit="add files on subrepo")

    with external_repo(os.fspath(erepo_dir)) as repo:
        repo.repo_fs.download(
            PathInfo(repo.root_dir) / "dir",
            PathInfo(tmp_dir / "out"),
            follow_subrepos=False,
        )
        # The subrepo's "file" must not show up in the downloaded tree.
        assert (tmp_dir / "out").read_text() == {
            "foo": "foo",
            "bar": "bar",
            ".gitignore": "/foo\n",
        }

        # Wipe the cache so the save below has to write everything fresh.
        cache_dir = tmp_dir / repo.cache.local.cache_dir
        remove(cache_dir)
        makedirs(cache_dir)

        obj = stage(
            repo.cache.local,
            PathInfo(repo.root_dir) / "dir",
            repo.repo_fs,
            follow_subrepos=False,
        )
        save(repo.cache.local, obj)
        # Only the outer repo's files (plus the .dir object) get cached.
        assert set(cache_dir.glob("*/*")) == {
            cache_dir / "e1" / "d9e8eae5374860ae025ec84cfd85c7.dir",
            cache_dir / "37" / "b51d194a7513e45b56f6524f2d51f2",
            cache_dir / "94" / "7d2b84e5aa88170e80dff467a5bfb6",
            cache_dir / "ac" / "bd18db4cc2f85cedef654fccc4a4d8",
        }
Esempio n. 7
0
 def get_hash(self):
     """Return the hash info for this output's path.

     Cached outputs are staged through the object store; uncached ones are
     hashed directly with the filesystem's checksum parameter.
     """
     if self.use_cache:
         return objects.stage(self.cache, self.path_info, self.fs).hash_info
     return get_hash(self.path_info, self.fs, self.fs.PARAM_CHECKSUM)