Esempio n. 1
0
    def make(name, *, scm=False, dvc=False, subdir=False):
        """
        Build a `TmpDir` populated from a cached, pre-initialized template.

        A template directory is created once per `(scm, dvc, subdir)`
        combination and remembered in `CACHE`; every call copies the
        template's entries into the target directory.

        `name` is either a string (a fresh temporary directory is made from
        it) or an already existing path that is used as is.
        """
        from shutil import ignore_patterns

        from dvc.repo import Repo
        from dvc.scm.git import Git
        from dvc.utils.fs import fs_copy

        key = (scm, dvc, subdir)
        template = CACHE.get(key)
        if not template:
            # First request for this combination: initialize the template
            # once and remember where it lives.
            template = tmp_path_factory.mktemp("dvc-test-cache" + worker_id)
            TmpDir(template).init(scm=scm, dvc=dvc, subdir=subdir)
            CACHE[key] = os.fspath(template)

        if isinstance(name, str):
            target = tmp_path_factory.mktemp(name)
        else:
            target = name

        # Skip the sqlite files under .dvc/tmp: the cache connection may
        # still be open, which leads to PermissionErrors on Windows.
        skip_sqlite = ignore_patterns("cache.db*")

        # Copy entry by entry, since shutil.copytree's dirs_exist_ok is only
        # available in Python >= 3.8.
        for item in os.listdir(template):
            source = os.path.join(template, item)
            destination = os.path.join(target, item)
            fs_copy(source, destination, ignore=skip_sqlite)

        tmp_dir = TmpDir(target)
        root = os.fspath(tmp_dir)
        if dvc:
            tmp_dir.dvc = Repo(root)
        if scm:
            # Reuse the SCM object of the DVC repo when there is one.
            if hasattr(tmp_dir, "dvc"):
                tmp_dir.scm = tmp_dir.dvc.scm
            else:
                tmp_dir.scm = Git(root)
        request.addfinalizer(tmp_dir.close)
        return tmp_dir
Esempio n. 2
0
    def pull_to(self, path, to_info):
        """
        Fetch the file or directory at `path` and check it out into
        `to_info`.

        Handles files tracked by DVC, files tracked by Git, and local files
        living outside the repository.

        Raises `PathMissingError` when the path cannot be found.
        """
        source_info = PathInfo(self.root_dir) / path

        try:
            (tracked, ) = self.find_outs_by_path(
                fspath(source_info), strict=False
            )
        except OutputNotFoundError:
            # Not a DVC output; fall back to a plain copy below.
            tracked = None

        try:
            if tracked and tracked.use_cache:
                self._pull_cached(tracked, source_info, to_info)
            elif os.path.isabs(path):
                # Files handled by Git can't have an absolute path.
                raise FileNotFoundError
            else:
                fs_copy(fspath(source_info), fspath(to_info))
        except FileNotFoundError:
            raise PathMissingError(path, self.url)
Esempio n. 3
0
    def pull_to(self, path, to_info):
        """
        Copy `path`, taken relative to `self.root_dir`, into `to_info`.

        Raises `PathMissingError` when `path` is absolute or when the copy
        fails with `FileNotFoundError`.
        """
        try:
            # Files handled by Git can't have an absolute path.
            if os.path.isabs(path):
                raise FileNotFoundError

            source = os.path.join(self.root_dir, path)
            destination = fspath(to_info)
            fs_copy(source, destination)
        except FileNotFoundError:
            raise PathMissingError(path, self.url)
Esempio n. 4
0
    def _copy_if_git_file(self, to_path):
        """
        Copy `self.def_path` out of a cached clone of the source repository
        when `self._is_git_file` accepts it.

        Returns True if the file was copied to `to_path`, False otherwise.
        """
        rel_path = self.def_path
        clone_dir = cached_clone(**self.def_repo)

        if self._is_git_file(clone_dir, rel_path):
            source = os.path.join(clone_dir, rel_path)
            destination = os.path.abspath(to_path)
            fs_copy(source, destination)
            return True

        return False
Esempio n. 5
0
    def _copy_if_git_file(self, to_path):
        """
        Copy `self.def_path` from the repository produced by
        `self._make_repo` when `self._is_git_file` accepts it.

        Returns True if the file was copied to `to_path`, False otherwise.
        """
        rel_path = self.def_path
        with self._make_repo(
            cache_dir=self.repo.cache.local.cache_dir
        ) as source_repo:
            if not self._is_git_file(source_repo, rel_path):
                return False

            fs_copy(
                os.path.join(source_repo.root_dir, rel_path),
                os.path.abspath(to_path),
            )
        return True
Esempio n. 6
0
    def _copy_if_git_file(self, to_path):
        """
        Copy `self.def_path` out of a cached clone of the source repository
        when `self._is_git_file` accepts it, and record the clone's current
        revision under `PARAM_REV_LOCK` in `self.def_repo`.

        Returns True if the file was copied to `to_path`, False otherwise.
        """
        rel_path = self.def_path
        clone_dir = cached_clone(**self.def_repo)

        if self._is_git_file(clone_dir, rel_path):
            source = os.path.join(clone_dir, rel_path)
            destination = os.path.abspath(to_path)
            fs_copy(source, destination)

            # Pin the revision the file was copied from.
            current_rev = SCM(clone_dir).get_rev()
            self.def_repo[self.PARAM_REV_LOCK] = current_rev
            return True

        return False
Esempio n. 7
0
    def pull_to(self, path, to_info):
        """
        Fetch `path` into `to_info`.

        An output found by `find_out_by_relpath` that uses the cache is
        pulled via `_pull_cached`; anything else is copied from
        `self.root_dir`.

        Raises `PathMissingError` when the path cannot be found.
        """
        try:
            try:
                tracked = self.find_out_by_relpath(path)
            except OutputNotFoundError:
                # Not a DVC output; fall back to a plain copy below.
                tracked = None

            if tracked and tracked.use_cache:
                self._pull_cached(tracked, to_info)
            elif os.path.isabs(path):
                # Files handled by Git can't have an absolute path.
                raise FileNotFoundError
            else:
                source = os.path.join(self.root_dir, path)
                fs_copy(source, fspath(to_info))
        except FileNotFoundError:
            raise PathMissingError(path, self.url)
Esempio n. 8
0
def get(url, path, out=None, rev=None):
    """
    Fetch `path` from the repository at `url` (at revision `rev`) into the
    location resolved from `path` and `out`.

    Raises:
        GetDVCFileError: the resolved output is a valid stage filename.
        PathMissingError: `path` could not be found in the repository.
        UrlNotDvcRepoError: `url` does not point to a DVC repository.
    """
    target = resolve_output(path, out)

    if Stage.is_valid_filename(target):
        raise GetDVCFileError()

    # The temporary cache lives right next to the output, so that both are
    # on the same filesystem and reflink and/or hardlink can be used.
    # tempfile.TemporaryDirectory is avoided because it would create a
    # symlink to tmpfs, which defeats the purpose and won't work with
    # reflink/hardlink.
    parent_dir = os.path.dirname(os.path.abspath(target))
    scratch_cache = os.path.join(parent_dir, "." + str(shortuuid.uuid()))
    try:
        with external_repo(cache_dir=scratch_cache, url=url, rev=rev) as repo:
            # Prefer any link type that avoids duplicating data.
            #
            # Symlinks are left out: the cache is removed once we are done,
            # so the data would have to be copied over before that anyway.
            #
            # A theoretical "move" link type is left out as well, because
            # the same cache file might be used several times in one
            # directory.
            repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]

            try:
                tracked = repo.find_out_by_relpath(path)
            except OutputNotFoundError:
                tracked = None

            if tracked and tracked.use_cache:
                _get_cached(repo, tracked, target)
            elif os.path.isabs(path):
                # Either an uncached out with an absolute path or a user
                # error.
                raise FileNotFoundError
            else:
                fs_copy(os.path.join(repo.root_dir, path), target)

    except (OutputNotFoundError, FileNotFoundError):
        raise PathMissingError(path, url)
    except NotDvcRepoError:
        raise UrlNotDvcRepoError(url)
    finally:
        remove(scratch_cache)
Esempio n. 9
0
File: get.py Progetto: woodshop/dvc
def get(url, path, out=None, rev=None):
    """
    Fetch `path` from the repository at `url` (at revision `rev`) into the
    location resolved from `path` and `out`.

    A cached DVC output is fetched through `_get_cached`. Otherwise the
    path is copied from a cached clone of the repository.

    Raises:
        GetDVCFileError: the resolved output is a valid stage filename.
        PathMissingError: `path` could not be found in the repository.
    """
    target = resolve_output(path, out)

    if Stage.is_valid_filename(target):
        raise GetDVCFileError()

    # The temporary cache lives right next to the output, so that both are
    # on the same filesystem and reflink and/or hardlink can be used.
    # tempfile.TemporaryDirectory is avoided because it would create a
    # symlink to tmpfs, which defeats the purpose and won't work with
    # reflink/hardlink.
    parent_dir = os.path.dirname(os.path.abspath(target))
    scratch_cache = os.path.join(parent_dir, "." + str(shortuuid.uuid()))
    try:
        try:
            with external_repo(
                cache_dir=scratch_cache, url=url, rev=rev
            ) as repo:
                # Prefer any link type that avoids duplicating data.
                #
                # Symlinks are left out: the cache is removed once we are
                # done, so the data would have to be copied over before
                # that anyway.
                #
                # A theoretical "move" link type is left out as well,
                # because the same cache file might be used several times
                # in one directory.
                repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]
                tracked = repo.find_out_by_relpath(path)
                if tracked.use_cache:
                    _get_cached(repo, tracked, target)
                    return
                # A non-cached output: continue with the Git copy below.
        except (NotDvcRepoError, NoOutputInExternalRepoError):
            # Either not a DVC repository or, possibly, the path is not
            # tracked by DVC: continue with the Git copy below.
            pass

        if os.path.isabs(path):
            raise FileNotFoundError

        clone_dir = cached_clone(url, rev=rev)

        fs_copy(os.path.join(clone_dir, path), target)
    except (OutputNotFoundError, FileNotFoundError):
        raise PathMissingError(path, url)
    finally:
        remove(scratch_cache)
Esempio n. 10
0
    def make(name, *, scm=False, dvc=False, subdir=False):
        """
        Build a `TmpDir` populated from a cached, pre-initialized template.

        A template directory is created once per `(scm, dvc, subdir)`
        combination and remembered in `CACHE`; every call copies the
        template's entries into the target directory.

        `name` is either a string (a fresh temporary directory is made from
        it) or an already existing path that is used as is.
        """
        from dvc.repo import Repo
        from dvc.scm.git import Git
        from dvc.utils.fs import fs_copy

        key = (scm, dvc, subdir)
        template = CACHE.get(key)
        if not template:
            # First request for this combination: initialize the template
            # once and remember where it lives.
            template = tmp_path_factory.mktemp("dvc-test-cache" + worker_id)
            TmpDir(template).init(scm=scm, dvc=dvc, subdir=subdir)
            CACHE[key] = os.fspath(template)

        if isinstance(name, str):
            target = tmp_path_factory.mktemp(name)
        else:
            target = name

        # Copy entry by entry, since shutil.copytree's dirs_exist_ok is only
        # available in Python >= 3.8.
        for item in os.listdir(template):
            source = os.path.join(template, item)
            destination = os.path.join(target, item)
            fs_copy(source, destination)

        tmp_dir = TmpDir(target)
        root = os.fspath(tmp_dir)
        if dvc:
            tmp_dir.dvc = Repo(root)
        if scm:
            # Reuse the SCM object of the DVC repo when there is one.
            if hasattr(tmp_dir, "dvc"):
                tmp_dir.scm = tmp_dir.dvc.scm
            else:
                tmp_dir.scm = Git(root)
        request.addfinalizer(tmp_dir.close)
        return tmp_dir