Exemple #1
0
    def _copy_if_git_file(self, to_path):
        """Copy ``self.def_path`` from a cached clone if it is a git file.

        The repository described by ``self.def_repo`` is obtained through
        ``cached_clone`` and ``self._is_git_file`` decides whether the path
        qualifies.

        Args:
            to_path: destination path; made absolute before copying.

        Returns:
            True when the file was copied, False when ``_is_git_file``
            rejected the path (nothing is copied in that case).
        """
        rel_path = self.def_path
        clone_dir = cached_clone(**self.def_repo)

        if self._is_git_file(clone_dir, rel_path):
            fs_copy(
                os.path.join(clone_dir, rel_path),
                os.path.abspath(to_path),
            )
            return True

        return False
Exemple #2
0
    def _copy_if_git_file(self, to_path):
        """Copy ``self.def_path`` from a cached clone and record its revision.

        The repository described by ``self.def_repo`` is obtained through
        ``cached_clone`` and ``self._is_git_file`` decides whether the path
        qualifies. After a successful copy, the revision reported by
        ``SCM(...).get_rev()`` for the clone is stored in ``self.def_repo``
        under ``self.PARAM_REV_LOCK``.

        Args:
            to_path: destination path; made absolute before copying.

        Returns:
            True when the file was copied, False when ``_is_git_file``
            rejected the path (nothing is copied or recorded in that case).
        """
        rel_path = self.def_path
        clone_dir = cached_clone(**self.def_repo)

        if self._is_git_file(clone_dir, rel_path):
            fs_copy(
                os.path.join(clone_dir, rel_path),
                os.path.abspath(to_path),
            )
            # Remember which revision the copy came from.
            current_rev = SCM(clone_dir).get_rev()
            self.def_repo[self.PARAM_REV_LOCK] = current_rev
            return True

        return False
Exemple #3
0
def _get_from_dvc_cache(url, path, out, rev, cache_dir):
    """Try to fetch ``path`` as a cached DVC output of the repo at ``url``.

    Returns True when the output was found, uses the cache and was handed to
    ``_get_cached``. Returns False when the repo is not a DVC repo, the path
    is not an output of it, or the output is not cached -- the caller is then
    expected to fall back to copying the file from git.
    """
    try:
        with external_repo(cache_dir=cache_dir, url=url, rev=rev) as repo:
            # Prefer any link type that avoids duplicating data.
            #
            # Symlinks are left out: the cache is removed once we are done,
            # so the data would have to be copied over before that anyway,
            # and we might as well copy it right away.
            #
            # A theoretical "move" link type is not an option either, since
            # the same cache file might be used several times in a directory.
            repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]
            tracked = repo.find_out_by_relpath(path)
            if tracked.use_cache:
                _get_cached(repo, tracked, out)
                return True
    except (NotDvcRepoError, NoOutputInExternalRepoError):
        # Not a DVC repository or, possibly, path is not tracked by DVC.
        pass

    return False


def get(url, path, out=None, rev=None):
    """Fetch ``path`` from the repository at ``url`` into ``out``.

    A cached DVC output is tried first; otherwise the file is copied from a
    cached git clone of the repository.

    Args:
        url: location of the source repository.
        path: path of the wanted file inside that repository.
        out: destination; resolved through ``resolve_output(path, out)``.
        rev: revision forwarded to ``external_repo`` and ``cached_clone``.

    Raises:
        GetDVCFileError: if the resolved output is a valid stage filename.
        PathMissingError: if the path could not be found (raised in place of
            ``OutputNotFoundError`` / ``FileNotFoundError``).
    """
    out = resolve_output(path, out)

    if Stage.is_valid_filename(out):
        raise GetDVCFileError()

    # The scratch directory lives right next to the output so that both are
    # on the same filesystem and reflink/hardlink can be used.
    # tempfile.TemporaryDirectory is avoided on purpose: it will create a
    # symlink to tmpfs, which defeats the purpose and won't work with
    # reflink/hardlink.
    out_parent = os.path.dirname(os.path.abspath(out))
    scratch = os.path.join(out_parent, "." + str(shortuuid.uuid()))
    try:
        if _get_from_dvc_cache(url, path, out, rev, scratch):
            return

        # No cached DVC output was used: try to copy straight from git.
        if os.path.isabs(path):
            raise FileNotFoundError

        clone_dir = cached_clone(url, rev=rev)
        git_file = os.path.join(clone_dir, path)
        fs_copy(git_file, out)
    except (OutputNotFoundError, FileNotFoundError):
        raise PathMissingError(path, url)
    finally:
        # Always drop the scratch cache, whatever the outcome.
        remove(scratch)
Exemple #4
0
    def _get_checksum(self, updated=False):
        """Return the checksum of ``self.def_path`` in the source repository.

        The md5 recorded for the matching output of the repo opened by
        ``self._make_repo`` is preferred. If that repo is not a DVC repo or
        has no such output, the checksum is computed by the local cache from
        the file inside a cached clone.

        Args:
            updated: when true, the stored rev lock is ignored, so the
                revision falls back to ``self.def_repo``'s ``PARAM_REV``
                entry for the clone (and ``rev_lock=None`` is passed to
                ``_make_repo``).

        Returns:
            The ``"md5"`` entry of the output's ``info``, or whatever
            ``self.repo.cache.local.get_checksum`` returns for the cloned
            file.
        """
        pinned = None if updated else self.def_repo.get(self.PARAM_REV_LOCK)

        try:
            with self._make_repo(rev_lock=pinned) as repo:
                tracked = repo.find_out_by_relpath(self.def_path)
                return tracked.info["md5"]
        except (NotDvcRepoError, NoOutputInExternalRepoError):
            # Fall through and clone
            pass

        clone_url = self.def_repo[self.PARAM_URL]
        clone_rev = pinned or self.def_repo.get(self.PARAM_REV)
        clone_dir = cached_clone(clone_url, rev=clone_rev)

        file_info = PathInfo(os.path.join(clone_dir, self.def_path))
        return self.repo.cache.local.get_checksum(file_info)