def make(name, *, scm=False, dvc=False, subdir=False):
    """Build a ``TmpDir`` populated from a cached template directory.

    One template per ``(scm, dvc, subdir)`` combination is created on first
    use (through ``TmpDir(...).init``) and remembered in ``CACHE``; every
    call then copies the template's entries into the target directory.

    ``name`` is either a string, which is passed to
    ``tmp_path_factory.mktemp``, or any other object, which is used as the
    target path as-is.

    Returns the new ``TmpDir``. Its ``dvc`` attribute is set when ``dvc`` is
    true and its ``scm`` attribute when ``scm`` is true. ``close`` is
    registered as a finalizer on ``request``.
    """
    from shutil import ignore_patterns

    from dvc.repo import Repo
    from dvc.scm.git import Git
    from dvc.utils.fs import fs_copy

    key = (scm, dvc, subdir)
    template = CACHE.get(key)
    if not template:
        template = tmp_path_factory.mktemp("dvc-test-cache" + worker_id)
        TmpDir(template).init(scm=scm, dvc=dvc, subdir=subdir)
        CACHE[key] = os.fspath(template)

    if isinstance(name, str):
        target = tmp_path_factory.mktemp(name)
    else:
        target = name

    # Skip the sqlite files under .dvc/tmp: the cache connection might not
    # have been closed, which results in PermissionErrors on Windows.
    skip_sqlite = ignore_patterns("cache.db*")

    # Copy entry by entry, because shutil.copytree's dirs_exist_ok is only
    # available in Python >= 3.8.
    for item in os.listdir(template):
        fs_copy(
            os.path.join(template, item),
            os.path.join(target, item),
            ignore=skip_sqlite,
        )

    tmp_dir = TmpDir(target)
    root = os.fspath(tmp_dir)
    if dvc:
        tmp_dir.dvc = Repo(root)
    if scm:
        # Reuse the SCM object of the DVC repo when there is one.
        if hasattr(tmp_dir, "dvc"):
            tmp_dir.scm = tmp_dir.dvc.scm
        else:
            tmp_dir.scm = Git(root)

    request.addfinalizer(tmp_dir.close)
    return tmp_dir
def pull_to(self, path, to_info):
    """
    Fetch the file or directory given by `path` and place it at `to_info`.

    Handles paths tracked by DVC or by Git, as well as local files that
    live outside the repository.

    Raises `PathMissingError` when a `FileNotFoundError` occurs while
    pulling or copying, or when an uncached `path` is absolute.
    """
    src_info = PathInfo(self.root_dir) / path

    tracked = None
    with suppress(OutputNotFoundError):
        (tracked,) = self.find_outs_by_path(fspath(src_info), strict=False)

    try:
        if not (tracked and tracked.use_cache):
            # Not a cached output, so it has to be handled by Git, and a
            # Git-handled file can't have an absolute path.
            if os.path.isabs(path):
                raise FileNotFoundError

            fs_copy(fspath(src_info), fspath(to_info))
        else:
            self._pull_cached(tracked, src_info, to_info)
    except FileNotFoundError:
        raise PathMissingError(path, self.url)
def pull_to(self, path, to_info):
    """Copy `path`, taken relative to `self.root_dir`, to `to_info`.

    Raises `PathMissingError` when `path` is absolute or when the copy
    fails with `FileNotFoundError`.
    """
    try:
        # Files handled by Git can't have an absolute path.
        if os.path.isabs(path):
            raise FileNotFoundError

        source = os.path.join(self.root_dir, path)
        destination = fspath(to_info)
        fs_copy(source, destination)
    except FileNotFoundError:
        raise PathMissingError(path, self.url)
def _copy_if_git_file(self, to_path):
    """Copy `self.def_path` out of a cached clone if it is a Git file.

    The clone is obtained with `cached_clone(**self.def_repo)`.

    Returns True after copying to the absolute form of `to_path`, or
    False, without copying, when `_is_git_file` rejects the path.
    """
    rel_path = self.def_path
    clone_dir = cached_clone(**self.def_repo)

    if self._is_git_file(clone_dir, rel_path):
        fs_copy(os.path.join(clone_dir, rel_path), os.path.abspath(to_path))
        return True

    return False
def _copy_if_git_file(self, to_path):
    """Copy `self.def_path` out of the external repo if it is a Git file.

    The repo comes from `self._make_repo`, opened with the local cache
    directory of `self.repo`.

    Returns True after copying to the absolute form of `to_path`, or
    False, without copying, when `_is_git_file` rejects the path.
    """
    rel_path = self.def_path
    local_cache_dir = self.repo.cache.local.cache_dir

    with self._make_repo(cache_dir=local_cache_dir) as repo:
        if self._is_git_file(repo, rel_path):
            source = os.path.join(repo.root_dir, rel_path)
            destination = os.path.abspath(to_path)
            fs_copy(source, destination)
            return True

        return False
def _copy_if_git_file(self, to_path):
    """Copy `self.def_path` out of a cached clone if it is a Git file.

    The clone is obtained with `cached_clone(**self.def_repo)`. After a
    successful copy, the clone's current revision (from
    `SCM(...).get_rev()`) is stored in `self.def_repo` under
    `self.PARAM_REV_LOCK`.

    Returns True after copying to the absolute form of `to_path`, or
    False, without copying or touching `self.def_repo`, when
    `_is_git_file` rejects the path.
    """
    rel_path = self.def_path
    clone_dir = cached_clone(**self.def_repo)

    if self._is_git_file(clone_dir, rel_path):
        fs_copy(os.path.join(clone_dir, rel_path), os.path.abspath(to_path))

        # Remember which revision the file was copied from.
        locked_rev = SCM(clone_dir).get_rev()
        self.def_repo[self.PARAM_REV_LOCK] = locked_rev
        return True

    return False
def pull_to(self, path, to_info):
    """Pull `path` into `to_info`, from the cache or as a plain file.

    When `path` resolves to an output that uses the cache, it is pulled
    with `_pull_cached`. Otherwise `path` is copied from under
    `self.root_dir`.

    Raises `PathMissingError` when an uncached `path` is absolute or when
    any step fails with `FileNotFoundError`.
    """
    try:
        try:
            tracked = self.find_out_by_relpath(path)
        except OutputNotFoundError:
            # No such output; fall back to copying a plain file.
            tracked = None

        if not (tracked and tracked.use_cache):
            # Files handled by Git can't have an absolute path.
            if os.path.isabs(path):
                raise FileNotFoundError

            source = os.path.join(self.root_dir, path)
            fs_copy(source, fspath(to_info))
        else:
            self._pull_cached(tracked, to_info)
    except FileNotFoundError:
        raise PathMissingError(path, self.url)
def get(url, path, out=None, rev=None):
    """Fetch `path` from the repository at `url` (at `rev`) into `out`.

    The destination is first resolved with `resolve_output(path, out)`.

    Raises:
        GetDVCFileError: the resolved destination is a valid stage filename.
        PathMissingError: `path` is not a cached output and is absolute,
            or an `OutputNotFoundError`/`FileNotFoundError` occurs.
        UrlNotDvcRepoError: a `NotDvcRepoError` occurs.
    """
    target = resolve_output(path, out)

    if Stage.is_valid_filename(target):
        raise GetDVCFileError()

    # The temporary cache goes right beside the output so that both are on
    # the same filesystem and reflink and/or hardlink can be used.
    # tempfile.TemporaryDirectory is not used because it will create a
    # symlink to tmpfs, which defeats the purpose and won't work with
    # reflink/hardlink.
    target_parent = os.path.dirname(os.path.abspath(target))
    scratch_cache = os.path.join(target_parent, "." + str(shortuuid.uuid()))

    try:
        with external_repo(cache_dir=scratch_cache, url=url, rev=rev) as repo:
            # Try every link type that avoids duplicating data.
            #
            # Symlinks are left out: the cache is removed once we are done,
            # so the data would have to be copied over anyway before that.
            #
            # A theoretical "move" link type is left out too, because the
            # same cache file might be used a few times in a directory.
            repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]

            tracked = None
            try:
                tracked = repo.find_out_by_relpath(path)
            except OutputNotFoundError:
                pass

            if tracked and tracked.use_cache:
                _get_cached(repo, tracked, target)
            elif os.path.isabs(path):
                # Either an uncached out with absolute path or a user error.
                raise FileNotFoundError
            else:
                fs_copy(os.path.join(repo.root_dir, path), target)
    except (OutputNotFoundError, FileNotFoundError):
        raise PathMissingError(path, url)
    except NotDvcRepoError:
        raise UrlNotDvcRepoError(url)
    finally:
        remove(scratch_cache)
def get(url, path, out=None, rev=None):
    """Fetch `path` from the repository at `url` (at `rev`) into `out`.

    The destination is first resolved with `resolve_output(path, out)`.
    A cached DVC output is fetched through `_get_cached`. Otherwise the
    file is copied out of a clone obtained with `cached_clone`.

    Raises:
        GetDVCFileError: the resolved destination is a valid stage filename.
        PathMissingError: the fallback copy is attempted for an absolute
            `path`, or an `OutputNotFoundError`/`FileNotFoundError` occurs.
    """
    target = resolve_output(path, out)

    if Stage.is_valid_filename(target):
        raise GetDVCFileError()

    # The temporary cache goes right beside the output so that both are on
    # the same filesystem and reflink and/or hardlink can be used.
    # tempfile.TemporaryDirectory is not used because it will create a
    # symlink to tmpfs, which defeats the purpose and won't work with
    # reflink/hardlink.
    target_parent = os.path.dirname(os.path.abspath(target))
    scratch_cache = os.path.join(target_parent, "." + str(shortuuid.uuid()))

    try:
        try:
            with external_repo(
                cache_dir=scratch_cache, url=url, rev=rev
            ) as repo:
                # Try every link type that avoids duplicating data.
                #
                # Symlinks are left out: the cache is removed once we are
                # done, so the data would have to be copied over anyway
                # before that.
                #
                # A theoretical "move" link type is left out too, because
                # the same cache file might be used a few times in a
                # directory.
                repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]

                tracked = repo.find_out_by_relpath(path)
                if tracked.use_cache:
                    _get_cached(repo, tracked, target)
                    return
                # An output that is not cached: continue below and try to
                # copy it from git.
        except (NotDvcRepoError, NoOutputInExternalRepoError):
            # Not a DVC repository or, possibly, the path is not tracked
            # by DVC: continue below and try to copy it from git.
            pass

        if os.path.isabs(path):
            raise FileNotFoundError

        clone_dir = cached_clone(url, rev=rev)
        fs_copy(os.path.join(clone_dir, path), target)
    except (OutputNotFoundError, FileNotFoundError):
        raise PathMissingError(path, url)
    finally:
        remove(scratch_cache)
def make(name, *, scm=False, dvc=False, subdir=False):
    """Build a ``TmpDir`` populated from a cached template directory.

    One template per ``(scm, dvc, subdir)`` combination is created on first
    use (through ``TmpDir(...).init``) and remembered in ``CACHE``; every
    call then copies the template's entries into the target directory.

    ``name`` is either a string, which is passed to
    ``tmp_path_factory.mktemp``, or any other object, which is used as the
    target path as-is.

    Returns the new ``TmpDir``. Its ``dvc`` attribute is set when ``dvc`` is
    true and its ``scm`` attribute when ``scm`` is true. ``close`` is
    registered as a finalizer on ``request``.
    """
    from dvc.repo import Repo
    from dvc.scm.git import Git
    from dvc.utils.fs import fs_copy

    key = (scm, dvc, subdir)
    template = CACHE.get(key)
    if not template:
        template = tmp_path_factory.mktemp("dvc-test-cache" + worker_id)
        TmpDir(template).init(scm=scm, dvc=dvc, subdir=subdir)
        CACHE[key] = os.fspath(template)

    if isinstance(name, str):
        target = tmp_path_factory.mktemp(name)
    else:
        target = name

    # Copy entry by entry, because shutil.copytree's dirs_exist_ok is only
    # available in Python >= 3.8.
    for item in os.listdir(template):
        source = os.path.join(template, item)
        destination = os.path.join(target, item)
        fs_copy(source, destination)

    tmp_dir = TmpDir(target)
    root = os.fspath(tmp_dir)
    if dvc:
        tmp_dir.dvc = Repo(root)
    if scm:
        # Reuse the SCM object of the DVC repo when there is one.
        if hasattr(tmp_dir, "dvc"):
            tmp_dir.scm = tmp_dir.dvc.scm
        else:
            tmp_dir.scm = Git(root)

    request.addfinalizer(tmp_dir.close)
    return tmp_dir