def _copy_if_git_file(self, to_path):
    """Copy the dependency out of a git clone if it is a git-tracked file.

    ``cached_clone`` is called with ``self.def_repo`` as keyword arguments
    and its result is used as the directory of the clone.  When
    ``self._is_git_file`` accepts ``self.def_path`` inside that directory,
    the file is copied to ``to_path`` and the revision reported by
    ``SCM(repo_dir).get_rev()`` is stored in ``self.def_repo`` under
    ``self.PARAM_REV_LOCK``.

    Args:
        to_path: destination of the copy; made absolute before copying.

    Returns:
        True if the file was copied, False if ``self._is_git_file``
        rejected the path (nothing is copied or recorded in that case).
    """
    src_path = self.def_path
    repo_dir = cached_clone(**self.def_repo)

    if not self._is_git_file(repo_dir, src_path):
        return False

    src_full_path = os.path.join(repo_dir, src_path)
    dst_full_path = os.path.abspath(to_path)
    fs_copy(src_full_path, dst_full_path)

    # Record the exact revision the file was copied from.  Without this,
    # code that reads PARAM_REV_LOCK (e.g. _get_checksum) finds no lock for
    # git files and falls back to the unpinned revision instead.
    self.def_repo[self.PARAM_REV_LOCK] = SCM(repo_dir).get_rev()
    return True
def _copy_if_git_file(self, to_path):
    """Try to satisfy the dependency with a plain git-tracked file.

    The repository described by ``self.def_repo`` is obtained through
    ``cached_clone``.  If ``self._is_git_file`` confirms ``self.def_path``
    within it, the file is copied to the absolute form of ``to_path`` and
    the clone's current revision is saved in ``self.def_repo`` under
    ``self.PARAM_REV_LOCK``.

    Returns:
        True when the file was copied, False otherwise.
    """
    relpath = self.def_path
    clone_dir = cached_clone(**self.def_repo)

    if self._is_git_file(clone_dir, relpath):
        fs_copy(os.path.join(clone_dir, relpath), os.path.abspath(to_path))
        # Remember which revision the copied file came from.
        self.def_repo[self.PARAM_REV_LOCK] = SCM(clone_dir).get_rev()
        return True

    return False
def get(url, path, out=None, rev=None):
    """Fetch ``path`` from the repository at ``url`` and place it at ``out``.

    The path is first looked up as an output of the external repository and,
    if that output uses the cache, retrieved via ``_get_cached``.  Otherwise
    (not a DVC repo, path not an output, or a non-cached output) the file is
    copied from a clone obtained with ``cached_clone``.

    Args:
        url: location of the source repository.
        path: path inside the repository; absolute paths are rejected on
            the git fallback.
        out: destination, passed through ``resolve_output`` together
            with ``path``.
        rev: revision handed to ``external_repo`` / ``cached_clone``.

    Raises:
        GetDVCFileError: if the resolved output name is a valid stage
            filename.
        PathMissingError: if ``OutputNotFoundError`` or ``FileNotFoundError``
            is raised while fetching.
    """
    out = resolve_output(path, out)

    if Stage.is_valid_filename(out):
        raise GetDVCFileError()

    # The temporary cache directory is created as a sibling of the output so
    # that both sit on the same filesystem, which lets reflink and/or
    # hardlink be used.  tempfile.TemporaryDirectory is deliberately not
    # used: it would create a symlink to tmpfs, which defeats the purpose
    # and would not work with reflink/hardlink.
    out_parent = os.path.dirname(os.path.abspath(out))
    tmp_cache_dir = os.path.join(out_parent, "." + str(shortuuid.uuid()))

    try:
        try:
            with external_repo(
                cache_dir=tmp_cache_dir, url=url, rev=rev
            ) as ext_repo:
                # Prefer whatever link type avoids duplicating the data.
                #
                # Symlinks are left out: the cache is removed once we are
                # done, so the data would have to be copied over before the
                # removal anyway, and copying right away is simpler.
                #
                # A theoretical "move" link type cannot be used either, as
                # the same cache file may be needed several times within a
                # directory.
                ext_repo.cache.local.cache_types = [
                    "reflink",
                    "hardlink",
                    "copy",
                ]
                found = ext_repo.find_out_by_relpath(path)
                if found.use_cache:
                    _get_cached(ext_repo, found, out)
                    return
                # The output is not cached: continue below and try git.
        except (NotDvcRepoError, NoOutputInExternalRepoError):
            # Either this is not a DVC repository or the path is possibly
            # not tracked by DVC: continue below and try git.
            pass

        if os.path.isabs(path):
            raise FileNotFoundError

        clone_dir = cached_clone(url, rev=rev)
        fs_copy(os.path.join(clone_dir, path), out)
    except (OutputNotFoundError, FileNotFoundError):
        raise PathMissingError(path, url)
    finally:
        # Always discard the temporary cache directory.
        remove(tmp_cache_dir)
def _get_checksum(self, updated=False):
    """Return the checksum of ``self.def_path`` in the source repository.

    Args:
        updated: when false (the default), the revision stored under
            ``self.PARAM_REV_LOCK`` in ``self.def_repo`` is used; when
            true, that lock is ignored.

    Returns:
        The ``"md5"`` entry of the matching output's ``info`` if the path is
        found as an output of the repository opened by ``self._make_repo``;
        otherwise the value of ``self.repo.cache.local.get_checksum`` for
        the file inside a clone obtained with ``cached_clone``.
    """
    locked_rev = None if updated else self.def_repo.get(self.PARAM_REV_LOCK)

    try:
        with self._make_repo(rev_lock=locked_rev) as ext_repo:
            found = ext_repo.find_out_by_relpath(self.def_path)
            return found.info["md5"]
    except (NotDvcRepoError, NoOutputInExternalRepoError):
        # Not resolvable as an output: continue with a clone instead.
        pass

    url = self.def_repo[self.PARAM_URL]
    # The locked revision wins; otherwise use the configured one, if any.
    rev = locked_rev or self.def_repo.get(self.PARAM_REV)
    clone_dir = cached_clone(url, rev=rev)

    target = PathInfo(os.path.join(clone_dir, self.def_path))
    return self.repo.cache.local.get_checksum(target)