def test_download_callbacks_on_dvc_git_fs(tmp_dir, dvc, scm, fs_type):
    """Check that download callbacks report progress on DVC and Git filesystems.

    ``fs_type`` selects the filesystem under test: ``"git"`` generates the
    fixtures through SCM and reads them via a ``GitFileSystem`` at ``HEAD``;
    ``"dvc"`` reads through ``dvc.dvcfs``. Any other value generates the data
    through DVC but reads it through the ``GitFileSystem``.
    """
    from dvc.fs.git import GitFileSystem

    if fs_type == "git":
        gen = tmp_dir.scm_gen
    else:
        gen = tmp_dir.dvc_gen
    gen({"dir": {"foo": "foo", "bar": "bar"}, "file": "file"}, commit="gen")

    if fs_type == "dvc":
        fs = dvc.dvcfs
    else:
        fs = GitFileSystem(scm=scm, rev="HEAD")

    # Single file: the callback is expected to count bytes.
    file_dest = tmp_dir / "file2"
    callback = fsspec.Callback()
    fs.download_file("file", file_dest.fs_path, callback=callback)

    size = os.path.getsize(tmp_dir / "file")
    assert file_dest.read_text() == "file"
    assert callback.size == size
    assert callback.value == size

    # Directory: the callback is expected to count files (two in "dir").
    dir_dest = tmp_dir / "dir2"
    callback = fsspec.Callback()
    fs.download("dir", dir_dest.fs_path, callback=callback)

    assert dir_dest.read_text() == {"foo": "foo", "bar": "bar"}
    assert callback.size == 2
    assert callback.value == 2
def get_fs(self, rev: str, **kwargs):
    """Build a ``GitFileSystem`` over the tree of revision ``rev``.

    The revision is resolved first, its tree object is fetched through the
    configured backend, and both are wrapped in a ``GitTrie``. Extra keyword
    arguments are forwarded to ``GitFileSystem`` unchanged.
    """
    from dvc.fs.git import GitFileSystem

    from .objects import GitTrie

    sha = self.resolve_rev(rev)
    trie = GitTrie(self._backend_func("get_tree_obj", rev=sha), sha)
    return GitFileSystem(self.root_dir, trie, **kwargs)
def get_fs(self, rev: str):
    """Build a ``GitFileSystem`` over the tree of revision ``rev``.

    The revision is resolved first; its tree object is then read through the
    ``pygit2`` backend and wrapped in a ``GitTrie``.
    """
    from dvc.fs.git import GitFileSystem

    from .objects import GitTrie

    sha = self.resolve_rev(rev)
    trie = GitTrie(self.pygit2.get_tree_obj(rev=sha), sha)
    return GitFileSystem(self.root_dir, trie)
def test_ignore_on_branch(tmp_dir, scm, dvc):
    """Check that a dvcignore rule committed on a branch applies only there.

    The ignore rule for ``foo`` is committed on ``branch``. Walking the
    current workspace must still list ``foo``; switching ``dvc.fs`` to a
    ``GitFileSystem`` at ``branch`` must make ``foo`` ignored.
    """
    from dvc.fs.git import GitFileSystem

    tmp_dir.scm_gen({"foo": "foo", "bar": "bar"}, commit="add files")

    with tmp_dir.branch("branch", new=True):
        tmp_dir.scm_gen(DvcIgnore.DVCIGNORE_FILE, "foo", commit="add ignore")

    dvc._reset()

    expected = {
        (tmp_dir / name).fs_path
        for name in ("foo", "bar", DvcIgnore.DVCIGNORE_FILE)
    }
    result = walk_files(dvc, dvc.fs, tmp_dir)
    assert set(result) == expected

    dvc.fs = GitFileSystem(scm=scm, rev="branch")
    assert dvc.dvcignore.is_ignored_file(tmp_dir / "foo")
def _get_repo_dirs(
    self,
    root_dir: str = None,
    scm: "Base" = None,
    rev: str = None,
    uninitialized: bool = False,
):
    """Locate the repo root, its DVC directory and its tmp directory.

    ``scm`` and ``rev`` must be given together or not at all. When ``scm`` is
    a ``Git`` instance, the root is searched for on a ``GitFileSystem`` at
    ``rev``; otherwise ``find_root`` is called with no filesystem.

    Returns:
        A ``(root_dir, dvc_dir, tmp_dir)`` tuple. ``dvc_dir`` and ``tmp_dir``
        are ``None`` when no DVC repo was found and ``uninitialized`` is true;
        in that case ``root_dir`` is the root reported by the SCM.

    Raises:
        NotDvcRepoError: no DVC repo was found and ``uninitialized`` is false.
    """
    assert bool(scm) == bool(rev)

    from dvc.fs.git import GitFileSystem
    from dvc.scm import SCM, Base, Git, SCMError
    from dvc.utils.fs import makedirs

    dvc_dir = None
    tmp_dir = None
    try:
        if isinstance(scm, Git) and rev:
            git_fs = GitFileSystem(scm=scm, rev=rev)
        else:
            git_fs = None
        root_dir = self.find_root(root_dir, git_fs)
        dvc_dir = os.path.join(root_dir, self.DVC_DIR)
        tmp_dir = os.path.join(dvc_dir, "tmp")
        makedirs(tmp_dir, exist_ok=True)
    except NotDvcRepoError:
        if not uninitialized:
            raise

        # No DVC repo here: take the root from the SCM instead, degrading to
        # a no-SCM object rooted at the current directory if that fails too.
        try:
            fallback_scm = SCM(root_dir or os.curdir)
        except SCMError:
            fallback_scm = SCM(os.curdir, no_scm=True)

        assert isinstance(fallback_scm, Base)
        root_dir = fallback_scm.root_dir

    return root_dir, dvc_dir, tmp_dir
def __init__(
    self,
    root_dir=None,
    fs=None,
    rev=None,
    subrepos=False,
    uninitialized=False,
    config=None,
    url=None,
    repo_factory=None,
):
    """Set up the repo: filesystem, directories, config and helper objects.

    Args:
        root_dir: directory passed to ``_get_repo_dirs`` to locate the repo;
            also used (falling back to ``os.curdir``) to create the SCM when
            ``rev`` is given without ``fs``.
        fs: filesystem to operate on; ``localfs`` is used when falsy.
        rev: when given without ``fs``, the repo reads from a
            ``GitFileSystem`` at this revision.
        subrepos: stored as ``self.subrepos``.
        uninitialized: forwarded to ``_get_repo_dirs`` and stored as
            ``self._uninitialized``.
        config: forwarded to ``Config``.
        url: stored as ``self.url``.
        repo_factory: stored in ``self._fs_conf`` under ``"repo_factory"``.
    """
    # Imports are function-local in the original; presumably to avoid import
    # cycles or import cost at module load - TODO confirm.
    from dvc.config import Config
    from dvc.data.db import ODBManager
    from dvc.data_cloud import DataCloud
    from dvc.fs.git import GitFileSystem
    from dvc.fs.local import localfs
    from dvc.lock import LockNoop, make_lock
    from dvc.repo.live import Live
    from dvc.repo.metrics import Metrics
    from dvc.repo.params import Params
    from dvc.repo.plots import Plots
    from dvc.repo.stage import StageLoad
    from dvc.scm import SCM
    from dvc.stage.cache import StageCache
    from dvc.state import State, StateNoop

    self.url = url
    self._fs_conf = {"repo_factory": repo_factory}
    self._fs = fs or localfs
    self._scm = None

    # An explicit revision without an explicit filesystem means "read the
    # repo as it was at `rev`", so swap the filesystem for a Git-backed one.
    if rev and not fs:
        self._scm = SCM(root_dir or os.curdir)
        self._fs = GitFileSystem(scm=self._scm, rev=rev)

    # NOTE(review): `self.fs` is read here although only `self._fs` is
    # assigned above; presumably `fs` is a property over `_fs` - confirm.
    self.root_dir, self.dvc_dir, self.tmp_dir = self._get_repo_dirs(
        root_dir=root_dir, fs=self.fs, uninitialized=uninitialized)

    self.config = Config(self.dvc_dir, fs=self.fs, config=config)
    self._uninitialized = uninitialized

    # used by RepoFileSystem to determine if it should traverse subrepos
    self.subrepos = subrepos

    self.cloud = DataCloud(self)
    self.stage = StageLoad(self)

    if isinstance(self.fs, GitFileSystem) or not self.dvc_dir:
        # Git-backed or uninitialized repo: use no-op lock and state.
        self.lock = LockNoop()
        self.state = StateNoop()
        self.odb = ODBManager(self)
    else:
        # Repo with a DVC directory on a non-Git filesystem: real lock file
        # under tmp_dir, real state database, stage cache and ignore setup.
        self.lock = make_lock(
            os.path.join(self.tmp_dir, "lock"),
            tmp_dir=self.tmp_dir,
            hardlink_lock=self.config["core"].get("hardlink_lock", False),
            friendly=True,
        )

        state_db_dir = self._get_database_dir("state")
        self.state = State(self.root_dir, state_db_dir, self.dvcignore)
        self.odb = ODBManager(self)

        self.stage_cache = StageCache(self)

        self._ignore()

    self.metrics = Metrics(self)
    self.plots = Plots(self)
    self.params = Params(self)
    self.live = Live(self)

    # Optional hook taking (str, Exception); unset by default.
    self.stage_collection_error_handler: Optional[Callable[
        [str, Exception], None]] = None
    self._lock_depth = 0
def get_fs(self, rev: str):
    """Return a ``GitFileSystem`` bound to this SCM object at revision ``rev``."""
    from dvc.fs.git import GitFileSystem

    git_fs = GitFileSystem(scm=self, rev=rev)
    return git_fs
def external_repo(
    url,
    rev=None,
    for_write=False,
    cache_dir=None,
    cache_types=None,
    **kwargs,
):
    """Yield a ``Repo`` for the external repository at ``url``.

    The repository is obtained through ``_cached_clone``. For read-only use
    the repo is opened on a ``GitFileSystem`` at ``rev``; with ``for_write``
    the clone directory itself is used and removed once the caller is done.
    Extra keyword arguments are passed to ``Repo``; ``subrepos`` and
    ``uninitialized`` default to ``True`` when not supplied.

    Raises:
        NoRemoteInExternalRepoError: a ``NoRemoteError`` was raised while the
            repo was in use.
        NoOutputInExternalRepoError: an ``OutputNotFoundError`` tied to this
            repo was raised.
        PathMissingError: a ``FileMissingError`` was raised.
    """
    from scmrepo.git import Git

    from dvc.config import NoRemoteError
    from dvc.fs.git import GitFileSystem

    logger.debug("Creating external repo %s@%s", url, rev)
    path = _cached_clone(url, rev, for_write=for_write)
    # Local HEAD points to the tip of whatever branch we first cloned from
    # (which may not be the default branch), use origin/HEAD here to get
    # the tip of the default branch
    rev = rev or "refs/remotes/origin/HEAD"

    cache_section = {
        "dir": cache_dir or _get_cache_dir(url),
        "type": cache_types,
    }
    cache_config = {"cache": cache_section}

    if os.path.isdir(url):
        config = _get_remote_config(url)
    else:
        config = {}
    config.update(cache_config)

    if for_write:
        root_dir = path
        fs = None
    else:
        root_dir = os.path.realpath(path)
        fs = GitFileSystem(scm=Git(root_dir), rev=rev)

    repo_kwargs = dict(
        root_dir=root_dir,
        url=url,
        fs=fs,
        config=config,
        repo_factory=erepo_factory(url, cache_config),
        **kwargs,
    )
    repo_kwargs.setdefault("subrepos", True)
    repo_kwargs.setdefault("uninitialized", True)

    repo = Repo(**repo_kwargs)

    try:
        yield repo
    except NoRemoteError as exc:
        raise NoRemoteInExternalRepoError(url) from exc
    except OutputNotFoundError as exc:
        # Only translate errors raised for this repo; others pass through.
        if exc.repo is not repo:
            raise
        raise NoOutputInExternalRepoError(
            exc.output, repo.root_dir, url
        ) from exc
    except FileMissingError as exc:
        raise PathMissingError(exc.path, url) from exc
    finally:
        repo.close()
        if for_write:
            _remove(path)