def test_push_wildcard_from_bare_git_repo(
    tmp_dir, make_tmp_dir, erepo_dir, local_cloud
):
    """Import from a bare git repo after a glob-restricted ``dvc push``.

    ``erepo_dir`` tracks two directories but pushes with the glob target
    ``dire*``. The test then expects importing ``dirextra`` to work and
    importing ``dir123`` to raise ``PathMissingError``.
    """
    bare_repo_url = os.fspath(tmp_dir)
    Git.init(tmp_dir.fs_path, bare=True).close()

    erepo_dir.add_remote(config=local_cloud.config)
    tracked_tree = {
        "dir123": {"foo": "foo content"},
        "dirextra": {"extrafoo": "extra foo content"},
    }
    with erepo_dir.chdir():
        erepo_dir.dvc_gen(tracked_tree, commit="initial")

    glob_target = os.path.join(os.fspath(erepo_dir), "dire*")
    erepo_dir.dvc.push([glob_target], glob=True)

    # Publish the git history to the bare repository.
    erepo_dir.scm.gitpython.repo.create_remote("origin", bare_repo_url)
    erepo_dir.scm.gitpython.repo.remote("origin").push("master")

    dvc_repo = make_tmp_dir("dvc-repo", scm=True, dvc=True)
    with dvc_repo.chdir():
        dvc_repo.dvc.imp(bare_repo_url, "dirextra")

        with pytest.raises(PathMissingError):
            dvc_repo.dvc.imp(bare_repo_url, "dir123")
def git(tmp_dir, scm, request):
    """Yield a ``Git`` object for ``tmp_dir`` limited to one backend.

    The backend name is taken from ``request.param`` and is also stored on
    the yielded object as ``test_backend``. The object is closed once the
    consumer is done with it.
    """
    from scmrepo.git import Git

    backend = request.param
    repo = Git(os.fspath(tmp_dir), backends=[backend])
    repo.test_backend = backend
    yield repo
    repo.close()
def test_no_commits(tmp_dir):
    """Check ``Git.no_commits`` before and after the first commit."""
    from scmrepo.git import Git

    from tests.dir_helpers import git_init

    git_init(".")
    had_no_commits = Git().no_commits
    assert had_no_commits

    # Create, stage and commit a single file; each step uses a fresh Git().
    tmp_dir.gen("foo", "foo")
    Git().add(["foo"])
    Git().commit("foo")

    still_no_commits = Git().no_commits
    assert not still_no_commits
def test_import_from_bare_git_repo(
    tmp_dir, make_tmp_dir, erepo_dir, local_cloud
):
    """Import a DVC-tracked file through a bare git repository.

    ``tmp_dir`` is initialised as a bare git repo, ``erepo_dir`` pushes its
    data to ``local_cloud`` and its ``master`` branch to the bare repo, and a
    fresh DVC repo then imports ``foo`` from the bare repo's path.
    """
    bare_repo_url = os.fspath(tmp_dir)
    Git.init(tmp_dir.fs_path, bare=True).close()

    erepo_dir.add_remote(config=local_cloud.config)
    with erepo_dir.chdir():
        erepo_dir.dvc_gen({"foo": "foo"}, commit="initial")
    erepo_dir.dvc.push()

    # Publish the git history to the bare repository.
    erepo_dir.scm.gitpython.repo.create_remote("origin", bare_repo_url)
    erepo_dir.scm.gitpython.repo.remote("origin").push("master")

    importer = make_tmp_dir("dvc-repo", scm=True, dvc=True)
    with importer.chdir():
        importer.dvc.imp(bare_repo_url, "foo")
def _clone_default_branch(url, rev, for_write=False):
    """Return a local clone of ``url``, creating or refreshing it as needed.

    A clone path already recorded in ``CLONES`` is reused. It is left
    untouched when ``rev`` is a SHA the clone already has; otherwise it is
    unshallowed (if it was recorded as shallow) or pulled. When nothing is
    recorded, a new clone is made in a temporary directory, trying a shallow
    clone of ``rev`` first when ``rev`` is set, is not a SHA, and the clone
    is not requested ``for_write``.

    Returns:
        tuple: ``(clone_path, shallow)``.
    """
    from scmrepo.git import Git

    cached_entry = CLONES.get(url, (None, False))
    clone_path, shallow = cached_entry

    repo = None
    try:
        if clone_path:
            repo = Git(clone_path)
            # Do not refresh for SHAs we already have; branches and tags
            # might move, so those always trigger a refresh.
            rev_is_known_sha = Git.is_sha(rev) and repo.has_rev(rev)
            if not rev_is_known_sha:
                if not shallow:
                    logger.debug("erepo: git pull '%s'", url)
                    repo.pull()
                else:
                    # If we are missing a rev in a shallow clone, fall back
                    # to a full (unshallowed) clone. Fetching specific rev
                    # SHAs is only available in certain git versions, so
                    # when several specific revs of one URL are needed it is
                    # easier/safer to work with a full clone.
                    logger.debug("erepo: unshallowing clone for '%s'", url)
                    _unshallow(repo)
                    shallow = False
                    CLONES[url] = (clone_path, shallow)
        else:
            from dvc.scm import clone

            logger.debug("erepo: git clone '%s' to a temporary dir", url)
            clone_path = tempfile.mkdtemp("dvc-clone")

            # If rev is a tag or branch name, try a shallow clone first.
            try_shallow = not for_write and rev and not Git.is_sha(rev)
            if try_shallow:
                try:
                    repo = clone(url, clone_path, shallow_branch=rev)
                    shallow = True
                    logger.debug(
                        "erepo: using shallow clone for branch '%s'", rev
                    )
                except CloneError:
                    # Deliberate best-effort: fall through to a full clone.
                    pass

            if not repo:
                repo = clone(url, clone_path)
                shallow = False
            CLONES[url] = (clone_path, shallow)
    finally:
        if repo:
            repo.close()

    return clone_path, shallow
def ls(repo, *args, rev=None, git_remote=None, all_=False, **kwargs):
    """Collect experiment names grouped by baseline revision.

    Args:
        repo: object exposing ``scm``.
        rev: baseline revision to list experiments for. When empty and
            ``all_`` is false, ``repo.scm.get_rev()`` is used.
        git_remote: when set, the ``remote_exp_refs*`` helpers are used
            instead of the local ones.
        all_: when no ``rev`` is in effect, list experiments for every
            baseline.

    Returns:
        defaultdict: maps a baseline revision to a list of experiment names.

    Raises:
        RevError: if ``rev`` cannot be resolved, unless ``git_remote`` is set
            and ``rev`` looks like a SHA.
    """
    from scmrepo.git import Git

    from dvc.scm import RevError, resolve_rev

    scm = repo.scm
    if rev:
        try:
            rev = resolve_rev(scm, rev)
        except RevError:
            # A SHA combined with a git remote could be a remote rev that
            # has not been fetched yet; anything else is a real error.
            if not git_remote or not Git.is_sha(rev):
                raise
    elif not all_:
        rev = scm.get_rev()

    names_by_baseline = defaultdict(list)

    if rev:
        ref_infos = (
            remote_exp_refs_by_baseline(scm, git_remote, rev)
            if git_remote
            else exp_refs_by_baseline(scm, rev)
        )
        for ref_info in ref_infos:
            names_by_baseline[rev].append(ref_info.name)
    elif all_:
        ref_infos = (
            remote_exp_refs(scm, git_remote) if git_remote else exp_refs(scm)
        )
        for ref_info in ref_infos:
            names_by_baseline[ref_info.baseline_sha].append(ref_info.name)

    return names_by_baseline
def make(name, *, scm=False, dvc=False, subdir=False):  # pylint: disable=W0621
    """Build a ``TmpDir`` pre-populated from a cached template directory.

    A template is initialised once per ``(scm, dvc, subdir)`` combination and
    remembered in ``CACHE``; its entries are then copied into the target
    directory. ``name`` is either a string, in which case a new temporary
    directory is created from it, or an existing path that is used as is.
    """
    from shutil import ignore_patterns

    from scmrepo.git import Git

    from dvc.repo import Repo

    from .tmp_dir import TmpDir

    cache_key = (scm, dvc, subdir)
    cache = CACHE.get(cache_key)
    if not cache:
        cache = tmp_path_factory.mktemp("dvc-test-cache" + worker_id)
        TmpDir(cache).init(scm=scm, dvc=dvc, subdir=subdir)
        CACHE[cache_key] = os.fspath(cache)

    if isinstance(name, str):
        path = tmp_path_factory.mktemp(name)
    else:
        path = name

    # Ignore sqlite files from .dvc/tmp. We might not be closing the cache
    # connection, resulting in PermissionErrors on Windows.
    skip_sqlite = ignore_patterns("cache.db*")
    for entry in os.listdir(cache):
        # shutil.copytree's dirs_exist_ok is only available in >=3.8
        source = os.path.join(cache, entry)
        destination = os.path.join(path, entry)
        _fs_copy(source, destination, ignore=skip_sqlite)

    new_dir = TmpDir(path)
    str_path = os.fspath(new_dir)
    if dvc:
        new_dir.dvc = Repo(str_path)
    if scm:
        # Reuse the DVC repo's scm object when there is one.
        if hasattr(new_dir, "dvc"):
            new_dir.scm = new_dir.dvc.scm
        else:
            new_dir.scm = Git(str_path)

    request.addfinalizer(new_dir.close)
    return new_dir
def clone(url: str, to_path: str, **kwargs):
    """Clone ``url`` into ``to_path`` while reporting progress.

    Args:
        url: location of the repository to clone.
        to_path: destination path for the clone.
        **kwargs: forwarded unchanged to ``Git.clone``.

    Returns:
        Whatever ``Git.clone`` returns.

    Raises:
        CloneError: when ``Git.clone`` raises scmrepo's ``CloneError``; the
            original exception is attached as the cause.
    """
    from scmrepo.exceptions import CloneError as InternalCloneError

    with TqdmGit(desc="Cloning") as pbar:
        try:
            return Git.clone(url, to_path, progress=pbar.update_git, **kwargs)
        except InternalCloneError as exc:
            # Chain explicitly so the traceback reports the scmrepo error as
            # the direct cause rather than as an error during handling.
            raise CloneError(str(exc)) from exc
def test_no_commits(tmp_dir):
    """``metrics.diff()`` returns an empty dict in a repo with no commits."""
    from scmrepo.git import Git

    from dvc.repo import Repo

    scm = Git.init(tmp_dir.fs_path)
    assert scm.no_commits

    repo = Repo.init()
    diff = repo.metrics.diff()
    assert diff == {}
def test_no_commits(tmp_dir):
    """``metrics.diff()`` returns an empty dict in a repo with no commits."""
    from scmrepo.git import Git

    from dvc.repo import Repo
    from tests.dir_helpers import git_init

    git_init(".")
    has_no_commits = Git().no_commits
    assert has_no_commits

    repo = Repo.init()
    diff = repo.metrics.diff()
    assert diff == {}
def init(self, *, scm=False, dvc=False, subdir=False):
    """Initialise git and/or DVC in this directory.

    Args:
        scm: initialise a git repository and set ``self.scm``.
        dvc: initialise a DVC repository and set ``self.dvc``.
        subdir: forwarded to ``Repo.init``.

    Asserts that a requested component has not been set up already. When DVC
    is initialised and ``self.scm`` exists, the result is committed.
    """
    from scmrepo.git import Git

    from dvc.repo import Repo

    assert not (scm and hasattr(self, "scm"))
    assert not (dvc and hasattr(self, "dvc"))

    if scm:
        # Only create the repository here; the handle is opened below.
        Git.init(self.fs_path).close()

    if dvc:
        without_scm = not (scm or hasattr(self, "scm"))
        self.dvc = Repo.init(self.fs_path, no_scm=without_scm, subdir=subdir)

    if scm:
        # Reuse the DVC repo's scm object when there is one.
        if hasattr(self, "dvc"):
            self.scm = self.dvc.scm
        else:
            self.scm = Git(self.fs_path)

    if dvc and hasattr(self, "scm"):
        self.scm.commit("init dvc")
def clone(url: str, to_path: str, **kwargs):
    """Clone ``url`` into ``to_path`` and fetch experiments for full clones.

    Args:
        url: location of the repository to clone.
        to_path: destination path for the clone.
        **kwargs: forwarded unchanged to ``Git.clone``. When
            ``shallow_branch`` is among them, experiments are not fetched.

    Returns:
        The object returned by ``Git.clone``.

    Raises:
        CloneError: when scmrepo's ``CloneError`` is raised inside the
            ``try`` block; the original exception is attached as the cause.
    """
    from scmrepo.exceptions import CloneError as InternalCloneError

    from dvc.repo.experiments.utils import fetch_all_exps

    with TqdmGit(desc="Cloning") as pbar:
        try:
            git = Git.clone(url, to_path, progress=pbar.update_git, **kwargs)
            if "shallow_branch" not in kwargs:
                fetch_all_exps(git, "origin", progress=pbar.update_git)
            return git
        except InternalCloneError as exc:
            # Chain explicitly so the traceback reports the scmrepo error as
            # the direct cause rather than as an error during handling.
            raise CloneError(str(exc)) from exc
def _git_checkout(repo_path, rev):
    """Check out ``rev`` in the git repository at ``repo_path``.

    The ``Git`` handle opened for the checkout is always closed, even when
    the checkout raises.
    """
    from scmrepo.git import Git

    logger.debug("erepo: git checkout %s@%s", repo_path, rev)

    scm = Git(repo_path)
    try:
        scm.checkout(rev)
    finally:
        scm.close()
def SCM(
    root_dir, search_parent_directories=True, no_scm=False
):  # pylint: disable=invalid-name
    """Return the SCM instance for the repo at the given path.

    Args:
        root_dir (str): path to a root directory of the repo.
        search_parent_directories (bool): whether to look for the repo root
            in parent directories.
        no_scm (bool): return ``NoSCM`` instead of ``Git`` when true.

    Returns:
        A ``NoSCM`` instance when ``no_scm`` is true, otherwise a ``Git``
        instance. Construction runs inside ``map_scm_exception()``.
    """
    with map_scm_exception():
        if not no_scm:
            return Git(
                root_dir,
                search_parent_directories=search_parent_directories,
            )
        return NoSCM(root_dir, _raise_not_implemented_as=NoSCMError)
def init(self, *, scm=False, dvc=False, subdir=False):
    """Initialise git and/or DVC in this directory.

    Args:
        scm: run ``git_init`` on this directory and set ``self.scm``.
        dvc: initialise a DVC repository and set ``self.dvc``.
        subdir: forwarded to ``Repo.init``.

    Asserts that a requested component has not been set up already. When DVC
    is initialised and ``self.scm`` exists, the result is committed.
    """
    from scmrepo.git import Git

    from dvc.repo import Repo

    assert not (scm and hasattr(self, "scm"))
    assert not (dvc and hasattr(self, "dvc"))

    str_path = os.fspath(self)

    if scm:
        git_init(str_path)

    if dvc:
        without_scm = not (scm or hasattr(self, "scm"))
        self.dvc = Repo.init(str_path, no_scm=without_scm, subdir=subdir)

    if scm:
        # Reuse the DVC repo's scm object when there is one.
        if hasattr(self, "dvc"):
            self.scm = self.dvc.scm
        else:
            self.scm = Git(str_path)

    if dvc and hasattr(self, "scm"):
        self.scm.commit("init dvc")
def _collect_rows(
    base_rev,
    experiments,
    metric_names,
    param_names,
    precision=DEFAULT_PRECISION,
    sort_by=None,
    sort_order=None,
    fill_value=FILL_VALUE,
    iso=False,
):
    """Yield one table row (a list) per experiment of a baseline.

    Args:
        base_rev: baseline revision; shortened to 7 characters for display
            when it is a SHA.
        experiments: mapping of revision to a result dict that may contain
            ``"data"`` and ``"error"`` entries.
        metric_names: passed to ``_sort_column`` and ``_extend_row``.
        param_names: passed to ``_sort_column`` and ``_extend_row``.
        precision: passed to ``_extend_row``.
        sort_by: when set, experiments are re-ordered through
            ``_sort_column`` and ``_sort_exp``.
        sort_order: ``"desc"`` requests a reversed sort.
        fill_value: placeholder for values that are not available.
        iso: passed to ``_format_time``.

    Yields:
        list: experiment name, short rev, row type, formatted timestamp,
        parent, state and executor, followed by whatever ``_extend_row``
        appends for metrics and params.
    """
    from scmrepo.git import Git

    if sort_by:
        sort_path, sort_name, sort_type = _sort_column(
            sort_by, metric_names, param_names
        )
        reverse = sort_order == "desc"
        experiments = _sort_exp(
            experiments, sort_path, sort_name, sort_type, reverse
        )

    # True while the next checkpoint row starts a new checkpoint chain.
    new_checkpoint = True
    for i, (rev, results) in enumerate(experiments.items()):
        exp = results.get("data", {})
        if exp.get("running"):
            state = "Running"
        elif exp.get("queued"):
            state = "Queued"
        else:
            state = fill_value
        executor = exp.get("executor", fill_value)
        is_baseline = rev == "baseline"

        if is_baseline:
            name_rev = base_rev[:7] if Git.is_sha(base_rev) else base_rev
        else:
            name_rev = rev[:7]

        exp_name = exp.get("name", "")
        tip = exp.get("checkpoint_tip")

        parent_rev = exp.get("checkpoint_parent", "")
        parent_exp = experiments.get(parent_rev, {}).get("data", {})
        parent_tip = parent_exp.get("checkpoint_tip")

        parent = ""
        if is_baseline:
            typ = "baseline"
        elif tip:
            if tip == parent_tip:
                typ = (
                    "checkpoint_tip" if new_checkpoint else "checkpoint_commit"
                )
            elif parent_rev == base_rev:
                typ = "checkpoint_base"
            else:
                typ = "checkpoint_commit"
                parent = parent_rev[:7]
        elif i < len(experiments) - 1:
            typ = "branch_commit"
        else:
            typ = "branch_base"

        if not is_baseline:
            new_checkpoint = not (tip and tip == parent_tip)

        row = [
            exp_name,
            name_rev,
            typ,
            _format_time(exp.get("timestamp"), fill_value, iso),
            parent,
            state,
            executor,
        ]

        # Use a per-row placeholder: rebinding ``fill_value`` itself would
        # make every row after an errored one use the errored placeholder.
        row_fill_value = (
            FILL_VALUE_ERRORED if results.get("error") else fill_value
        )
        _extend_row(
            row,
            metric_names,
            exp.get("metrics", {}).items(),
            precision,
            fill_value=row_fill_value,
        )
        _extend_row(
            row,
            param_names,
            exp.get("params", {}).items(),
            precision,
            fill_value=row_fill_value,
        )

        yield row
def external_repo(
    url, rev=None, for_write=False, cache_dir=None, cache_types=None, **kwargs
):
    """Yield a ``Repo`` backed by a clone of ``url``.

    Args:
        url: location of the external repository.
        rev: revision to use; defaults to ``refs/remotes/origin/HEAD``.
        for_write: when true, the repo is created without ``scm``/``rev``
            and the clone path is removed on exit.
        cache_dir: cache directory; defaults to ``_get_cache_dir(url)``.
        cache_types: stored as the cache ``type`` setting.
        **kwargs: extra ``Repo`` arguments. ``subrepos`` and
            ``uninitialized`` default to ``True`` when not supplied.

    Raises:
        NoRemoteInExternalRepoError: translated from ``NoRemoteError``.
        NoOutputInExternalRepoError: translated from ``OutputNotFoundError``
            raised for this repo.
        PathMissingError: translated from ``FileMissingError``.
    """
    from scmrepo.git import Git

    from dvc.config import NoRemoteError

    logger.debug("Creating external repo %s@%s", url, rev)
    path = _cached_clone(url, rev, for_write=for_write)

    # Local HEAD points to the tip of whatever branch we first cloned from
    # (which may not be the default branch), so use origin/HEAD here to get
    # the tip of the default branch.
    if not rev:
        rev = "refs/remotes/origin/HEAD"

    cache_settings = {
        "dir": cache_dir or _get_cache_dir(url),
        "type": cache_types,
    }
    cache_config = {"cache": cache_settings}

    if os.path.isdir(url):
        config = _get_remote_config(url)
    else:
        config = {}
    config.update(cache_config)

    if for_write:
        root_dir = path
        repo_scm = None
        repo_rev = None
    else:
        root_dir = os.path.realpath(path)
        repo_scm = Git(root_dir)
        repo_rev = rev

    repo_kwargs = dict(
        root_dir=root_dir,
        url=url,
        scm=repo_scm,
        rev=repo_rev,
        config=config,
        repo_factory=erepo_factory(url, cache_config),
        **kwargs,
    )
    repo_kwargs.setdefault("subrepos", True)
    repo_kwargs.setdefault("uninitialized", True)

    repo = Repo(**repo_kwargs)

    try:
        yield repo
    except NoRemoteError as exc:
        raise NoRemoteInExternalRepoError(url) from exc
    except OutputNotFoundError as exc:
        # Errors that belong to a different repo propagate unchanged.
        if exc.repo is not repo:
            raise
        raise NoOutputInExternalRepoError(
            exc.output, repo.root_dir, url
        ) from exc
    except FileMissingError as exc:
        raise PathMissingError(exc.path, url) from exc
    finally:
        repo.close()
        if for_write:
            _remove(path)
def fix_exp_head(scm: "Git", ref: Optional[str]) -> Optional[str]:
    """Rewrite a ``HEAD``-based ref to ``EXEC_BASELINE`` when that ref is set.

    Args:
        scm: object whose ``get_ref`` is queried for ``EXEC_BASELINE``.
        ref: ref pattern to inspect; falsy values are returned unchanged.

    Returns:
        ``EXEC_BASELINE`` joined with the pattern's tail when the pattern's
        name is ``HEAD`` and ``scm.get_ref(EXEC_BASELINE)`` is truthy;
        otherwise ``ref`` itself.
    """
    if not ref:
        return ref

    name, tail = Git.split_ref_pattern(ref)
    if name != "HEAD" or not scm.get_ref(EXEC_BASELINE):
        return ref

    return "".join((EXEC_BASELINE, tail))
def _collect_rows(
    base_rev,
    experiments,
    all_headers,
    metric_headers,
    param_headers,
    metric_names,
    param_names,
    deps_names,
    precision=DEFAULT_PRECISION,
    sort_by=None,
    sort_order=None,
    fill_value=FILL_VALUE,
    iso=False,
):
    """Yield one table row (a list of cell values) per experiment.

    Args:
        base_rev: baseline revision; shortened to 7 characters for display
            when it is a SHA.
        experiments: mapping of revision to a result dict that may contain
            ``"data"`` and ``"error"`` entries.
        all_headers: column names; every row starts with one cell per header.
        metric_headers: passed to ``_extend_row`` for metrics.
        param_headers: passed to ``_extend_row`` for params.
        metric_names: passed to ``_sort_column`` and ``_extend_row``.
        param_names: passed to ``_sort_column`` and ``_extend_row``.
        deps_names: dependency columns; each cell holds the first 7
            characters of the dependency's ``"hash"`` or ``None``.
        precision: passed to ``_extend_row``.
        sort_by: when set, experiments are re-ordered through
            ``_sort_column`` and ``_sort_exp``.
        sort_order: ``"desc"`` requests a reversed sort.
        fill_value: placeholder for cells without a value.
        iso: passed to ``_format_time``.

    Yields:
        list: the values of the row's dict, in insertion order.
    """
    from scmrepo.git import Git

    if sort_by:
        sort_path, sort_name, sort_type = _sort_column(
            sort_by, metric_names, param_names
        )
        reverse = sort_order == "desc"
        experiments = _sort_exp(
            experiments, sort_path, sort_name, sort_type, reverse
        )

    # True while the next checkpoint row starts a new checkpoint chain.
    new_checkpoint = True
    for i, (rev, results) in enumerate(experiments.items()):
        # Use a per-row placeholder: rebinding ``fill_value`` itself would
        # make every row after an errored one use the errored placeholder.
        row_fill = FILL_VALUE_ERRORED if results.get("error") else fill_value
        row_dict = dict.fromkeys(all_headers, row_fill)

        exp = results.get("data", {})

        if exp.get("running"):
            state = "Running"
        elif exp.get("queued"):
            state = "Queued"
        else:
            state = row_fill

        is_baseline = rev == "baseline"

        if is_baseline:
            name_rev = base_rev[:7] if Git.is_sha(base_rev) else base_rev
        else:
            name_rev = rev[:7]

        tip = exp.get("checkpoint_tip")

        parent_rev = exp.get("checkpoint_parent", "")
        parent_exp = experiments.get(parent_rev, {}).get("data", {})
        parent_tip = parent_exp.get("checkpoint_tip")

        parent = ""
        if is_baseline:
            typ = "baseline"
        elif tip:
            if tip == parent_tip:
                typ = (
                    "checkpoint_tip" if new_checkpoint else "checkpoint_commit"
                )
            elif parent_rev == base_rev:
                typ = "checkpoint_base"
            else:
                typ = "checkpoint_commit"
                parent = parent_rev[:7]
        elif i < len(experiments) - 1:
            typ = "branch_commit"
        else:
            typ = "branch_base"

        if not is_baseline:
            new_checkpoint = not (tip and tip == parent_tip)

        row_dict["Experiment"] = exp.get("name", "")
        row_dict["rev"] = name_rev
        row_dict["typ"] = typ
        row_dict["Created"] = _format_time(exp.get("timestamp"), row_fill, iso)
        row_dict["parent"] = parent
        row_dict["State"] = state
        row_dict["Executor"] = exp.get("executor", row_fill)

        _extend_row(
            row_dict,
            metric_names,
            metric_headers,
            exp.get("metrics", {}).items(),
            precision,
            fill_value=row_fill,
        )
        _extend_row(
            row_dict,
            param_names,
            param_headers,
            exp.get("params", {}).items(),
            precision,
            fill_value=row_fill,
        )

        for dep in deps_names:
            hash_info = exp.get("deps", {}).get(dep, {}).get("hash")
            if hash_info is not None:
                hash_info = hash_info[:7]
            # NOTE(review): a missing hash stores None here rather than the
            # fill value; kept as in the original, confirm this is intended.
            row_dict[dep] = hash_info

        yield list(row_dict.values())
def setUp(self):
    """Initialise a git repo in the current directory and commit the code.

    Runs the parent ``setUp`` first, stores the repo handle on ``self.git``
    and commits ``self.CODE`` with the message ``"add code"``.
    """
    from scmrepo.git import Git

    super().setUp()

    repo = Git.init(".")
    self.git = repo
    repo.add_commit(self.CODE, message="add code")