def _git_checkout(repo_path, rev):
    """Check out ``rev`` in the git repository located at ``repo_path``.

    A ``Git`` handle is opened on the repository for the duration of the
    checkout and is always closed afterwards, including when the checkout
    itself raises.
    """
    logger.debug("erepo: git checkout %s@%s", repo_path, rev)
    repo = Git(repo_path)
    try:
        repo.checkout(rev)
    finally:
        # Release the handle regardless of the checkout outcome.
        repo.close()
def git(tmp_dir, scm, request):
    """Yield a ``Git`` object for ``tmp_dir`` restricted to one backend.

    The backend is taken from ``request.param`` and is also recorded on the
    yielded object as ``test_backend``. The object is closed once the
    consumer hands control back to this generator.

    ``scm`` is not used in the body.
    NOTE(review): presumably it is requested only so that the repository is
    initialised before this runs -- confirm against the ``scm`` fixture.
    """
    from dvc.scm.git import Git

    repo = Git(os.fspath(tmp_dir), backends=[request.param])
    # Expose the selected backend so consumers can tell which one is active.
    repo.test_backend = request.param

    yield repo

    repo.close()
def _clone_default_branch(url, rev):
    """Return the path of a local clone of ``url``, creating it if needed.

    Clone locations are remembered in ``CLONES``, keyed by URL. The first
    request for a URL clones it into a fresh temporary directory. Later
    requests reuse that directory and refresh it with ``git pull``, except
    when ``rev`` is a SHA that the existing clone already contains.
    """
    cached_path = CLONES.get(url)

    repo = None
    try:
        if not cached_path:
            logger.debug("erepo: git clone %s to a temporary dir", url)
            cached_path = tempfile.mkdtemp("dvc-clone")
            repo = Git.clone(url, cached_path)
            CLONES[url] = cached_path
        else:
            repo = Git(cached_path)
            # A SHA that is already present cannot change, so only that case
            # skips the pull; branches and tags might have moved upstream.
            if not (Git.is_sha(rev) and repo.has_rev(rev)):
                logger.debug("erepo: git pull %s", url)
                repo.pull()
    finally:
        # Close whichever handle was opened, even if cloning/pulling failed.
        if repo:
            repo.close()

    return cached_path
def _git_checkout(repo_path, rev):
    """Check out ``rev`` in the git repository located at ``repo_path``.

    ``Git`` is imported inside the function. The handle opened on the
    repository is closed in all cases, whether or not the checkout succeeds.
    """
    from dvc.scm import Git

    handle = Git(repo_path)
    try:
        handle.checkout(rev)
    finally:
        # Always release the repository handle.
        handle.close()
def _git_checkout(repo_path, rev):
    """Check out ``rev`` in the git repository located at ``repo_path``.

    The checkout is announced at debug level first. The ``Git`` handle
    opened for the operation is closed afterwards even if the checkout
    raises.
    """
    from dvc.scm.git import Git

    logger.debug("erepo: git checkout %s@%s", repo_path, rev)

    scm = Git(repo_path)
    try:
        scm.checkout(rev)
    finally:
        # Release the handle regardless of the checkout outcome.
        scm.close()
def _clone_default_branch(url, rev, for_write=False):
    """Return ``(path, shallow)`` for a local clone of ``url``.

    Clones are remembered in ``CLONES`` as ``(path, shallow)`` tuples keyed
    by URL.

    * No clone cached yet: clone into a fresh temporary directory. When the
      clone is not needed for writing and ``rev`` is a non-SHA name, a
      shallow clone of that name is tried first; if it fails with
      ``CloneError`` a full clone is made instead.
    * Clone cached: it is left untouched when ``rev`` is a SHA that the clone
      already has. Otherwise a shallow clone is unshallowed and a full clone
      is refreshed with ``git pull``.

    ``shallow`` in the result says whether the returned clone is shallow.
    """
    from dvc.scm.git import Git

    cached_path, is_shallow = CLONES.get(url, (None, False))

    repo = None
    try:
        if not cached_path:
            logger.debug("erepo: git clone '%s' to a temporary dir", url)
            cached_path = tempfile.mkdtemp("dvc-clone")

            # Only a read-only request for a tag or branch name qualifies
            # for the shallow clone attempt.
            try_shallow = not for_write and rev and not Git.is_sha(rev)
            if try_shallow:
                from dvc.scm.base import CloneError

                try:
                    repo = Git.clone(url, cached_path, shallow_branch=rev)
                    is_shallow = True
                    logger.debug(
                        "erepo: using shallow clone for branch '%s'", rev
                    )
                except CloneError:
                    # Best effort only: the full clone below takes over.
                    pass

            if not repo:
                repo = Git.clone(url, cached_path)
                is_shallow = False

            CLONES[url] = (cached_path, is_shallow)
        else:
            repo = Git(cached_path)
            # A SHA that is already present cannot change, so only that case
            # leaves the clone alone; branches and tags might have moved.
            if not (Git.is_sha(rev) and repo.has_rev(rev)):
                if is_shallow:
                    # The shallow clone lacks the requested rev. Fetching a
                    # specific SHA is only supported by some git versions,
                    # so when several revs of one URL are needed it is
                    # easier and safer to convert to a full clone.
                    logger.debug("erepo: unshallowing clone for '%s'", url)
                    _unshallow(repo)
                    is_shallow = False
                    CLONES[url] = (cached_path, is_shallow)
                else:
                    logger.debug("erepo: git pull '%s'", url)
                    repo.pull()
    finally:
        # Close whichever handle was opened, even on failure.
        if repo:
            repo.close()

    return cached_path, is_shallow
def reproduce(
    cls,
    dvc_dir: str,
    queue: "Queue",
    rev: str,
    cwd: Optional[str] = None,
    name: Optional[str] = None,
) -> Tuple[Optional[str], bool]:
    """Run dvc repro and return the result.

    Args:
        dvc_dir: Directory passed to ``Repo`` to open the DVC repository.
        queue: Queue that receives a ``(rev, pid)`` tuple for this process
            before any other work is done.
        rev: Revision identifier reported through ``queue``.
        cwd: Directory to run the reproduction in. Falls back to the
            repository root when empty or ``None``.
        name: Experiment name, forwarded to the checkpoint callback and to
            the commit.

    Returns:
        Tuple of ``(exp_hash, force)`` where ``exp_hash`` is the experiment
        hash (or ``None`` when no experiment commit was made) and ``force``
        is a bool specifying whether or not this experiment should force
        overwrite any existing duplicates.

    The process working directory is changed for the duration of the run and
    restored afterwards. ``UnchangedExperimentError`` is swallowed; any other
    exception propagates after cleanup.
    """
    unchanged = []

    queue.put((rev, os.getpid()))

    def filter_pipeline(stages):
        # Collect the pipeline stages that repro reports as unchanged.
        unchanged.extend(
            stage for stage in stages if isinstance(stage, PipelineStage)
        )

    result = None
    force = False
    # Bind these before entering ``try`` so that the ``finally`` clause never
    # hits an unbound local (and thereby hides the real error) when a failure
    # happens before they are assigned.
    scm = None
    old_cwd = None

    try:
        dvc = Repo(dvc_dir)
        old_cwd = os.getcwd()
        new_cwd = cwd if cwd else dvc.root_dir
        os.chdir(new_cwd)
        # Log the directory actually entered; ``cwd`` itself may be None.
        logger.debug("Running repro in '%s'", new_cwd)

        args_path = os.path.join(dvc.tmp_dir, BaseExecutor.PACKED_ARGS_FILE)
        if os.path.exists(args_path):
            args, kwargs = BaseExecutor.unpack_repro_args(args_path)
            # The packed arguments are single-use: drop the file once read.
            remove(args_path)
        else:
            args = []
            kwargs = {}

        force = kwargs.get("force", False)

        # NOTE: for checkpoint experiments we handle persist outs slightly
        # differently than normal:
        #
        # - checkpoint out may not yet exist if this is the first time this
        #   experiment has been run, this is not an error condition for
        #   experiments
        # - at the start of a repro run, we need to remove the persist out
        #   and restore it to its last known (committed) state (which may
        #   be removed/does not yet exist) so that our executor workspace
        #   is not polluted with the (persistent) out from an unrelated
        #   experiment run
        dvc.checkout(force=True, quiet=True)

        # We cannot use dvc.scm to make commits inside the executor since
        # cached props are not picklable.
        scm = Git()
        checkpoint_func = partial(cls.checkpoint_callback, scm, name)
        stages = dvc.reproduce(
            *args,
            on_unchanged=filter_pipeline,
            checkpoint_func=checkpoint_func,
            **kwargs,
        )

        exp_hash = cls.hash_exp(stages)
        exp_rev = cls.commit(scm, exp_hash, exp_name=name)
        # Hand the hash back to the caller as documented; previously it was
        # computed and then discarded, so ``None`` was always returned.
        # NOTE(review): set only after a successful commit, so a swallowed
        # UnchangedExperimentError still yields None -- confirm with callers.
        result = exp_hash
        if scm.get_ref(EXEC_CHECKPOINT):
            scm.set_ref(EXEC_CHECKPOINT, exp_rev)
    except UnchangedExperimentError:
        pass
    finally:
        if scm is not None:
            scm.close()
            del scm
        if old_cwd is not None:
            os.chdir(old_cwd)

    # ideally we would return stages here like a normal repro() call, but
    # stages is not currently picklable and cannot be returned across
    # multiprocessing calls
    return result, force
def _git_checkout(repo_path, rev):
    """Check out ``rev`` in the git repository located at ``repo_path``.

    The ``Git`` handle opened on the repository is closed once the checkout
    finishes, whether it succeeded or raised.
    """
    checkout_repo = Git(repo_path)
    try:
        checkout_repo.checkout(rev)
    finally:
        # Always release the repository handle.
        checkout_repo.close()