def _init_executors(self, to_run):
    """Build a ``LocalExecutor`` for every entry in ``to_run``.

    The workspace is stashed (untracked files included) and HEAD is
    detached while the executors are created. After HEAD is restored the
    working tree is hard-reset and cleaned before the stash context exits.

    Args:
        to_run: Mapping of stash rev to an entry exposing ``rev``,
            ``baseline_rev``, ``name`` and ``branch`` attributes.

    Returns:
        Dict mapping each entry's ``rev`` to the executor created for it.
    """
    scm = self.scm
    exec_refs = (EXEC_HEAD, EXEC_MERGE, EXEC_BASELINE)
    prepared = {}

    with scm.stash_workspace(include_untracked=True):
        with scm.detach_head():
            for stash_rev, item in to_run.items():
                # Point the temporary refs at this experiment before the
                # executor is constructed:
                #   EXEC_HEAD     - the base commit for this experiment
                #   EXEC_MERGE    - the unmerged changes (from our stash)
                #                   to be reproduced
                #   EXEC_BASELINE - the baseline commit for this experiment
                # The executor starts from an empty git repo that is
                # populated by pushing these refs.
                scm.set_ref(EXEC_HEAD, item.rev)
                scm.set_ref(EXEC_MERGE, stash_rev)
                scm.set_ref(EXEC_BASELINE, item.baseline_rev)

                # NOTE(review): keyed by item.rev rather than stash_rev, so
                # two entries sharing a base commit would overwrite each
                # other here - confirm this is what callers expect.
                prepared[item.rev] = LocalExecutor(
                    scm,
                    self.dvc_dir,
                    name=item.name,
                    branch=item.branch,
                    cache_dir=self.repo.cache.local.cache_dir,
                )

            # The temporary refs are only needed while executors are built.
            for ref_name in exec_refs:
                scm.remove_ref(ref_name)

        # Clear the working tree while the user's workspace is still stashed.
        scm.repo.git.reset(hard=True)
        scm.repo.git.clean(force=True)

    return prepared
def _checkpoint_callback(
    self,
    result: Mapping,
    lock: threading.Lock,
    rev: str,
    executor: LocalExecutor,
    unchanged: Iterable,
    stages: Iterable,
):
    """Commit one checkpoint iteration and record it in ``result``.

    The whole callback runs while holding ``lock``.

    Args:
        result: Mapping updated in place; on a successful commit
            ``result[rev]`` is set to ``{exp_rev: exp_hash}``.
        lock: Lock held for the duration of the callback.
        rev: Key under which the checkpoint is recorded in ``result``.
        executor: Executor whose ``branch`` (or ``rev`` when no branch is
            set) is checked out before committing. Its ``branch`` is
            filled in after the first successful commit.
        unchanged: Concatenated after ``stages`` to compute the
            experiment hash.
        stages: Concatenated before ``unchanged`` to compute the
            experiment hash.
    """
    with lock:
        # Prefer the experiment branch; fall back to the executor's rev.
        self._scm_checkout(executor.branch or executor.rev)

        exp_hash = hash_exp(stages + unchanged)
        exp_rev = self._collect_and_commit(
            rev, executor, exp_hash, checkpoint=True
        )
        if not exp_rev:
            # Nothing was committed, so there is nothing to record.
            return

        if not executor.branch:
            # Remember the branch so later checkpoints check it out.
            executor.branch = self._get_branch_containing(exp_rev)

        logger.info("Checkpoint experiment iteration '%s'.", exp_rev[:7])
        result[rev] = {exp_rev: exp_hash}
def reproduce(
    self,
    revs: Optional[Iterable] = None,
    keep_stash: Optional[bool] = True,
    checkpoint: Optional[bool] = False,
    **kwargs,
):
    """Reproduce the specified experiments.

    Args:
        revs: Revisions to reproduce. If not specified, all stashed
            experiments will be reproduced.
        keep_stash: If True, only stashed experiments that appear in the
            execution results are dropped, so experiments that fail to
            reproduce stay in the stash. If False, every stashed
            experiment selected for this run is dropped.
        checkpoint: If True, run the executors through
            ``_reproduce_checkpoint``; otherwise through ``_reproduce``
            (which also receives ``**kwargs``).

    Returns:
        A single dict merged from the per-rev results of the run.
    """
    stash_revs = self.stash_revs

    # Map every input rev (usually a stash commit holding the changes to
    # execute) to its stash entry. Revs that are not stashed get a
    # placeholder entry built as StashEntry(None, rev, None).
    if revs is None:
        to_run = dict(stash_revs)
    else:
        to_run = {}
        for rev in revs:
            if rev in stash_revs:
                to_run[rev] = stash_revs[rev]
            else:
                to_run[rev] = self.StashEntry(None, rev, None)

    short_revs = ", ".join(rev[:7] for rev in to_run)
    logger.debug("Reproducing experiment revs '%s'", short_revs)

    # Set up executors: check out the baseline, apply the stashed changes,
    # create the executor, then collect the workspace input into it.
    executors = {}
    for rev, entry in to_run.items():
        self._scm_checkout(entry.baseline_rev)
        self.scm.repo.git.stash("apply", rev)
        repro_args, repro_kwargs = self._unpack_args()
        runner = LocalExecutor(
            entry.baseline_rev,
            branch=entry.branch,
            repro_args=repro_args,
            repro_kwargs=repro_kwargs,
            dvc_dir=self.dvc_dir,
            cache_dir=self.repo.cache.local.cache_dir,
        )
        self._collect_input(runner)
        executors[rev] = runner

    if checkpoint:
        exec_results = self._reproduce_checkpoint(executors)
    else:
        exec_results = self._reproduce(executors, **kwargs)

    # keep_stash: drop only the stashed experiments that produced a result;
    # otherwise drop every stashed experiment that was selected.
    drop_from = exec_results if keep_stash else to_run
    to_drop = [stash_revs[rev][0] for rev in drop_from if rev in stash_revs]
    # Drop from the highest index down.
    to_drop.sort(reverse=True)
    for index in to_drop:
        self.scm.repo.git.stash("drop", index)

    merged = {}
    for exp_result in exec_results.values():
        merged.update(exp_result)
    return merged
def reproduce(
    self,
    revs: Optional[Iterable] = None,
    keep_stash: Optional[bool] = True,
    **kwargs,
):
    """Reproduce the specified experiments.

    Args:
        revs: Revisions to reproduce. If not specified, all stashed
            experiments will be reproduced.
        keep_stash: If True, only stashed experiments that appear in the
            execution results are dropped, so experiments that fail to
            reproduce stay in the stash. If False, every stashed
            experiment selected for this run is dropped.
        **kwargs: Forwarded unchanged to ``_reproduce``.

    Returns:
        A single dict merged from the per-rev results of the run.
    """
    stash_revs = self.stash_revs

    # Map every input rev (usually a stash commit holding the changes to
    # execute) to the baseline rev its output is compared against. A rev
    # that is not in the stash serves as its own baseline.
    to_run = {}
    if revs is None:
        for rev, (_, baseline_rev) in stash_revs.items():
            to_run[rev] = baseline_rev
    else:
        for rev in revs:
            if rev in stash_revs:
                to_run[rev] = stash_revs[rev][1]
            else:
                to_run[rev] = rev

    # Set up one executor per input rev, built from that rev's tree.
    executors = {}
    for rev, baseline_rev in to_run.items():
        tree = self.scm.get_tree(rev)
        repro_args, repro_kwargs = self._unpack_args(tree)
        executors[rev] = LocalExecutor(
            tree,
            baseline_rev,
            repro_args=repro_args,
            repro_kwargs=repro_kwargs,
            dvc_dir=self.dvc_dir,
            cache_dir=self.repo.cache.local.cache_dir,
        )

    exec_results = self._reproduce(executors, **kwargs)

    # keep_stash: drop only the stashed experiments that produced a result;
    # otherwise drop every stashed experiment that was selected.
    drop_from = exec_results if keep_stash else to_run
    to_drop = [stash_revs[rev][0] for rev in drop_from if rev in stash_revs]
    # Drop from the highest index down.
    to_drop.sort(reverse=True)
    for index in to_drop:
        self.scm.repo.git.stash("drop", index)

    merged = {}
    for exp_result in exec_results.values():
        merged.update(exp_result)
    return merged