예제 #1
0
파일: __init__.py 프로젝트: metalglove/dvc
    def _init_executors(self, to_run):
        """Build one ``LocalExecutor`` per entry of ``to_run``.

        All work happens inside ``self.scm.stash_workspace`` (untracked files
        included) and ``self.scm.detach_head``. For every entry the
        EXEC_HEAD / EXEC_MERGE / EXEC_BASELINE refs are set before its
        executor is constructed, and the three refs are removed once every
        executor has been created. After the detached-HEAD context exits,
        the working tree is hard-reset and cleaned.

        Args:
            to_run: Mapping of stash rev -> entry exposing ``rev``,
                ``baseline_rev``, ``name`` and ``branch`` attributes.

        Returns:
            dict mapping ``item.rev`` to the executor created for that entry.
        """
        executors = {}
        with self.scm.stash_workspace(include_untracked=True):
            with self.scm.detach_head():
                for stash_rev, item in to_run.items():
                    # Executor will be initialized with an empty git repo
                    # that we populate by pushing:
                    #   EXEC_HEAD - the base commit for this experiment
                    #   EXEC_MERGE - the unmerged changes (from our stash)
                    #       to be reproduced
                    #   EXEC_BASELINE - the baseline commit for this
                    #       experiment
                    ref_targets = (
                        (EXEC_HEAD, item.rev),
                        (EXEC_MERGE, stash_rev),
                        (EXEC_BASELINE, item.baseline_rev),
                    )
                    for ref, target in ref_targets:
                        self.scm.set_ref(ref, target)

                    # NOTE(review): keyed by item.rev, not stash_rev, so two
                    # entries sharing the same rev overwrite each other --
                    # confirm this is intended.
                    executors[item.rev] = LocalExecutor(
                        self.scm,
                        self.dvc_dir,
                        name=item.name,
                        branch=item.branch,
                        cache_dir=self.repo.cache.local.cache_dir,
                    )

                # The temporary refs are only needed while executors are
                # being constructed.
                for ref in (EXEC_HEAD, EXEC_MERGE, EXEC_BASELINE):
                    self.scm.remove_ref(ref)

            git = self.scm.repo.git
            git.reset(hard=True)
            git.clean(force=True)
        return executors
예제 #2
0
    def _checkpoint_callback(
        self,
        result: Mapping,
        lock: threading.Lock,
        rev: str,
        executor: LocalExecutor,
        unchanged: Iterable,
        stages: Iterable,
    ):
        """Commit one checkpoint iteration and record it in ``result``.

        The whole callback runs while holding ``lock``.

        Args:
            result: Mapping updated in place; on a successful commit
                ``result[rev]`` is set to ``{exp_rev: exp_hash}``.
            lock: Lock held for the duration of the callback.
            rev: Key under which the checkpoint is recorded in ``result``;
                also forwarded to ``_collect_and_commit``.
            executor: Executor whose ``branch`` (or ``rev`` when no branch is
                set) is checked out before committing. Its ``branch`` is
                filled in after the first commit if it was unset.
            unchanged: Unchanged stages. Concatenated to ``stages`` with
                ``+``, so both must be the same concrete sequence type.
            stages: Stages that were run; hashed together with ``unchanged``.
        """
        # Context-manager form of acquire/release: the lock is released even
        # if any step below raises.
        with lock:
            # Check out the experiment branch when there is one, otherwise
            # the executor's rev.
            self._scm_checkout(executor.branch or executor.rev)

            exp_hash = hash_exp(stages + unchanged)
            exp_rev = self._collect_and_commit(
                rev, executor, exp_hash, checkpoint=True
            )
            if not exp_rev:
                # Nothing was committed, so there is nothing to record.
                return

            if not executor.branch:
                # Remember the branch so later iterations check it out
                # directly.
                executor.branch = self._get_branch_containing(exp_rev)
            logger.info(
                "Checkpoint experiment iteration '%s'.", exp_rev[:7]
            )
            result[rev] = {exp_rev: exp_hash}
예제 #3
0
    def reproduce(
        self,
        revs: Optional[Iterable] = None,
        keep_stash: Optional[bool] = True,
        checkpoint: Optional[bool] = False,
        **kwargs,
    ):
        """Reproduce the specified experiments.

        Args:
            revs: Revisions to reproduce. If None, every entry of
                ``self.stash_revs`` is reproduced.
            keep_stash: If True, only stashed experiments whose rev appears
                in the execution results are dropped, so the others are
                preserved. If False, every stashed experiment selected for
                this run is dropped.
            checkpoint: If True, executors are run through
                ``_reproduce_checkpoint`` (``kwargs`` are not forwarded);
                otherwise through ``_reproduce`` with ``kwargs``.
            **kwargs: Forwarded to ``_reproduce`` in the non-checkpoint case.

        Returns:
            dict built by merging every per-rev result mapping.
        """
        stashed = self.stash_revs

        # to_run maps input_rev -> stash entry, where input_rev holds the
        # changes to execute (usually a stash commit) and the entry's
        # baseline_rev is the baseline to compare output against.
        # The final experiment commit will be branched from baseline_rev.
        if revs is None:
            to_run = dict(stashed)
        else:
            to_run = {}
            for rev in revs:
                if rev in stashed:
                    to_run[rev] = stashed[rev]
                else:
                    # Not a stashed experiment: the rev acts as its own
                    # baseline (remaining fields are left as None).
                    to_run[rev] = self.StashEntry(None, rev, None)

        short_revs = ", ".join([rev[:7] for rev in to_run])
        logger.debug("Reproducing experiment revs '%s'", short_revs)

        # Set up executors: check out the baseline, apply the stashed
        # changes, build the executor and collect its input from the
        # resulting workspace.
        executors = {}
        for rev, entry in to_run.items():
            self._scm_checkout(entry.baseline_rev)
            self.scm.repo.git.stash("apply", rev)
            repro_args, repro_kwargs = self._unpack_args()
            executor = LocalExecutor(
                entry.baseline_rev,
                branch=entry.branch,
                repro_args=repro_args,
                repro_kwargs=repro_kwargs,
                dvc_dir=self.dvc_dir,
                cache_dir=self.repo.cache.local.cache_dir,
            )
            self._collect_input(executor)
            executors[rev] = executor

        if checkpoint:
            exec_results = self._reproduce_checkpoint(executors)
        else:
            exec_results = self._reproduce(executors, **kwargs)

        # With keep_stash only the revs that produced results are dropped;
        # otherwise every stashed rev selected for this run is dropped.
        droppable = exec_results if keep_stash else to_run
        stash_indexes = [stashed[rev][0] for rev in droppable if rev in stashed]
        # Drop from the highest index down so that the remaining indexes
        # stay valid.
        stash_indexes.sort(reverse=True)
        for index in stash_indexes:
            self.scm.repo.git.stash("drop", index)

        merged = {}
        for exp_result in exec_results.values():
            merged.update(exp_result)
        return merged
예제 #4
0
    def reproduce(
        self,
        revs: Optional[Iterable] = None,
        keep_stash: Optional[bool] = True,
        **kwargs,
    ):
        """Reproduce the specified experiments.

        Args:
            revs: Revisions to reproduce. If None, every entry of
                ``self.stash_revs`` is reproduced.
            keep_stash: If True, only stashed experiments whose rev appears
                in the execution results are dropped, so the others are
                preserved. If False, every stashed experiment selected for
                this run is dropped.
            **kwargs: Forwarded to ``_reproduce``.

        Returns:
            dict built by merging every per-rev result mapping.
        """
        stashed = self.stash_revs

        # to_run maps input_rev -> baseline_rev, where input_rev holds the
        # changes to execute (usually a stash commit) and baseline_rev is
        # the baseline to compare output against. Entries of stash_revs are
        # (stash_index, baseline_rev) pairs.
        # The final experiment commit will be branched from baseline_rev.
        if revs is None:
            to_run = {}
            for rev, (_, baseline_rev) in stashed.items():
                to_run[rev] = baseline_rev
        else:
            to_run = {}
            for rev in revs:
                if rev in stashed:
                    to_run[rev] = stashed[rev][1]
                else:
                    # Not a stashed experiment: the rev is its own baseline.
                    to_run[rev] = rev

        # Set up one executor per input rev, built from that rev's tree.
        executors = {}
        for rev, baseline_rev in to_run.items():
            tree = self.scm.get_tree(rev)
            repro_args, repro_kwargs = self._unpack_args(tree)
            executors[rev] = LocalExecutor(
                tree,
                baseline_rev,
                repro_args=repro_args,
                repro_kwargs=repro_kwargs,
                dvc_dir=self.dvc_dir,
                cache_dir=self.repo.cache.local.cache_dir,
            )

        exec_results = self._reproduce(executors, **kwargs)

        # With keep_stash only the revs that produced results are dropped;
        # otherwise every stashed rev selected for this run is dropped.
        droppable = exec_results if keep_stash else to_run
        stash_indexes = [stashed[rev][0] for rev in droppable if rev in stashed]
        # Drop from the highest index down so that the remaining indexes
        # stay valid.
        stash_indexes.sort(reverse=True)
        for index in stash_indexes:
            self.scm.repo.git.stash("drop", index)

        merged = {}
        for exp_result in exec_results.values():
            merged.update(exp_result)
        return merged