Example #1
0
    def test_keyboard_interrupt(self):
        """Ensure a KeyboardInterrupt aborts estimate_param_scan even with failfast=False."""
        class sleeping_estimator(Estimator):
            def __init__(self, raise_=False):
                # BUG FIX: the original stored the flag as self._raise while
                # _estimate reads self.raise_, so calling _estimate on a
                # freshly constructed instance raised AttributeError instead
                # of exercising the sleep/interrupt paths. Store it under the
                # same name as the __init__ parameter, which is also the name
                # the param-scan machinery assigns via its parameter sets.
                self.raise_ = raise_

            def _estimate(self, X):
                if not self.raise_:
                    import time
                    # long sleep: the scan must be interruptible before it ends
                    time.sleep(5)
                else:
                    raise KeyboardInterrupt()

        # failfast=False must not swallow KeyboardInterrupt — it has to
        # propagate out of the scan so the user can break execution.
        with self.assertRaises(KeyboardInterrupt):
            estimate_param_scan(sleeping_estimator,
                                X=None,
                                param_sets=[{
                                    'raise_': (False, True)
                                }],
                                failfast=False)
Example #2
0
 def test_evaluate_msm(self):
     """Scanning MaximumLikelihoodMSM over several lags with a single
     evaluate target yields a plain list of per-lag results."""
     from pyerna.msm.estimators import MaximumLikelihoodMSM
     # short discrete trajectory over three states
     dtraj = [0, 0, 1, 2, 1, 0, 1, 0, 1, 2, 2, 0, 0, 0, 1, 1, 2, 1, 0, 0,
              1, 2, 1, 0, 0, 0, 1, 1, 0, 1, 2]
     grid = param_grid({'lag': [1, 2, 3]})
     result = estimate_param_scan(MaximumLikelihoodMSM,
                                  dtraj,
                                  grid,
                                  evaluate='timescales')
     self.assertIsInstance(result, list)
Example #3
0
    def test_evaluate_msm_multi_arg(self):
        """A tuple in evaluate_args is forwarded positionally to the
        evaluated method for every estimated model."""
        from pyerna.msm.estimators import MaximumLikelihoodMSM
        # short discrete trajectory over three states
        dtraj = [0, 0, 1, 2, 1, 0, 1, 0, 1, 2, 2, 0, 0, 0, 1, 1, 2, 1, 0, 0,
                 1, 2, 1, 0, 0, 0, 1, 1, 0, 1, 2]
        traj_len = 10
        grid = param_grid({'lag': [1, 2, 3]})
        # target signature: generate_traj(self, N, start=None, stop=None, stride=1)
        result = estimate_param_scan(MaximumLikelihoodMSM,
                                     dtraj,
                                     grid,
                                     evaluate='generate_traj',
                                     evaluate_args=((traj_len, 2, None, 2), ))
        self.assertIsInstance(result, list)
        self.assertEqual(len(result), 3)  # one entry per lag time
        self.assertTrue(all(len(generated) == traj_len for generated in result))
Example #4
0
 def test_evaluate_bmsm_single_arg(self):
     """A bare string in evaluate_args is passed through as the single
     argument of the evaluated method (here: sample_f('timescales'))."""
     from pyerna.msm.estimators import BayesianMSM
     # short discrete trajectory over three states
     dtraj = [0, 0, 1, 2, 1, 0, 1, 0, 1, 2, 2, 0, 0, 0, 1, 1, 2, 1, 0, 0,
              1, 2, 1, 0, 0, 0, 1, 1, 0, 1, 2]
     n_samples = 52
     grid = param_grid({'lag': [1, 2, 3],
                        'show_progress': (False, ),
                        'nsamples': (n_samples, )})
     result = estimate_param_scan(BayesianMSM,
                                  dtraj,
                                  grid,
                                  evaluate='sample_f',
                                  evaluate_args='timescales')
     self.assertIsInstance(result, list)
     self.assertEqual(len(result), 3)  # one entry per lag time
     self.assertEqual(len(result[0]), n_samples)
Example #5
0
    def _estimate(self, dtrajs):
        """Run the estimator for every configured lag time and post-process.

        Parameters
        ----------
        dtrajs : discrete trajectory or list thereof
            Input data; normalized via ``_types.ensure_dtraj_list``.

        Returns
        -------
        self : this instance, with models/results stored by
            ``_postprocess_results``.

        Notes
        -----
        Supports incremental re-estimation: if ``self._estimated`` is already
        set and the input hash matches the previous call, only lag times not
        covered by ``self._last_lags`` are (re)estimated.
        """
        ### PREPARE AND CHECK DATA
        # TODO: Currently only discrete trajectories are implemented. For a general class this needs to be changed.
        dtrajs = _types.ensure_dtraj_list(dtrajs)

        # check trajectory lengths
        if self._estimated:
            # if dtrajs has now changed, unset the _estimated flag to re-set every derived quantity.
            assert hasattr(self, '_last_dtrajs_input_hash')
            current_hash = _hash_dtrajs(dtrajs)
            if self._last_dtrajs_input_hash != current_hash:
                self.logger.warning(
                    "estimating from new data, discard all previously computed models."
                )
                self._estimated = False
                self._last_dtrajs_input_hash = current_hash
        else:
            # first run: remember the input hash so a later call can detect new data
            self._last_dtrajs_input_hash = _hash_dtrajs(dtrajs)

        self._trajlengths = np.fromiter((len(traj) for traj in dtrajs),
                                        dtype=int,
                                        count=len(dtrajs))
        maxlength = np.max(self._trajlengths)

        # set lag times by data if not yet set
        if self._lags is None:
            # heuristic: cap the largest lag at half the mean trajectory length
            maxlag = 0.5 * np.sum(self._trajlengths) / float(
                len(self._trajlengths))
            self._lags = _generate_lags(maxlag, 1.5)

        # check if some lag times are forbidden.
        # A lag >= the longest trajectory cannot produce any transition counts,
        # so such lags are dropped with a warning.
        if np.max(self._lags) >= maxlength:
            Ifit = np.where(self._lags < maxlength)[0]
            Inofit = np.where(self._lags >= maxlength)[0]
            self.logger.warning(
                'Ignoring lag times that exceed the longest trajectory: %s',
                self._lags[Inofit])
            self._lags = self._lags[Ifit]

        ### RUN ESTIMATION
        if self._estimated:
            # we already had run an estimation, determine which lag times we need to compute
            # TODO: this will re-evaluate problematic lag times, wont it?
            lags = sorted(list(set(self._lags).difference(self._last_lags)))
            if len(lags) == 0:
                # nothing new to do — keep previous results untouched
                self.logger.info("All lag times already estimated.")
                return self
            assert lags
            self.logger.info(
                "Running estimating for not yet estimated lags times: %s",
                lags)
        else:
            lags = self._lags

        # construct all parameter sets for the estimator (one per lag time)
        param_sets = tuple(param_grid({'lag': lags}))

        # run estimation on all lag times
        # silence the per-estimator progress bar; progress is reported at this level
        if hasattr(self.estimator, 'show_progress'):
            self.estimator.show_progress = False
        if self.show_progress:
            pg = ProgressReporter()
            ctx = pg.context()
        else:
            pg = None
            # TODO: replace with nullcontext from util once merged.
            from contextlib import contextmanager

            @contextmanager
            def dummy():
                # no-op stand-in for contextlib.nullcontext
                yield

            ctx = dummy()
        with ctx:
            if not self.only_timescales:
                # full mode: keep the estimated models and their estimators
                models, estimators = estimate_param_scan(
                    self.estimator,
                    dtrajs,
                    param_sets,
                    failfast=False,
                    return_estimators=True,
                    n_jobs=self.n_jobs,
                    progress_reporter=pg,
                    return_exceptions=True)
                self._estimators = estimators
            else:
                # lean mode: evaluate only the timescales (and, for sampling
                # estimators, sampled timescales) instead of keeping full models
                evaluate = ['timescales']
                evaluate_args = [[self.nits]]
                if self._estimator_produces_samples():
                    evaluate.append('sample_f')
                    evaluate_args.append('timescales')
                results = estimate_param_scan(
                    self.estimator,
                    dtrajs,
                    param_sets,
                    failfast=False,
                    return_estimators=False,
                    n_jobs=self.n_jobs,
                    evaluate=evaluate,
                    evaluate_args=evaluate_args,
                    progress_reporter=pg,
                    return_exceptions=True,
                )

                # wrap the raw evaluation results in lightweight stand-in models
                if self._estimator_produces_samples():
                    models = [
                        _DummyModel(lag, ts, ts_sample)
                        for lag, (ts, ts_sample) in zip(lags, results)
                    ]
                else:
                    models = [
                        _DummyModel(
                            lag,
                            ts,
                            None,
                        ) for lag, ts in zip(lags, results)
                    ]
            self._postprocess_results(models)

        return self
Example #6
0
    def _estimate(self, data):
        """Estimate models at multiples of the test estimator's lag time and
        collect predicted vs. estimated observables (with confidence bounds
        where available).

        Parameters
        ----------
        data : input data forwarded unchanged to ``estimate_param_scan``.

        Returns
        -------
        self : this instance, with ``_pred``/``_est`` (and the ``_L``/``_R``
            confidence arrays, or ``None`` when unavailable) populated.
        """
        # lag times: each entry of self.mlags is a multiple of the base lag
        self._lags = np.array(self.mlags) * self.test_estimator.lag
        pargrid = list(param_grid({'lag': self._lags}))
        # do we have zero lag? this must be treated separately
        include0 = self.mlags[0] == 0
        if include0:
            # lag 0 is not estimated; drop it from the scan grid
            pargrid = pargrid[1:]

        # predictions and estimates, with left/right confidence bounds
        self._pred = []
        self._pred_L = []
        self._pred_R = []

        self._est = []
        self._est_L = []
        self._est_R = []

        # clone estimators and run estimates

        if self.show_progress:
            if isinstance(self.test_estimator, SampledModel):
                # suppress the nested estimator's own progress output
                self.test_estimator.show_progress = False
            progress_reporter = self
        else:
            progress_reporter = None

        estimated_models, estimators = \
            estimate_param_scan(self.test_estimator, data, pargrid, return_estimators=True, failfast=False,
                                progress_reporter=progress_reporter, n_jobs=self.n_jobs)
        if include0:
            # re-align with self.mlags: placeholder for the skipped zero lag
            estimated_models = [None] + estimated_models
            estimators = [None] + estimators

        for i, mlag in enumerate(self.mlags):
            # make a prediction using the current model
            self._pred.append(
                self._compute_observables(self.test_model, self.test_estimator,
                                          mlag))
            # compute prediction errors if we can
            if self.has_errors:
                l, r = self._compute_observables_conf(self.test_model,
                                                      self.test_estimator,
                                                      mlag)
                self._pred_L.append(l)
                self._pred_R.append(r)

            # do an estimate at this lagtime
            # (model/estimator are None for the zero-lag placeholder)
            model = estimated_models[i]
            estimator = estimators[i]
            self._est.append(self._compute_observables(model, estimator))
            if self.has_errors and self.err_est:
                l, r = self._compute_observables_conf(model, estimator)
                self._est_L.append(l)
                self._est_R.append(r)

        # build arrays
        self._est = np.array(self._est)
        self._pred = np.array(self._pred)
        if self.has_errors:
            self._pred_L = np.array(self._pred_L)
            self._pred_R = np.array(self._pred_R)
        else:
            # no error model configured — signal absence with None
            self._pred_L = None
            self._pred_R = None
        if self.has_errors and self.err_est:
            self._est_L = np.array(self._est_L)
            self._est_R = np.array(self._est_R)
        else:
            self._est_L = None
            self._est_R = None

        return self