def test_keyboard_interrupt(self):
    """ ensure we can break the execution even with failfast=False """
    class sleeping_estimator(Estimator):
        def __init__(self, raise_=False):
            self.raise_ = raise_

        def _estimate(self, X):
            if not self.raise_:
                import time
                time.sleep(5)
            else:
                raise KeyboardInterrupt()

    with self.assertRaises(KeyboardInterrupt):
        estimate_param_scan(sleeping_estimator, X=None,
                            param_sets=[{'raise_': (False, True)}],
                            failfast=False)
def test_evaluate_msm(self):
    from pyerna.msm.estimators import MaximumLikelihoodMSM
    dtraj = [0, 0, 1, 2, 1, 0, 1, 0, 1, 2, 2, 0, 0, 0, 1, 1, 2, 1, 0, 0,
             1, 2, 1, 0, 0, 0, 1, 1, 0, 1, 2]  # mini-trajectory
    param_sets = param_grid({'lag': [1, 2, 3]})
    res = estimate_param_scan(MaximumLikelihoodMSM, dtraj, param_sets,
                              evaluate='timescales')
    self.assertIsInstance(res, list)
def test_evaluate_msm_multi_arg(self):
    from pyerna.msm.estimators import MaximumLikelihoodMSM
    dtraj = [0, 0, 1, 2, 1, 0, 1, 0, 1, 2, 2, 0, 0, 0, 1, 1, 2, 1, 0, 0,
             1, 2, 1, 0, 0, 0, 1, 1, 0, 1, 2]  # mini-trajectory
    traj_len = 10
    param_sets = param_grid({'lag': [1, 2, 3]})
    # signature: generate_traj(self, N, start=None, stop=None, stride=1)
    res = estimate_param_scan(MaximumLikelihoodMSM, dtraj, param_sets,
                              evaluate='generate_traj',
                              evaluate_args=((traj_len, 2, None, 2), ))
    self.assertIsInstance(res, list)
    self.assertEqual(len(res), 3)  # three lag times
    self.assertTrue(all(len(x) == traj_len for x in res))
def test_evaluate_bmsm_single_arg(self):
    from pyerna.msm.estimators import BayesianMSM
    dtraj = [0, 0, 1, 2, 1, 0, 1, 0, 1, 2, 2, 0, 0, 0, 1, 1, 2, 1, 0, 0,
             1, 2, 1, 0, 0, 0, 1, 1, 0, 1, 2]  # mini-trajectory
    n_samples = 52
    param_sets = param_grid({'lag': [1, 2, 3],
                             'show_progress': (False, ),
                             'nsamples': (n_samples, )})
    res = estimate_param_scan(BayesianMSM, dtraj, param_sets,
                              evaluate='sample_f', evaluate_args='timescales')
    self.assertIsInstance(res, list)
    self.assertEqual(len(res), 3)  # three lag times
    self.assertEqual(len(res[0]), n_samples)
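
# A minimal sketch (not pyerna's implementation) of the evaluate/evaluate_args
# convention exercised by the three tests above: after an estimator has been
# fit, the attribute named by ``evaluate`` is assumed to be looked up on it and,
# if callable, invoked with the supplied arguments; otherwise its value is
# returned as-is. The helper name ``_sketch_evaluate`` is hypothetical.
def _sketch_evaluate(fitted_estimator, evaluate, evaluate_args=None):
    attr = getattr(fitted_estimator, evaluate)
    if not callable(attr):
        return attr
    if evaluate_args is None:
        return attr()
    if isinstance(evaluate_args, (tuple, list)):
        return attr(*evaluate_args)
    return attr(evaluate_args)  # single argument, e.g. 'timescales' for sample_f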
def _estimate(self, dtrajs):
    ### PREPARE AND CHECK DATA
    # TODO: Currently only discrete trajectories are implemented. For a general class this needs to be changed.
    dtrajs = _types.ensure_dtraj_list(dtrajs)

    # check trajectory lengths
    if self._estimated:
        # if dtrajs has changed, unset the _estimated flag to re-compute every derived quantity.
        assert hasattr(self, '_last_dtrajs_input_hash')
        current_hash = _hash_dtrajs(dtrajs)
        if self._last_dtrajs_input_hash != current_hash:
            self.logger.warning("estimating from new data, discarding all previously computed models.")
            self._estimated = False
            self._last_dtrajs_input_hash = current_hash
    else:
        self._last_dtrajs_input_hash = _hash_dtrajs(dtrajs)

    self._trajlengths = np.fromiter((len(traj) for traj in dtrajs), dtype=int, count=len(dtrajs))
    maxlength = np.max(self._trajlengths)

    # set lag times by data if not yet set
    if self._lags is None:
        maxlag = 0.5 * np.sum(self._trajlengths) / float(len(self._trajlengths))
        self._lags = _generate_lags(maxlag, 1.5)

    # check if some lag times are forbidden.
    if np.max(self._lags) >= maxlength:
        Ifit = np.where(self._lags < maxlength)[0]
        Inofit = np.where(self._lags >= maxlength)[0]
        self.logger.warning('Ignoring lag times that exceed the longest trajectory: %s',
                            self._lags[Inofit])
        self._lags = self._lags[Ifit]

    ### RUN ESTIMATION
    if self._estimated:
        # we already ran an estimation; determine which lag times still need to be computed
        # TODO: this will re-evaluate problematic lag times, won't it?
        lags = sorted(list(set(self._lags).difference(self._last_lags)))
        if len(lags) == 0:
            self.logger.info("All lag times already estimated.")
            return self
        assert lags
        self.logger.info("Running estimation for not yet estimated lag times: %s", lags)
    else:
        lags = self._lags

    # construct all parameter sets for the estimator
    param_sets = tuple(param_grid({'lag': lags}))

    # run estimation on all lag times
    if hasattr(self.estimator, 'show_progress'):
        self.estimator.show_progress = False

    if self.show_progress:
        pg = ProgressReporter()
        ctx = pg.context()
    else:
        pg = None
        # TODO: replace with nullcontext from util once merged.
        from contextlib import contextmanager

        @contextmanager
        def dummy():
            yield
        ctx = dummy()

    with ctx:
        if not self.only_timescales:
            models, estimators = estimate_param_scan(self.estimator, dtrajs, param_sets,
                                                     failfast=False,
                                                     return_estimators=True, n_jobs=self.n_jobs,
                                                     progress_reporter=pg, return_exceptions=True)
            self._estimators = estimators
        else:
            evaluate = ['timescales']
            evaluate_args = [[self.nits]]
            if self._estimator_produces_samples():
                evaluate.append('sample_f')
                evaluate_args.append('timescales')
            results = estimate_param_scan(self.estimator, dtrajs, param_sets,
                                          failfast=False,
                                          return_estimators=False, n_jobs=self.n_jobs,
                                          evaluate=evaluate, evaluate_args=evaluate_args,
                                          progress_reporter=pg, return_exceptions=True)
            if self._estimator_produces_samples():
                models = [_DummyModel(lag, ts, ts_sample)
                          for lag, (ts, ts_sample) in zip(lags, results)]
            else:
                models = [_DummyModel(lag, ts, None)
                          for lag, ts in zip(lags, results)]

    self._postprocess_results(models)
    return self
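
# Hypothetical stand-in (the real _DummyModel is defined elsewhere in pyerna)
# showing the only contract the only_timescales branch above relies on: the
# object records the lag time, the estimated timescales and, optionally, the
# sampled timescales, so that _postprocess_results can consume it like a
# fitted model. The name _DummyModelSketch and its attributes are assumptions.
class _DummyModelSketch(object):
    def __init__(self, lag, timescales, ts_sample=None):
        self.lag = lag
        self.timescales = timescales
        self.ts_sample = ts_sample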
def _estimate(self, data):
    # lag times
    self._lags = np.array(self.mlags) * self.test_estimator.lag
    pargrid = list(param_grid({'lag': self._lags}))
    # do we have zero lag? this must be treated separately
    include0 = self.mlags[0] == 0
    if include0:
        pargrid = pargrid[1:]

    self._pred = []
    self._pred_L = []
    self._pred_R = []

    self._est = []
    self._est_L = []
    self._est_R = []

    # clone estimators and run estimates
    if self.show_progress:
        if isinstance(self.test_estimator, SampledModel):
            self.test_estimator.show_progress = False
        progress_reporter = self
    else:
        progress_reporter = None

    estimated_models, estimators = \
        estimate_param_scan(self.test_estimator, data, pargrid,
                            return_estimators=True, failfast=False,
                            progress_reporter=progress_reporter,
                            n_jobs=self.n_jobs)
    if include0:
        estimated_models = [None] + estimated_models
        estimators = [None] + estimators

    for i, mlag in enumerate(self.mlags):
        # make a prediction using the current model
        self._pred.append(self._compute_observables(self.test_model, self.test_estimator, mlag))
        # compute prediction errors if we can
        if self.has_errors:
            l, r = self._compute_observables_conf(self.test_model, self.test_estimator, mlag)
            self._pred_L.append(l)
            self._pred_R.append(r)

        # do an estimate at this lag time
        model = estimated_models[i]
        estimator = estimators[i]
        self._est.append(self._compute_observables(model, estimator))
        if self.has_errors and self.err_est:
            l, r = self._compute_observables_conf(model, estimator)
            self._est_L.append(l)
            self._est_R.append(r)

    # build arrays
    self._est = np.array(self._est)
    self._pred = np.array(self._pred)
    if self.has_errors:
        self._pred_L = np.array(self._pred_L)
        self._pred_R = np.array(self._pred_R)
    else:
        self._pred_L = None
        self._pred_R = None
    if self.has_errors and self.err_est:
        self._est_L = np.array(self._est_L)
        self._est_R = np.array(self._est_R)
    else:
        self._est_L = None
        self._est_R = None

    return self
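
# Illustrative example (made-up numbers) of the mlags handling above: with a
# test estimator fit at lag 5 and mlags = [0, 1, 2, 4], predictions are made at
# physical lag times [0, 5, 10, 20], while the zero lag is dropped from the
# estimation grid and re-inserted as a None placeholder after the scan.
import numpy as np

mlags = np.array([0, 1, 2, 4])
base_lag = 5                      # hypothetical test_estimator.lag
lags = mlags * base_lag           # -> array([ 0,  5, 10, 20])
pargrid = [{'lag': int(lag)} for lag in lags[1:]]  # zero lag skipped for estimation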