def test_keyboard_interrupt(self):
    """Ensure a KeyboardInterrupt aborts the parameter scan even with ``failfast=False``."""

    class sleeping_estimator(Estimator):
        """Stub estimator: sleeps for 5 seconds, or raises KeyboardInterrupt if ``raise_`` is truthy."""

        def __init__(self, raise_=False):
            # Store the argument under its own name: _estimate reads
            # ``self.raise_``, so the attribute must exist even when no
            # parameter is applied to the instance after construction.
            self.raise_ = raise_

        def _estimate(self, X):
            if not self.raise_:
                import time
                time.sleep(5)
            else:
                raise KeyboardInterrupt()

    # NOTE(review): the single parameter set passes the non-empty tuple
    # (False, True) as ``raise_``; presumably it is applied as-is and, being
    # truthy, selects the raising branch -- confirm this is intended.
    with self.assertRaises(KeyboardInterrupt):
        estimate_param_scan(sleeping_estimator, X=None, param_sets=[{'raise_': (False, True)}],
                            failfast=False, n_jobs=1)
def test_evaluate_msm(self):
    """Scan lag times 1-3 with ``MaximumLikelihoodMSM`` and evaluate ``timescales``.

    Only checks that the scan result comes back as a list.
    """
    from pyemma.msm.estimators import MaximumLikelihoodMSM

    # mini-trajectory
    mini_traj = [0, 0, 1, 2, 1, 0, 1, 0, 1, 2, 2, 0, 0, 0, 1, 1,
                 2, 1, 0, 0, 1, 2, 1, 0, 0, 0, 1, 1, 0, 1, 2]
    lag_grid = param_grid({'lag': [1, 2, 3]})

    scan_result = estimate_param_scan(
        MaximumLikelihoodMSM,
        mini_traj,
        lag_grid,
        evaluate='timescales',
        n_jobs=1,
    )

    self.assertIsInstance(scan_result, list)
def _estimate(self, dtrajs):
    """Run the wrapped estimator at every lag time that has not been estimated yet.

    The input is normalised with ``_types.ensure_dtraj_list`` and hashed with
    ``_hash_dtrajs``. If an estimation was already run and the hash differs
    from the stored one, the ``_estimated`` flag is cleared so that all lag
    times are estimated again. Lag times that are not shorter than the longest
    trajectory are dropped with a warning.

    Parameters
    ----------
    dtrajs
        Input accepted by ``_types.ensure_dtraj_list``.

    Returns
    -------
    self
    """
    # --- prepare and check data ---
    # TODO: Currently only discrete trajectories are implemented. For a general class this needs to be changed.
    dtrajs = _types.ensure_dtraj_list(dtrajs)

    if not self._estimated:
        # first run: just remember which data we are estimating from
        self._last_dtrajs_input_hash = _hash_dtrajs(dtrajs)
    else:
        # a changed input invalidates every previously derived quantity
        assert hasattr(self, '_last_dtrajs_input_hash')
        new_hash = _hash_dtrajs(dtrajs)
        if self._last_dtrajs_input_hash != new_hash:
            self.logger.warning("estimating from new data, discard all previously computed models.")
            self._estimated = False
            self._last_dtrajs_input_hash = new_hash

    # trajectory lengths
    n_trajs = len(dtrajs)
    self._trajlengths = np.fromiter((len(traj) for traj in dtrajs), dtype=int, count=n_trajs)
    longest = np.max(self._trajlengths)

    # derive lag times from the data unless they were given
    if self._lags is None:
        lag_limit = 0.5 * np.sum(self._trajlengths) / float(len(self._trajlengths))
        self._lags = _generate_lags(lag_limit, 1.5)

    # drop lag times that do not fit into the longest trajectory
    if np.max(self._lags) >= longest:
        usable = np.where(self._lags < longest)[0]
        unusable = np.where(self._lags >= longest)[0]
        self.logger.warning('Ignoring lag times that exceed the longest trajectory: %s',
                            self._lags[unusable])
        self._lags = self._lags[usable]

    # --- run estimation ---
    if self._estimated:
        # an estimation was run before: only the lag times missing from it are needed
        # TODO: this will re-evaluate problematic lag times, wont it?
        lags = sorted(set(self._lags).difference(self._last_lags))
        if not lags:
            self.logger.info("All lag times already estimated.")
            return self
        self.logger.info("Running estimating for not yet estimated lags times: %s", lags)
    else:
        lags = self._lags

    # one parameter set per lag time
    param_sets = tuple(param_grid({'lag': lags}))

    # run estimation on all lag times
    pg = ProgressReporter()
    with pg.context():
        models, estimators = estimate_param_scan(
            self.estimator, dtrajs, param_sets,
            failfast=False, return_estimators=True, n_jobs=self.n_jobs,
            progress_reporter=pg, return_exceptions=True,
        )

    self._estimators = estimators
    self._postprocess_results(models)
    return self
def test_evaluate_bmsm_single_arg(self):
    """Scan lag times 1-3 with ``BayesianMSM`` and evaluate ``sample_f('timescales')``.

    Expects a list with one entry per lag time, the first of which holds
    one item per requested sample.
    """
    from pyemma.msm.estimators import BayesianMSM

    # mini-trajectory
    mini_traj = [0, 0, 1, 2, 1, 0, 1, 0, 1, 2, 2, 0, 0, 0, 1, 1,
                 2, 1, 0, 0, 1, 2, 1, 0, 0, 0, 1, 1, 0, 1, 2]
    sample_count = 52
    grid = param_grid({
        'lag': [1, 2, 3],
        'show_progress': (False, ),
        'nsamples': (sample_count, ),
    })

    scan_result = estimate_param_scan(
        BayesianMSM,
        mini_traj,
        grid,
        evaluate='sample_f',
        evaluate_args='timescales',
        n_jobs=1,
    )

    self.assertIsInstance(scan_result, list)
    # three lag times
    self.assertEqual(len(scan_result), 3)
    self.assertEqual(len(scan_result[0]), sample_count)
def test_evaluate_msm_multi_arg(self):
    """Scan lag times 1-3 and evaluate ``generate_traj`` with several positional arguments.

    Expects a list with one generated trajectory of the requested length
    per lag time.
    """
    from pyemma.msm.estimators import MaximumLikelihoodMSM

    # mini-trajectory
    mini_traj = [0, 0, 1, 2, 1, 0, 1, 0, 1, 2, 2, 0, 0, 0, 1, 1,
                 2, 1, 0, 0, 1, 2, 1, 0, 0, 0, 1, 1, 0, 1, 2]
    generated_length = 10
    lag_grid = param_grid({'lag': [1, 2, 3]})

    # signature being exercised:
    #   generate_traj(self, N, start=None, stop=None, stride=1)
    call_args = ((generated_length, 2, None, 2), )
    scan_result = estimate_param_scan(
        MaximumLikelihoodMSM,
        mini_traj,
        lag_grid,
        evaluate='generate_traj',
        evaluate_args=call_args,
        n_jobs=1,
    )

    self.assertIsInstance(scan_result, list)
    # three lag times
    self.assertEqual(len(scan_result), 3)
    self.assertTrue(all(len(generated) == generated_length for generated in scan_result))
def _estimate(self, data):
    """Compare predictions of the test model with estimates at multiples of its lag time.

    The lag times are ``mlags * test_estimator.lag``. For every multiple the
    method stores the prediction obtained from ``test_model`` /
    ``test_estimator`` and the observables of a model estimated at that lag
    time via ``estimate_param_scan``. Confidence bounds are collected when
    ``has_errors`` (predictions) and additionally ``err_est`` (estimates) are
    set; otherwise the corresponding attributes end up as ``None``.

    Parameters
    ----------
    data
        Input forwarded unchanged to ``estimate_param_scan``.

    Returns
    -------
    self
    """
    # lag times
    self._lags = np.array(self.mlags) * self.test_estimator.lag
    lag_params = list(param_grid({'lag': self._lags}))

    # a zero lag is not part of the scan; its slot is padded with None below
    zero_lag_first = self.mlags[0] == 0
    if zero_lag_first:
        lag_params = lag_params[1:]

    self._pred, self._pred_L, self._pred_R = [], [], []
    self._est, self._est_L, self._est_R = [], [], []

    # this object reports progress itself, so a sampled test estimator is silenced
    reporter = None
    if self.show_progress:
        if isinstance(self.test_estimator, SampledModel):
            self.test_estimator.show_progress = False
        reporter = self

    # clone estimators and run estimates
    scan_models, scan_estimators = estimate_param_scan(
        self.test_estimator, data, lag_params,
        return_estimators=True, failfast=False,
        progress_reporter=reporter, n_jobs=self.n_jobs,
    )

    if zero_lag_first:
        scan_models = [None] + scan_models
        scan_estimators = [None] + scan_estimators

    for idx, mlag in enumerate(self.mlags):
        # prediction from the test model at this multiple
        predicted = self._compute_observables(self.test_model, self.test_estimator, mlag)
        self._pred.append(predicted)

        # prediction errors, if available
        if self.has_errors:
            lower, upper = self._compute_observables_conf(self.test_model, self.test_estimator, mlag)
            self._pred_L.append(lower)
            self._pred_R.append(upper)

        # estimate at this lag time
        lag_model = scan_models[idx]
        lag_estimator = scan_estimators[idx]
        self._est.append(self._compute_observables(lag_model, lag_estimator))
        if self.has_errors and self.err_est:
            lower, upper = self._compute_observables_conf(lag_model, lag_estimator)
            self._est_L.append(lower)
            self._est_R.append(upper)

    # build arrays
    self._est = np.array(self._est)
    self._pred = np.array(self._pred)

    if not self.has_errors:
        self._pred_L = None
        self._pred_R = None
    else:
        self._pred_L = np.array(self._pred_L)
        self._pred_R = np.array(self._pred_R)

    if not (self.has_errors and self.err_est):
        self._est_L = None
        self._est_R = None
    else:
        self._est_L = np.array(self._est_L)
        self._est_R = np.array(self._est_R)

    return self
def _estimate(self, data):
    r"""Estimates ITS at set of lagtimes

    Runs ``self.estimator`` at every lag time through ``estimate_param_scan``,
    discards lag times whose estimation failed and collects the timescales of
    the remaining models in ``self._its`` (shape ``(len(lags), nits)``,
    NaN where no timescale is available). If the first model is a
    ``SampledModel``, the sampled timescales are additionally stored in
    ``self._its_samples`` (shape ``(nsamples, len(lags), nits)``).

    Parameters
    ----------
    data
        Input accepted by ``_types.ensure_dtraj_list``.

    Raises
    ------
    RuntimeError
        If the estimation failed at every lag time.

    Notes
    -----
    Nothing is returned explicitly; all results are stored on the instance.
    ``self.nits`` is lowered (with a warning) when fewer finite timescales
    are available than requested.
    """
    ### PREPARE AND CHECK DATA
    # TODO: Currently only discrete trajectories are implemented. For a general class this needs to be changed.
    data = _types.ensure_dtraj_list(data)

    # check trajectory lengths
    self._trajlengths = np.array([len(traj) for traj in data])
    maxlength = np.max(self._trajlengths)

    # set lag times by data if not yet set
    if self._lags is None:
        maxlag = 0.5 * np.sum(self._trajlengths) / float(len(self._trajlengths))
        self._lags = _generate_lags(maxlag, 1.5)

    # check if some lag times are forbidden.
    if np.max(self._lags) >= maxlength:
        Ifit = np.where(self._lags < maxlength)[0]
        Inofit = np.where(self._lags >= maxlength)[0]
        self.logger.warning('Ignoring lag times that exceed the longest trajectory: %s',
                            self._lags[Inofit])
        self._lags = self._lags[Ifit]

    ### RUN ESTIMATION
    # construct all parameter sets for the estimator
    param_sets = tuple(param_grid({'lag': self._lags}))

    # this object is passed as progress reporter, so silence the estimator's own
    if isinstance(self.estimator, SampledModel):
        self.estimator.show_progress = False

    # run estimation on all lag times
    self._models, self._estimators = estimate_param_scan(
        self.estimator, data, param_sets,
        failfast=False, return_estimators=True,
        n_jobs=self.n_jobs, progress_reporter=self)

    ### PROCESS RESULTS
    # if some results are None, estimation has failed. Warn and truncate models and lag times
    good = np.array([i for i, m in enumerate(self._models) if m is not None], dtype=int)
    bad = np.array([i for i, m in enumerate(self._models) if m is None], dtype=int)
    if good.size == 0:
        raise RuntimeError('Estimation has failed at ALL lagtimes. Check for errors.')
    if bad.size > 0:
        self.logger.warning(
            'Estimation has failed at lagtimes: %s. '
            'Run single-lag estimation at these lags to track down the error.',
            self._lags[bad])
        self._lags = self._lags[good]
        # index the list directly: wrapping arbitrary model objects in an
        # ndarray is unnecessary and may try to interpret them as sequences
        self._models = [self._models[i] for i in good]

    # timescales
    timescales = [m.timescales() for m in self._models]

    # how many finite timescales do we really have?
    maxnts = max(len(ts[np.isfinite(ts)]) for ts in timescales)
    if self.nits is None:
        self.nits = maxnts
    if maxnts < self.nits:
        self.nits = maxnts
        self.logger.warning(
            'Changed user setting nits to the number of available timescales nits=%s',
            self.nits)

    # sort timescales into matrix
    computed_all = True  # flag if we have found any problems
    # initialize with NaN in order to point out timescales that were not computed
    # (np.nan, not the np.NAN alias, which NumPy 2.0 removed)
    self._its = np.full((len(self._lags), self.nits), np.nan)
    self._successful_lag_indexes = []
    for i, ts in enumerate(timescales):
        # if there are any finite timescales available, add them
        if ts is not None and np.any(np.isfinite(ts)):
            # copy into array. Leave NaN if there is no timescale
            self._its[i, :len(ts)] = ts[:self.nits]
            self._successful_lag_indexes.append(i)

    if len(self._successful_lag_indexes) < len(self._lags):
        computed_all = False
    if np.any(np.isnan(self._its)):
        computed_all = False

    # timescales samples if available
    if isinstance(self._models[0], SampledModel):
        # samples
        timescales_samples = [m.sample_f('timescales') for m in self._models]
        nsamples = np.shape(timescales_samples[0])[0]
        # initialize with NaN in order to point out timescales that were not computed
        self._its_samples = np.full((nsamples, len(self._lags), self.nits), np.nan)
        for i, ts in enumerate(timescales_samples):
            if ts is not None:
                ts = np.vstack(ts)
                ts = ts[:, :self.nits]
                # copy into array. Leave NaN if there is no timescales
                self._its_samples[:, i, :ts.shape[1]] = ts

        if np.any(np.isnan(self._its_samples)):
            computed_all = False

    if not computed_all:
        self.logger.warning('Some timescales could not be computed. Timescales array is smaller than '
                            'expected or contains NaNs')