def setUpClass(cls):
    """Estimate the MSM / HMSM fixtures shared by the tests of this class.

    Loads the discrete two-well trajectory ``dtraj_T100K_dt10``, shifts it in
    place so that its smallest state index becomes 0, and estimates a
    reversible Markov model and a reversible two-state hidden Markov model
    at lag 1 and at lag 10.  The results are stored on the class as
    ``msm_lag1``, ``hmsm_lag1``, ``msm_lag10`` and ``hmsm_lag10``.
    """
    import pyemma.datasets

    dtraj = pyemma.datasets.load_2well_discrete().dtraj_T100K_dt10
    # in-place shift: removes the empty states below the smallest index
    dtraj -= np.min(dtraj)

    n_hidden = 2
    # same estimation order as before: MSM then HMSM, first lag 1, then lag 10
    for lag in (1, 10):
        setattr(
            cls,
            'msm_lag%d' % lag,
            msm.estimate_markov_model(
                [dtraj], lag, reversible=True, connectivity='largest'),
        )
        setattr(
            cls,
            'hmsm_lag%d' % lag,
            msm.estimate_hidden_markov_model(
                [dtraj], n_hidden, lag, reversible=True,
                observe_nonempty=True),
        )
def test_separate_states(self):
    """Check which emission probabilities vanish when ``separate=[0]`` is used.

    A hidden Markov model with 3 states is estimated at lag 1 from two short
    discrete trajectories, with observable 0 passed as ``separate``.  The test
    then asserts that selected (row, column) entries of
    ``observation_probabilities`` are zero.
    """
    dtrajs = [
        np.array([0, 1, 1, 1, 1, 1, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1]),
        np.array([2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2]),
    ]
    hmm = msm.estimate_hidden_markov_model(dtrajs, 3, lag=1, separate=[0])

    # we expect zeros at the following (row, column) positions:
    zero_rows = [0, 1, 2, 2, 2]
    zero_cols = [0, 0, 1, 2, 3]
    # Pass rows and columns as two separate indices.  Indexing with a single
    # list of lists is interpreted by current NumPy as ONE array index that
    # selects whole rows, which would test the wrong entries (or raise an
    # IndexError) instead of picking the element pairs listed above.
    pobs = hmm.observation_probabilities
    assert np.allclose(pobs[zero_rows, zero_cols], 0)
def test_submodel_simple(self):
    """Sanity check for ``submodel_largest``: the call must not alter the model."""
    import copy

    # The trajectory used to be drawn at random:
    #   dtrj = np.random.randint(0, 2, size=100)
    #   dtrj[np.random.randint(0, dtrj.shape[0], 3)] = 2
    # It is hard-coded now because the random version failed stochastically.
    dtrj = [
        1, 0, 0, 1, 0, 1, 1, 0, 0, 1,
        1, 1, 0, 0, 0, 0, 1, 0, 1, 1,
        0, 1, 1, 0, 0, 0, 0, 2, 1, 0,
        1, 1, 0, 1, 0, 1, 1, 1, 0, 1,
        0, 1, 1, 0, 1, 1, 0, 1, 0, 0,
        1, 0, 1, 1, 0, 1, 0, 0, 0, 1,
        1, 0, 1, 1, 0, 0, 1, 1, 2, 0,
        0, 1, 1, 2, 0, 1, 1, 1, 0, 1,
        1, 0, 1, 1, 0, 1, 1, 0, 0, 1,
        1, 0, 0, 0, 1, 1, 0, 1, 1, 0,
    ]

    model = msm.estimate_hidden_markov_model(dtrj, 3, 2)
    snapshot = copy.deepcopy(model)

    reduced = model.submodel_largest(mincount_connectivity=5)

    # the model the submodel was taken from must still equal its earlier copy
    self.assertTrue(model == snapshot)
    # the reduced model is expected to have two states left
    self.assertEqual(reduced.timescales().shape[0], 1)
    self.assertEqual(reduced.pi.shape[0], 2)
    self.assertEqual(reduced.transition_matrix.shape, (2, 2))
def markovModel(self, lag, macronum):
    """Estimate a hidden Markov model and keep it in ``self.hmm``.

    Parameters
    ----------
    lag
        Forwarded as the lag argument of
        ``pyemma.msm.estimate_hidden_markov_model``.
    macronum
        Forwarded as the number of hidden states of the estimator.

    The model is estimated from ``self.data.St`` converted to a list, using
    ``connectivity='largest'``.  The active set of the resulting model is
    printed.
    """
    from pyemma.msm import estimate_hidden_markov_model

    state_sequences = self.data.St.tolist()
    self.hmm = estimate_hidden_markov_model(
        state_sequences, macronum, lag, connectivity='largest')

    report = 'Active set includes macrostates: {}'.format(self.hmm.active_set)
    print(report)
def test_failfast_false(self):
    """With failfast forced to False, ``cktest`` must run without raising."""
    from pyemma._base.estimator import (
        _estimate_param_scan_worker as scan_worker,
    )

    fail_fast = False

    @wraps(scan_worker)
    def forced_worker(*params):
        # Overwrite positional slot 5 before delegating to the real worker
        # (presumably the worker's failfast parameter - confirm against its
        # signature).
        patched = list(params)
        patched[5] = fail_fast
        return scan_worker(*patched)

    target = 'pyemma._base.estimator._estimate_param_scan_worker'
    with mock.patch(target, forced_worker):
        hmm = msm.estimate_hidden_markov_model([0, 0, 0, 1, 1, 1, 0, 0], 2, 1)
        hmm.cktest(n_jobs=1)
def test_failfast_true(self):
    """With failfast forced to True, an exception must be raised."""
    from pyemma._base.estimator import (
        _estimate_param_scan_worker as scan_worker,
    )

    fail_fast = True

    @wraps(scan_worker)
    def forced_worker(*params):
        # Overwrite positional slot 5 before delegating to the real worker
        # (presumably the worker's failfast parameter - confirm against its
        # signature).
        patched = list(params)
        patched[5] = fail_fast
        return scan_worker(*patched)

    target = 'pyemma._base.estimator._estimate_param_scan_worker'
    # NOTE(review): the estimation call is inside assertRaises as well, so an
    # exception raised there would also satisfy this test.
    with self.assertRaises(BaseException), mock.patch(target, forced_worker):
        hmm = msm.estimate_hidden_markov_model([0, 0, 0, 1, 1, 1, 0, 0], 2, 1)
        hmm.cktest(n_jobs=1)
def test_its_hmsm(self):
    """Compare the CK test of a maximum-likelihood HMSM with reference values.

    A two-state hidden Markov model is estimated at lag 10 from
    ``self.double_well_data.dtraj_T100K_dt10_n6good`` and its ``cktest`` is
    evaluated for ``mlags=[0, 1, 10]``.  Estimates and predictions must
    roughly agree with the stored reference matrices, and entries 0 and 1 of
    the confidence attributes must be ``None``.
    """
    MLHMM = msm.estimate_hidden_markov_model(
        [self.double_well_data.dtraj_T100K_dt10_n6good], 2, 10)
    ck = MLHMM.cktest(mlags=[0, 1, 10])
    estref = np.array([[[1., 0.],
                        [0., 1.]],
                       [[0.98515058, 0.01484942],
                        [0.01442843, 0.98557157]],
                       [[0.88172685, 0.11827315],
                        [0.11878823, 0.88121177]]])
    predref = np.array([[[1., 0.],
                         [0., 1.]],
                        [[0.98515058, 0.01484942],
                         [0.01442843, 0.98557157]],
                        [[0.86961812, 0.13038188],
                         [0.12668553, 0.87331447]]])
    # Rough agreement with MLE.  The compared values are probabilities in
    # [0, 1]; the former absolute tolerance of 10.0 exceeded that whole range,
    # so the comparison could never fail.  0.05 keeps the check "rough" while
    # making it able to detect a wrong result.
    rtol, atol = 0.1, 0.05
    np.testing.assert_allclose(ck.estimates, estref, rtol=rtol, atol=atol)
    assert ck.estimates_conf[0] is None
    assert ck.estimates_conf[1] is None
    np.testing.assert_allclose(ck.predictions, predref, rtol=rtol, atol=atol)
    assert ck.predictions_conf[0] is None
    assert ck.predictions_conf[1] is None
#with open('clustering'+sys_nm+'_trajfrac-'+str(traj_frac)+'.pkl', 'rb') as f: # clustering = pickle.load(f) dtrajs = clustering.dtrajs cc = clustering.clustercenters[:, 0] print 'n_clusters = ' + str(len(cc)) else: dtrajs = None # send the dtraj info dtrajs = comm.bcast(dtrajs, root=0) # HMSM if (rank < size - Nign): # leave out some processors nstates = n_Hstates for lag in range(lag0[traj_frac], len(lags)): print 'Starting lag ' + str(lag) + ' of ' + str(len(lags)) hmsm = msm.estimate_hidden_markov_model(dtrajs, nstates, lags[lag], reversible=True, stationary=False, stride=1) #hmsm = msm.bayesian_hidden_markov_model(dtrajs, nstates, lags[lag], nsamples=Nsamples, reversible=True, stationary=False, stride=1, conf=Iconf) save_object( 'HMSM_' + sys_nm + '_trajfrac-' + str(traj_frac) + '_lag-' + str(lags[lag]) + '.pkl', hmsm) # already did this, read it in #with open('HMSM_'+sys_nm+'_trajfrac-'+str(traj_frac)+'_lag-'+str(lags[lag])+'.pkl', 'rb') as f: # hmsm = pickle.load(f)
def markovModel(self, lag, macronum):
    """Build a hidden Markov model from ``self.data.St`` and store it.

    ``macronum`` and ``lag`` are handed to
    ``pyemma.msm.estimate_hidden_markov_model`` as its second and third
    positional arguments, together with ``connectivity='largest'``.  The
    estimated model is assigned to ``self.hmm`` and its active set is printed.
    """
    from pyemma.msm import estimate_hidden_markov_model

    trajectories = self.data.St.tolist()
    model = estimate_hidden_markov_model(
        trajectories, macronum, lag, connectivity='largest')
    self.hmm = model

    print('Active set includes macrostates: {}'.format(self.hmm.active_set))
connectivity_type = 'largest' initial = 10 ending = 400 interval = ending * 1.0 / 20 lag_times = [] for j in range(20): lag_times.append(int(initial + j * interval)) print lag_times print "unit is 20 ps" for j in range(20): print "now we are dealing with lagtime ", lag_times[j] mm = msm.estimate_hidden_markov_model(kcenters_sequences, nMacro, lag=lag_times[j], reversible=reversible_type, connectivity=connectivity_type) np.savetxt( "hmm_%d_state_%d_lagtime_transition_matrix.txt" % (nMacro, lag_times[j]), mm.transition_matrix) np.savetxt( "hmm_%d_state_%d_lagtime_stationary_pop.txt" % (nMacro, lag_times[j]), mm.stationary_distribution) np.savetxt("hmm_%d_state_%d_lagtime_emisson.txt" % (nMacro, lag_times[j]), mm.metastable_distributions) np.savetxt( "hmm_%d_state_%d_lagtime_membership.txt" % (nMacro, lag_times[j]), mm.metastable_memberships) print mm.timescales()
print('tau = ', tau)
# only the last entry of `ks` is evaluated
for k in [ks[-1]]:
    print('\tk = ', k)

    # Fit a reversible MSM on the same trajectories and require that its
    # active count fraction is exactly 1.0 before fitting the HMM.
    m = MaximumLikelihoodMSM(lag=tau, connectivity='largest', reversible=True)
    m.fit(erg_dtrajs)
    assert m.active_count_fraction == 1.0, 'Active count fraction not 1.0'

    print('\tFitting HMM')
    hmm_options = dict(
        nstates=int(k),
        lag=tau,
        stationary=False,
        reversible=True,
        connectivity='largest',
    )
    hmm = estimate_hidden_markov_model(dtrajs=erg_dtrajs, **hmm_options)
    #hmm = MaximumLikelihoodHMSM(nstates=int(k), lag=tau, stationary=False, reversible=True, connectivity='largest', msm_init=m)
    #hmm.fit(erg_dtrajs[:2])

    # record the settings first, then one value per model-selection metric
    results['k'].append(k)
    results['tau'].append(tau)
    metrics = (
        ('bic', bic),
        ('aic', aic),
        ('icl', icl),
        ('entropy', class_entropy),
        ('dofs', dof),
        ('n_obs', n_obs),
    )
    for column, metric in metrics:
        results[column].append(metric(hmm))

pd.DataFrame(results).to_pickle('h_state_selection_tau-{}.p'.format(tau))