def setUpClass(cls):
    """Estimate the MSM and HMSM fixtures at lag 1 and lag 10.

    The 2-well discrete trajectory is shifted in place so that its smallest
    state index becomes 0, then one Markov model and one hidden Markov model
    are estimated per lag and stored as ``cls.msm_lag<lag>`` and
    ``cls.hmsm_lag<lag>``.
    """
    import pyemma.datasets

    # load observations and remove the empty leading states (in-place shift)
    dtraj = pyemma.datasets.load_2well_discrete().dtraj_T100K_dt10
    dtraj -= np.min(dtraj)

    n_hidden = 2  # hidden states

    # run with lag 1 and 10; the MSM is estimated before the HMSM for each lag
    for lag in (1, 10):
        setattr(cls, 'msm_lag%d' % lag,
                msm.estimate_markov_model([dtraj], lag, reversible=True,
                                          connectivity='largest'))
        setattr(cls, 'hmsm_lag%d' % lag,
                msm.estimate_hidden_markov_model([dtraj], n_hidden, lag,
                                                 reversible=True,
                                                 observe_nonempty=True))
def test_oom(self):
    """Run the shared connectivity check on two OOM-weighted MSMs.

    One model uses ``mincount_connectivity='1/n'``, the other a fixed
    threshold of 6 counts.
    """
    from pyemma import msm

    def oom_model(mincount):
        # Both models share the trajectory, lag and OOM weighting.
        return msm.estimate_markov_model(self.dtraj, lag=1,
                                         mincount_connectivity=mincount,
                                         weights='oom')

    unrestricted = oom_model('1/n')
    # Restricting the connectivity to at least 6 counts; per the original
    # author's note this is expected to lose state 2.
    restricted = oom_model(6)
    self._test_connectivity(unrestricted, restricted)
def setUpClass(cls):
    """Estimate the core-set MSM fixtures on the 2-well discrete trajectory.

    Stores the core set, trajectory, lag, and a reference stationary
    distribution (normalised visit counts of the core states) on ``cls``,
    followed by three models: reversible (``msmrev``), reversible with fixed
    stationary distribution (``msmrevpi``) and non-reversible (``msm``).

    Warnings raised during estimation are suppressed only inside the
    ``catch_warnings`` context, so the process-wide warning filters are left
    untouched for every other test.
    """
    import pyemma.datasets

    cls.core_set = [34, 65]
    cls.dtraj = pyemma.datasets.load_2well_discrete().dtraj_T100K_dt10

    # Relative visit frequency of the core states only.
    nu = 1. * np.bincount(cls.dtraj)[cls.core_set]
    cls.statdist = nu / nu.sum()

    cls.tau = 10
    maxerr = 1e-12

    # catch_warnings() restores the previous filter state on exit; a bare
    # warnings.filterwarnings("ignore") outside of it would leak globally.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        cls.msmrev = estimate_markov_model(cls.dtraj, cls.tau, maxerr=maxerr,
                                           core_set=cls.core_set)
        cls.msmrevpi = estimate_markov_model(cls.dtraj, cls.tau, maxerr=maxerr,
                                             statdist=cls.statdist,
                                             core_set=cls.core_set)
        cls.msm = estimate_markov_model(cls.dtraj, cls.tau, reversible=False,
                                        maxerr=maxerr, core_set=cls.core_set)
def test_valid_trajectory(self):
    """Check estimation with a fixed stationary vector against two trajectories.

    The stationary vector has zero weight on state 1. A trajectory visiting
    states 0 and 2 must yield the active set ``[0, 2]``; a trajectory that
    only visits state 1 must raise ``ValueError``.
    """
    statdist = np.array([0.1, 0.0, 0.9])
    only_state_one = np.array([1] * 7)
    mixed_states = np.array([0, 2, 0, 2, 2, 0, 1, 1])

    model = estimate_markov_model(mixed_states, 1, statdist=statdist)
    expected_active = np.array([0, 2])
    self.assertTrue(np.all(model.active_set == expected_active))

    with self.assertRaises(ValueError):
        estimate_markov_model(only_state_one, 1, statdist=statdist)
def test_msm(self):
    """Run the shared connectivity check on two MSMs with different thresholds.

    One model uses ``mincount_connectivity='1/n'``, the other the threshold
    stored in ``self.mincount_connectivity``.
    """
    thresholds = ('1/n', self.mincount_connectivity)
    unrestricted, restricted = [
        estimate_markov_model(self.dtraj, lag=1, mincount_connectivity=threshold)
        for threshold in thresholds
    ]
    self._test_connectivity(unrestricted, restricted)
def test_valid_stationary_vector(self):
    """Check how the length of the supplied stationary vector is handled.

    A three-entry vector (zero weight on state 2) must give the active set
    ``[0, 1]``; a two-entry vector must raise ``ValueError`` for a
    trajectory that visits three states.
    """
    trajectory = np.array([0, 0, 1, 0, 1, 2])
    full_length_pi = np.array([0.1, 0.9, 0.0])
    too_short_pi = np.array([0.1, 0.9])
    expected_active = np.array([0, 1])

    model = estimate_markov_model(trajectory, 1, statdist=full_length_pi)
    self.assertTrue(np.all(model.active_set == expected_active))

    with self.assertRaises(ValueError):
        estimate_markov_model(trajectory, 1, statdist=too_short_pi)
def test_valid_trajectory(self):
    """Check core-set estimation with a fixed stationary vector.

    With core set ``[0, 2]`` the valid trajectory must produce that active
    set and reproduce the supplied stationary vector; a trajectory that only
    visits state 1 must raise ``ValueError``.
    """
    statdist = np.array([0.1, 0.9])
    cores = [0, 2]
    only_state_one = np.array([1] * 7)
    mixed_states = np.array([0, 2, 0, 2, 2, 0, 1, 1])

    model = estimate_markov_model(mixed_states, 1, statdist=statdist,
                                  core_set=cores)
    self.assertTrue(np.all(model.active_set == np.array(cores)))
    np.testing.assert_array_equal(model.pi, statdist)

    with self.assertRaises(ValueError):
        estimate_markov_model(only_state_one, 1, statdist=statdist,
                              core_set=cores)
def test_CK_covariances_against_MSM(self):
    """Compare the diagonals of the VAMP CK-test covariances with MSM values.

    For every lag multiple after the first entry, predictions are compared
    with powers of the fixture MSM and estimates with an MSM re-estimated at
    that lag.
    """
    observables = np.eye(3)  # observe every state
    statistics = np.eye(3)   # restrict p0 to every state
    cktest = self.vamp.cktest(observables=observables, statistics=statistics,
                              mlags=4, show_progress=True, n_jobs=1)

    # Entry 0 is skipped; the loop starts at one multiple of the lag.
    pairs = zip(cktest.estimates[1:], cktest.predictions[1:])
    for multiple, (estimate, prediction) in enumerate(pairs, start=1):
        lagged_msm = estimate_markov_model(dtrajs=self.dtrajs,
                                           lag=self.lag * multiple,
                                           reversible=False)
        msm_estimate = (self.p0 * statistics).T.dot(lagged_msm.P).dot(observables).T
        propagated = np.linalg.matrix_power(self.msm.P, multiple)
        msm_prediction = (self.p0 * statistics).T.dot(propagated).dot(observables).T

        np.testing.assert_allclose(np.diag(prediction), np.diag(msm_prediction),
                                   atol=self.atol)
        np.testing.assert_allclose(np.diag(estimate), np.diag(msm_estimate),
                                   atol=self.atol)
        np.testing.assert_allclose(np.diag(estimate), np.diag(prediction),
                                   atol=0.006)
def test_score_vs_MSM(self):
    """Check that MSM and VAMP cross-validation scores agree for each method.

    Scores are compared to 3 decimal places, except 'VAMPE' which is
    compared to 2.
    """
    from pyemma.util.contexts import numpy_random_seed

    # Both splits use seed 32, presumably so that the continuous and the
    # discrete trajectories are partitioned identically.
    with numpy_random_seed(32):
        trajs_test, trajs_train = cvsplit_trajs(self.trajs)
    with numpy_random_seed(32):
        dtrajs_test, dtrajs_train = cvsplit_trajs(self.dtrajs)

    for method in ('VAMP1', 'VAMP2', 'VAMPE'):
        msm_train = estimate_markov_model(dtrajs=dtrajs_train, lag=self.lag,
                                          reversible=False)
        score_msm = msm_train.score(dtrajs_test, score_method=method,
                                    score_k=None)

        vamp_train = pyemma_api_vamp(data=trajs_train, lag=self.lag, dim=1.0)
        score_vamp = vamp_train.score(test_data=trajs_test, score_method=method)

        places = 2 if method == 'VAMPE' else 3
        self.assertAlmostEqual(score_msm, score_vamp, places=places, msg=method)
def test_rdl_recompute(self):
    """Regression test for issue 1301.

    After re-estimating with a different lag, the left eigenvectors must be
    a new object, i.e. the RDL decomposition has to be recomputed for the
    new transition matrix.
    """
    model = estimate_markov_model(self.dtraj, self.tau)
    before = model.eigenvectors_left(2)

    model.estimate(self.dtraj, lag=self.tau + 1)
    after = model.eigenvectors_left(2)

    assert after is not before
def setUpClass(cls):
    """Generate discrete trajectories and estimate the VAMP and MSM fixtures.

    Twenty trajectories of 10000 steps are generated from a 3-state
    transition matrix and one-hot encoded. ``p0`` and ``p1`` are the
    normalised state counts over the first and the last ``N_steps - lag``
    frames respectively.
    """
    n_steps = 10000
    n_trajs = 20
    lag = 1
    base_matrix = np.array([[0.7, 0.2, 0.1],
                            [0.1, 0.8, 0.1],
                            [0.1, 0.1, 0.8]])
    T = np.linalg.matrix_power(base_matrix, lag)
    n_states = T.shape[0]

    dtrajs = [generate(T, n_steps) for _ in range(n_trajs)]

    counts_start = np.zeros(3)
    counts_end = np.zeros(3)
    trajs = []
    for dtraj in dtrajs:
        # One-hot encode the discrete trajectory.
        onehot = np.zeros((n_steps, n_states))
        onehot[np.arange(len(dtraj)), dtraj] = 1.0
        trajs.append(onehot)
        counts_start += onehot[:-lag, :].sum(axis=0)
        counts_end += onehot[lag:, :].sum(axis=0)

    vamp = pyemma_api_vamp(trajs, lag=lag, scaling=None, dim=1.0)
    msm = estimate_markov_model(dtrajs, lag=lag, reversible=False)

    cls.trajs = trajs
    cls.dtrajs = dtrajs
    cls.lag = lag
    cls.msm = msm
    cls.vamp = vamp
    cls.p0 = counts_start / counts_start.sum()
    cls.p1 = counts_end / counts_end.sum()
    # Tolerance: 1000 machine epsilons of the VAMP output dtype.
    cls.atol = np.finfo(vamp.output_type()).eps * 1000.0
def markovModel(self, lag, macronum, units='frames', sparse=False, hmm=False):
    """ Build a Markov model at a given lag time and calculate metastable states

    Parameters
    ----------
    lag : int
        The lag time at which to calculate the Markov state model. The units are specified with the `units` argument.
    macronum : int
        The number of macrostates (metastable states) to produce
    units : str
        The units of lag. Can be 'frames' or any time unit given as a string.
    sparse : bool
        Make the transition matrix sparse. Useful if lots (> 4000) states are used for the MSM. Warning: untested.
    hmm : bool
        If True, additionally store ``self.msm.coarse_grain(self.macronum)`` in ``self.hmm``.
        Marked as still in development.

    Examples
    --------
    >>> model = Model(data)
    >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
    """
    import pyemma.msm as msm
    self._integrityCheck(markov=True)

    self.lag = unitconvert(units, 'frames', lag, fstep=self.data.fstep)
    self.msm = msm.estimate_markov_model(self.data.St.tolist(), self.lag, sparse=sparse)

    # Retry PCCA with one macrostate fewer until every macrostate is populated.
    while True:
        self.coarsemsm = self.msm.pcca(macronum)
        populated = len(np.unique(self.msm.metastable_assignments)) == macronum
        if not populated:
            macronum -= 1
            logger.warning('PCCA returned empty macrostates. Reducing the number of macrostates to {}.'.format(macronum))
        if macronum < 2:
            raise RuntimeError('Could not create even two macrostates. Please revise your clustering.')
        if populated:
            break

    self._modelid = random.random()

    # NOTE(review): self.macronum and self.macro_ofcluster are read below but
    # not assigned in this method; presumably they are defined on the class.
    if hmm:  # Still in development
        self.hmm = self.msm.coarse_grain(self.macronum)

    logger.info('{:.1f}% of the data was used'.format(self.msm.active_count_fraction * 100))

    macro_trajectories = _macroTrajSt(self.data.St, self.macro_ofcluster)
    _macroTrajectoriesReport(self.macronum, macro_trajectories, self.data.simlist)
def test_MSM_sparse(self):
    """Check a sparse MSM estimate against the stored reference quantities."""
    model = estimate_markov_model(self.dtraj, self.tau, sparse=True)

    assert_allclose(self.dtraj, model.discrete_trajectories_full[0])
    self.assertEqual(self.tau, model.lagtime)
    assert_allclose(self.lcc_MSM, model.largest_connected_set)

    # The matrices are converted to dense arrays before comparison.
    def assert_same_matrix(expected, actual):
        self.assertTrue(np.allclose(expected.toarray(), actual.toarray()))

    assert_same_matrix(self.Ccc_MSM, model.count_matrix_active)
    assert_same_matrix(self.C_MSM, model.count_matrix_full)
    assert_same_matrix(self.P_MSM, model.transition_matrix)

    assert_allclose(self.mu_MSM, model.stationary_distribution)
    assert_allclose(self.ts[1:], model.timescales(self.k - 1))
def buildMSM(self):
    """ Estimate a MSM from the trajectories using a provided lagtime that
    should be big enough so that the relevant processes have converged.

    Reads ``self.error`` to decide whether to estimate errors: when truthy
    the Bayesian estimator is used, otherwise the plain estimator. The
    result is stored in ``self.MSM_object``.
    """
    estimator = msm.bayesian_markov_model if self.error else msm.estimate_markov_model
    self.MSM_object = estimator(self.dtrajs, self.lagtime)
def main(lagtimes, clusters_file, disctraj, trajs, n_clusters, plots_path, save_plot, show_plot, lagtime_resolution=20):
    """Compute a per-cluster quantity Q for each lag time, report it and plot it.

    For every lag an MSM is estimated and ``Q`` gets one row equal to the
    diagonal of the full count matrix divided by the total number of counts.
    Clusters whose value at the last lag exceeds 0.01 are printed, a PDB
    file "cluster_Q.pdb" is written and ``create_plots`` is called.

    ``lagtime_resolution`` is accepted but not used in this function.
    """
    if disctraj is None:
        clustering = cluster.Cluster(n_clusters, trajs, "traj*", alwaysCluster=False, discretizedPath=disctraj)
        if clusters_file is not None:
            # only assign
            clustering.clusterCentersFile = clusters_file
        clustering.clusterTrajectories()
        clusterCenters = clustering.clusterCenters
        dtrajs = clustering.dtrajs
    else:
        # Discretized trajectories already exist on disk: just load them.
        dtraj_files = glob.glob(os.path.join(disctraj, "*traj*.disctraj"))
        dtrajs = [np.loadtxt(path, dtype=int) for path in dtraj_files]
        clusterCenters = np.loadtxt(clusters_file)

    rows = []
    for lag in lagtimes:
        counts = msm.estimate_markov_model(dtrajs, lag).count_matrix_full
        rows.append(counts.diagonal() / counts.sum())
    Q = np.array(rows)

    print("Clusters over 0.01 metastability")
    correlation_limit = 0.01
    selected = np.where(Q[-1] > correlation_limit)[0]
    n_selected = selected.size
    if n_selected:
        print(" ".join(str(state) for state in selected))
    print("Number of clusters:", n_selected, ", %.2f%% of the total" % (100 * n_selected / float(n_clusters)))

    # All lags except the last are appended as extra columns to the centers.
    utilities.write_PDB_clusters(np.hstack((clusterCenters, Q[:-1].T)), use_beta=True, title="cluster_Q.pdb")

    if plots_path is not None:
        utilities.makeFolder(plots_path)
    else:
        plots_path = ""
    create_plots(Q, plots_path, save_plot, show_plot, n_clusters, lagtimes, threshold=2.0)
def setUpClass(cls):
    """Estimate dense and sparse MSM fixtures on the 2-well discrete trajectory.

    The trajectory is stored as a one-element list. For both the dense and
    the sparse case three models are estimated: default (``msmrev``), with
    fixed stationary distribution (``msmrevpi``) and non-reversible
    (``msm``).
    """
    import pyemma.datasets

    cls.dtraj = [pyemma.datasets.load_2well_discrete().dtraj_T100K_dt10]
    # Reference stationary distribution: normalised visit counts.
    visits = 1. * np.bincount(cls.dtraj[0])
    cls.statdist = visits / visits.sum()
    cls.tau = 10

    def estimate(**options):
        return estimate_markov_model(cls.dtraj, cls.tau, **options)

    cls.msmrev = estimate()
    cls.msmrevpi = estimate(statdist=cls.statdist)
    cls.msm = estimate(reversible=False)

    # Sparse
    cls.msmrev_sparse = estimate(sparse=True)
    cls.msmrevpi_sparse = estimate(statdist=cls.statdist, sparse=True)
    cls.msm_sparse = estimate(reversible=False, sparse=True)
def setUpClass(cls):
    """Estimate dense and sparse MSM fixtures with ``maxerr=1e-12``.

    For both the dense and the sparse case three models are estimated on the
    2-well discrete trajectory: default (``msmrev``), with fixed stationary
    distribution (``msmrevpi``) and non-reversible (``msm``).
    """
    import pyemma.datasets

    cls.dtraj = pyemma.datasets.load_2well_discrete().dtraj_T100K_dt10
    # Reference stationary distribution: normalised visit counts.
    visits = 1. * np.bincount(cls.dtraj)
    cls.statdist = visits / visits.sum()
    cls.tau = 10

    def estimate(**options):
        # Every fixture shares the trajectory, the lag and the tolerance.
        return estimate_markov_model(cls.dtraj, cls.tau, maxerr=1e-12, **options)

    cls.msmrev = estimate()
    cls.msmrevpi = estimate(statdist=cls.statdist)
    cls.msm = estimate(reversible=False)

    # Sparse
    cls.msmrev_sparse = estimate(sparse=True)
    cls.msmrevpi_sparse = estimate(statdist=cls.statdist, sparse=True)
    cls.msm_sparse = estimate(reversible=False, sparse=True)
def markovModel(self, lag, macronum, units='frames', sparse=False):
    """ Build a Markov model at a given lag time and calculate metastable states

    Parameters
    ----------
    lag : int
        The lag time at which to calculate the Markov state model. The units are specified with the `units` argument.
    macronum : int
        The number of macrostates (metastable states) to produce
    units : str
        The units of lag. Can be 'frames' or any time unit given as a string.
    sparse : bool
        Make the transition matrix sparse. Useful if lots (> 4000) states are used for the MSM. Warning: untested.

    Examples
    --------
    >>> model = Model(data)
    >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
    """
    import pyemma.msm as msm
    self._integrityCheck(markov=True)

    self.lag = unitconvert(units, 'frames', lag, fstep=self.data.fstep)
    self.msm = msm.estimate_markov_model(self.data.St.tolist(), self.lag, sparse=sparse)
    self.P = self.msm.transition_matrix

    # cluster -> microstate lookup; -1 marks clusters outside the active set
    active = self.msm.active_set
    n_clusters = self.data.K + 1
    self.micro_ofcluster = np.full(n_clusters, -1, dtype=int)
    self.micro_ofcluster[active] = np.arange(len(active))
    self.cluster_ofmicro = active
    self.micronum = len(active)

    self.coarsemsm = self.msm.pcca(macronum)

    # Fixing pyemma macrostates: renumber the macrostates that actually
    # occur to 0..macronum-1; unused labels map to -1.
    assignments = self.msm.metastable_assignments
    occurring = list(set(assignments))
    self.macronum = len(occurring)
    relabel = np.full(macronum, -1, dtype=int)
    relabel[occurring] = range(self.macronum)
    self.macro_ofmicro = relabel[assignments]
    self.macro_ofcluster = np.full(n_clusters, -1, dtype=int)
    self.macro_ofcluster[active] = self.macro_ofmicro

    logger.info('{:.1f}% of the data was used'.format(self.msm.active_count_fraction * 100))
    self._modelid = random.random()

    macro_trajectories = _macroTrajSt(self.data.St, self.macro_ofcluster)
    _macroTrajectoriesReport(self.macronum, macro_trajectories, self.data.simlist)
def test_CK_expectation_against_MSM(self):
    """Compare the VAMP CK-test expectations with values computed from MSMs.

    For every lag multiple after the first entry, predictions are compared
    with powers of the fixture MSM and estimates with an MSM re-estimated at
    that lag.
    """
    observables = np.eye(3)  # observe every state
    cktest = self.vamp.cktest(observables=observables, statistics=None, mlags=4)

    # Entry 0 is skipped; the loop starts at one multiple of the lag.
    pairs = zip(cktest.estimates[1:], cktest.predictions[1:])
    for multiple, (estimate, prediction) in enumerate(pairs, start=1):
        lagged_msm = estimate_markov_model(dtrajs=self.dtrajs,
                                           lag=self.lag * multiple,
                                           reversible=False)
        msm_estimate = self.p0.T.dot(lagged_msm.P).dot(observables)
        propagated = np.linalg.matrix_power(self.msm.P, multiple)
        msm_prediction = self.p0.T.dot(propagated).dot(observables)

        np.testing.assert_allclose(prediction, msm_prediction, atol=self.atol)
        np.testing.assert_allclose(estimate, msm_estimate, atol=self.atol)
        np.testing.assert_allclose(estimate, prediction, atol=0.006)
def __init__(self, molecular_topology_file, trajectory, transition_matrix, num_clusters):
    """Build a Markov model from a trajectory and compare it with a previous one.

    The trajectory is featurized, projected onto two TICA dimensions,
    clustered with k-means into ``num_clusters`` clusters, and an MSM is
    estimated at lag 100. The resulting transition matrix is stored in
    ``self.transition_matrix`` and ``self._is_converged`` records whether
    its relative entropy with respect to the ``transition_matrix`` argument
    is below ``tol``.

    Parameters
    ----------
    molecular_topology_file
        Topology passed to ``coor.featurizer``.
    trajectory
        Trajectory passed to ``coor.load``.
    transition_matrix
        Transition matrix from the previous iteration.
    num_clusters
        Number of k-means clusters.
    """
    # Build Markov model with PyEmma
    feat = coor.featurizer(molecular_topology_file)
    X = coor.load(trajectory, feat)
    Y = coor.tica(X, dim=2).get_output()
    k_means = coor.cluster_kmeans(Y, k=num_clusters)
    # NOTE(review): centroids is not used below; the call is kept in case
    # get_centroids has side effects.
    centroids = get_centroids(k_means)
    # The clustering object is bound to `k_means`; `kmeans` is not defined here.
    markov_model = msm.estimate_markov_model(k_means.dtrajs, 100)

    # The estimated MSM exposes its matrix as the `transition_matrix` attribute.
    self.transition_matrix = markov_model.transition_matrix

    # NOTE(review): `tol` is not defined in this method; presumably a
    # module-level constant -- confirm.
    self._is_converged = relative_entropy(self.transition_matrix, transition_matrix) < tol
def estimateMSM(trajectories, lagtime, error_est=False):
    """ Estimate a MSM from the trajectories using a provided lagtime that
    should be big enough so that the relevant processes have converged.

    Parameters
    ----------
    trajectories
        Discrete trajectories passed to the estimator.
    lagtime
        Lag time passed to the estimator.
    error_est : bool
        When True use ``MSM.bayesian_markov_model``; otherwise use
        ``MSM.estimate_markov_model`` with ``count_mode='sliding'``.

    Returns
    -------
    The object returned by the selected estimator.
    """
    # print is written in call form so the module parses on Python 2 and 3
    # (with a single argument the output is the same on both).
    if error_est:
        print("Computing msm with bayes error calc")
        MSM_object = MSM.bayesian_markov_model(trajectories, lagtime)
    else:
        print("Computing msm with no error calc")
        MSM_object = MSM.estimate_markov_model(trajectories, lagtime, count_mode='sliding')
    return MSM_object
def lengthVsNtrajs(data, nruns, lagtime, clusters, outputFilename, cache, m, stride):
    """Score MSMs over a grid of cluster counts and lag times.

    For every (clusters, lag) pair the mean MSM score and the mean
    cross-validated score are stored in two ``(len(clusters), len(lagtime))``
    arrays and appended as a line to ``outputFilename``. Pairs present in
    ``cache`` are taken from there. If the estimation or the plain scoring
    fails, both entries are set to 0 and nothing is written for that pair;
    if only the cross-validated scoring fails, that entry is set to 0 and
    the line is still written.

    Returns
    -------
    (results, results_cv)
    """
    results = np.zeros((len(clusters), len(lagtime)))
    results_cv = np.zeros((len(clusters), len(lagtime)))

    def append_row(n_clusters, lag_value, score, score_cv):
        with open(outputFilename, 'a') as f:
            f.write("%d %d %f %f\n" % (n_clusters, lag_value, score, score_cv))

    def report_failure(n_clusters, lag_value):
        print("Estimation error in %d clusters, %d lagtime" % (n_clusters, lag_value))

    for i, cl in enumerate(clusters):
        clustering = coor.cluster_kmeans(data=data, k=cl, max_iter=500, stride=stride)
        for j, lag in enumerate(lagtime):
            key = (cl, lag)
            if key in cache:
                print("Loading cached computation for %d clusters and %d lagtime" % (cl, lag))
                results[i][j], results_cv[i][j] = cache[key]
                append_row(cl, lag, results[i][j], results_cv[i][j])
                continue

            print("Computing for %d clusters and %d lagtime" % (cl, lag))
            try:
                MSM = msm.estimate_markov_model(clustering.dtrajs, lag)
                print("MSM estimated on %d states" % MSM.nstates)
            except Exception:
                report_failure(cl, lag)
                results[i][j] = 0.0
                results_cv[i][j] = 0.0
                continue

            try:
                results[i][j] = np.mean(MSM.score(MSM.dtrajs_full, score_k=m))
            except Exception:
                report_failure(cl, lag)
                results[i][j] = 0.0
                results_cv[i][j] = 0.0
                continue

            try:
                results_cv[i][j] = np.mean(MSM.score_cv(MSM.dtrajs_full, score_k=m, n=nruns))
            except Exception:
                report_failure(cl, lag)
                results_cv[i][j] = 0.0

            append_row(cl, lag, results[i][j], results_cv[i][j])
    return results, results_cv
def markovModel(self, lag, macronum):
    """ Build a Markov model at a given lag time and calculate metastable states

    Parameters
    ----------
    lag : int
        The lag time at which to calculate the Markov state model in frames.
        Any integral type (including NumPy integers) is accepted silently;
        other values are converted with ``int()`` and a warning is logged.
    macronum : int
        The number of macrostates (metastable states) to produce

    Examples
    --------
    >>> model = Model(data)
    >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
    """
    import numbers
    import pyemma.msm as msm
    self._integrityCheck(markov=True)

    # numbers.Integral also matches NumPy integer scalars, which a plain
    # isinstance(lag, int) check rejects and would wrongly warn about.
    was_integral = isinstance(lag, numbers.Integral)
    lag = int(lag)
    if not was_integral:
        logger.warning(
            'The lag given to markovModel() was not an integer. Converting to integer: {}'
            .format(lag))

    self.lag = lag
    self.msm = msm.estimate_markov_model(self.data.St.tolist(), self.lag)
    self.P = self.msm.transition_matrix

    # cluster -> microstate lookup; -1 marks clusters outside the active set
    self.micro_ofcluster = -np.ones(self.data.K + 1, dtype=int)
    self.micro_ofcluster[self.msm.active_set] = np.arange(
        len(self.msm.active_set))
    self.cluster_ofmicro = self.msm.active_set
    self.micronum = len(self.msm.active_set)

    self.coarsemsm = self.msm.pcca(macronum)

    # Fixing pyemma macrostates: renumber the macrostates that actually
    # occur to 0..macronum-1; unused labels map to -1.
    self.macronum = len(set(self.msm.metastable_assignments))
    mask = np.ones(macronum, dtype=int) * -1
    mask[list(set(self.msm.metastable_assignments))] = range(self.macronum)
    self.macro_ofmicro = mask[self.msm.metastable_assignments]
    self.macro_ofcluster = -np.ones(self.data.K + 1, dtype=int)
    self.macro_ofcluster[self.msm.active_set] = self.macro_ofmicro

    logger.info('{:.1f}% of the data was used'.format(
        self.msm.active_count_fraction * 100))
    self._modelid = random.random()
    _macroTrajectoriesReport(
        self.macronum, _macroTrajSt(self.data.St, self.macro_ofcluster),
        self.data.simlist)
def __init__(self, topfile, trajectory, P, N):
    """Build a Markov model from a trajectory and compare it with a previous one.

    The trajectory is featurized, projected onto two TICA dimensions,
    clustered with k-means into ``N`` clusters, and an MSM is estimated at
    lag 100. The new transition matrix is stored in ``self.P`` and
    ``self._is_converged`` records whether its relative entropy with respect
    to the ``P`` argument is below ``tol``.

    Parameters
    ----------
    topfile
        Topology passed to ``coor.featurizer``.
    trajectory
        Trajectory passed to ``coor.load``.
    P
        Transition matrix from the previous iteration.
    N
        Number of k-means clusters.
    """
    # Build markov model with PyEmma
    feat = coor.featurizer(topfile)
    X = coor.load(trajectory, feat)
    Y = coor.tica(X, dim=2).get_output()
    k_means = coor.cluster_kmeans(Y, k=N)
    # NOTE(review): centroids is not used below; the call is kept in case
    # get_centroids has side effects.
    centroids = get_centroids(k_means)
    # The clustering object is bound to `k_means`; `kmeans` is not defined here.
    M = msm.estimate_markov_model(k_means.dtrajs, 100)

    # Q = n-1 transition matrix, P = n transition matrix
    Q = P
    # The estimated MSM exposes its matrix as the `transition_matrix` attribute.
    self.P = M.transition_matrix

    # NOTE(review): `tol` is not defined in this method; presumably a
    # module-level constant -- confirm.
    self._is_converged = relative_entropy(self.P, Q) < tol
def markovModel(self, lag, macronum, units='frames', sparse=False):
    """ Build a Markov model at a given lag time and calculate metastable states

    Parameters
    ----------
    lag : int
        The lag time at which to calculate the Markov state model. The units are specified with the `units` argument.
    macronum : int
        The number of macrostates (metastable states) to produce
    units : str
        The units of lag. Can be 'frames' or any time unit given as a string.
    sparse : bool
        Make the transition matrix sparse. Useful if lots (> 4000) states are used for the MSM. Warning: untested.

    Examples
    --------
    >>> model = Model(data)
    >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
    """
    import pyemma.msm as msm
    self._integrityCheck(markov=True)

    self.lag = unitconvert(units, 'frames', lag, fstep=self.data.fstep)
    self.msm = msm.estimate_markov_model(self.data.St.tolist(), self.lag, sparse=sparse)
    self.P = self.msm.transition_matrix

    # cluster -> microstate lookup; -1 marks clusters outside the active set
    active = self.msm.active_set
    n_clusters = self.data.K + 1
    self.micro_ofcluster = np.full(n_clusters, -1, dtype=int)
    self.micro_ofcluster[active] = np.arange(len(active))
    self.cluster_ofmicro = active
    self.micronum = len(active)

    self.coarsemsm = self.msm.pcca(macronum)

    # Fixing pyemma macrostates: renumber the macrostates that actually
    # occur to 0..macronum-1; unused labels map to -1.
    assignments = self.msm.metastable_assignments
    occurring = list(set(assignments))
    self.macronum = len(occurring)
    relabel = np.full(macronum, -1, dtype=int)
    relabel[occurring] = range(self.macronum)
    self.macro_ofmicro = relabel[assignments]
    self.macro_ofcluster = np.full(n_clusters, -1, dtype=int)
    self.macro_ofcluster[active] = self.macro_ofmicro

    logger.info('{:.1f}% of the data was used'.format(self.msm.active_count_fraction * 100))
    self._modelid = random.random()

    macro_trajectories = _macroTrajSt(self.data.St, self.macro_ofcluster)
    _macroTrajectoriesReport(self.macronum, macro_trajectories, self.data.simlist)
def markovModel(self, lag, macronum, units='frames', sparse=False, hmm=False):
    """ Build a Markov model at a given lag time and calculate metastable states

    Parameters
    ----------
    lag : int
        The lag time at which to calculate the Markov state model. The units are specified with the `units` argument.
    macronum : int
        The number of macrostates (metastable states) to produce
    units : str
        The units of lag. Can be 'frames' or any time unit given as a string.
    sparse : bool
        Make the transition matrix sparse. Useful if lots (> 4000) states are used for the MSM. Warning: untested.
    hmm : bool
        If True, additionally store ``self.msm.coarse_grain(self.macronum)`` in ``self.hmm``.
        Marked as still in development.

    Examples
    --------
    >>> model = Model(data)
    >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
    """
    import pyemma.msm as msm
    self._integrityCheck(markov=True)

    self.lag = unitconvert(units, 'frames', lag, fstep=self.data.fstep)
    self.msm = msm.estimate_markov_model(self.data.St.tolist(), self.lag, sparse=sparse)

    # Retry PCCA with one macrostate fewer until every macrostate is populated.
    while True:
        self.coarsemsm = self.msm.pcca(macronum)
        populated = len(np.unique(self.msm.metastable_assignments)) == macronum
        if not populated:
            macronum -= 1
            logger.warning('PCCA returned empty macrostates. Reducing the number of macrostates to {}.'.format(macronum))
        if macronum < 2:
            raise RuntimeError('Could not create even two macrostates. Please revise your clustering.')
        if populated:
            break

    self._modelid = random.random()

    # NOTE(review): self.macronum and self.macro_ofcluster are read below but
    # not assigned in this method; presumably they are defined on the class.
    if hmm:  # Still in development
        self.hmm = self.msm.coarse_grain(self.macronum)

    logger.info('{:.1f}% of the data was used'.format(self.msm.active_count_fraction * 100))

    macro_trajectories = _macroTrajSt(self.data.St, self.macro_ofcluster)
    _macroTrajectoriesReport(self.macronum, macro_trajectories, self.data.simlist)
def markovModel(self, lag, macronum):
    """ Build a Markov model at a given lag time and calculate metastable states

    Parameters
    ----------
    lag : int
        The lag time at which to calculate the Markov state model in frames.
        Any integral type (including NumPy integers) is accepted silently;
        other values are converted with ``int()`` and a warning is logged.
    macronum : int
        The number of macrostates (metastable states) to produce

    Examples
    --------
    >>> model = Model(data)
    >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
    """
    import numbers
    import pyemma.msm as msm
    self._integrityCheck(markov=True)

    # numbers.Integral also matches NumPy integer scalars, which a plain
    # isinstance(lag, int) check rejects and would wrongly warn about.
    was_integral = isinstance(lag, numbers.Integral)
    lag = int(lag)
    if not was_integral:
        logger.warning('The lag given to markovModel() was not an integer. Converting to integer: {}'.format(lag))

    self.lag = lag
    self.msm = msm.estimate_markov_model(self.data.St.tolist(), self.lag)
    self.P = self.msm.transition_matrix

    # cluster -> microstate lookup; -1 marks clusters outside the active set
    self.micro_ofcluster = -np.ones(self.data.K+1, dtype=int)
    self.micro_ofcluster[self.msm.active_set] = np.arange(len(self.msm.active_set))
    self.cluster_ofmicro = self.msm.active_set
    self.micronum = len(self.msm.active_set)

    self.coarsemsm = self.msm.pcca(macronum)

    # Fixing pyemma macrostates: renumber the macrostates that actually
    # occur to 0..macronum-1; unused labels map to -1.
    self.macronum = len(set(self.msm.metastable_assignments))
    mask = np.ones(macronum, dtype=int) * -1
    mask[list(set(self.msm.metastable_assignments))] = range(self.macronum)
    self.macro_ofmicro = mask[self.msm.metastable_assignments]
    self.macro_ofcluster = -np.ones(self.data.K+1, dtype=int)
    self.macro_ofcluster[self.msm.active_set] = self.macro_ofmicro

    logger.info('{:.1f}% of the data was used'.format(self.msm.active_count_fraction * 100))
    self._modelid = random.random()
    _macroTrajectoriesReport(self.macronum, _macroTrajSt(self.data.St, self.macro_ofcluster), self.data.simlist)
def test_ck_msm(self):
    """Run a 2-set Chapman-Kolmogorov test on a lag-40 MSM.

    Estimates and predictions at ``mlags=[0, 1, 10]`` are compared with
    stored reference values, and the first two confidence entries of each
    are required to be ``None``.
    """
    model = msm.estimate_markov_model(
        [self.double_well_data.dtraj_T100K_dt10_n6good], 40)
    ck = model.cktest(2, mlags=[0, 1, 10])

    identity = [[1., 0.], [0., 1.]]
    one_lag = [[0.89806859, 0.10193141], [0.10003466, 0.89996534]]
    estref = np.array([identity,
                       one_lag,
                       [[0.64851782, 0.35148218], [0.34411751, 0.65588249]]])
    predref = np.array([identity,
                        one_lag,
                        [[0.62613723, 0.37386277], [0.3669059, 0.6330941]]])

    # rough agreement with MLE
    # NOTE(review): atol=10.0 is far larger than any reference value above,
    # so these closeness checks are very permissive -- confirm intended.
    tolerance = dict(rtol=0.1, atol=10.0)

    assert np.allclose(ck.estimates, estref, **tolerance)
    assert ck.estimates_conf[0] is None
    assert ck.estimates_conf[1] is None

    assert np.allclose(ck.predictions, predref, **tolerance)
    assert ck.predictions_conf[0] is None
    assert ck.predictions_conf[1] is None
def markovModel(self, lag, macronum):
    """ Build a Markov model at a given lag time and calculate metastable states

    An MSM is estimated first and then coarse-grained into ``self.hmm``; the
    transition matrix stored in ``self.P`` is the one of the coarse-grained
    model.

    Parameters
    ----------
    lag : int
        The lag time at which to calculate the Markov state model in frames.
    macronum : int
        The number of macrostates (metastable states) to produce

    Examples
    --------
    >>> model = ModelHMM(data)
    >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
    """
    import pyemma.msm as msm
    self._integrityCheck(markov=True)

    self.lag = lag
    self.msm = msm.estimate_markov_model(self.data.St.tolist(), self.lag)
    self.hmm = self.msm.coarse_grain(macronum)

    # cluster -> microstate lookup; -1 marks clusters outside the active set
    active = self.msm.active_set
    n_clusters = self.data.K + 1
    self.micro_ofcluster = np.full(n_clusters, -1, dtype=int)
    self.micro_ofcluster[active] = np.arange(len(active))
    self.cluster_ofmicro = active
    self.micronum = len(active)

    self.P = self.hmm.transition_matrix
    self.macronum = np.size(self.P, 0)

    # Fixing pyemma macrostates
    hmm_assignments = self.hmm.metastable_assignments
    self.macronum = len(set(hmm_assignments))
    relabel = np.full(macronum, -1, dtype=int)
    # NOTE(review): the labels indexed here come from self.msm, while the
    # count above and the lookup below use self.hmm -- confirm intended.
    relabel[list(set(self.msm.metastable_assignments))] = range(self.macronum)
    self.macro_ofmicro = relabel[self.hmm.metastable_assignments]
    self.macro_ofcluster = np.full(n_clusters, -1, dtype=int)
    self.macro_ofcluster[active] = self.macro_ofmicro

    self._modelid = random.random()
def markovModel(self, lag, macronum):
    """ Build a Markov model at a given lag time and calculate metastable states

    An MSM is estimated first and then coarse-grained into ``self.hmm``; the
    transition matrix stored in ``self.P`` is the one of the coarse-grained
    model.

    Parameters
    ----------
    lag : int
        The lag time at which to calculate the Markov state model in frames.
    macronum : int
        The number of macrostates (metastable states) to produce

    Examples
    --------
    >>> model = ModelHMM(data)
    >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
    """
    import pyemma.msm as msm
    self._integrityCheck(markov=True)

    self.lag = lag
    self.msm = msm.estimate_markov_model(self.data.St.tolist(), self.lag)
    self.hmm = self.msm.coarse_grain(macronum)

    # cluster -> microstate lookup; -1 marks clusters outside the active set
    active = self.msm.active_set
    n_clusters = self.data.K + 1
    self.micro_ofcluster = np.full(n_clusters, -1, dtype=int)
    self.micro_ofcluster[active] = np.arange(len(active))
    self.cluster_ofmicro = active
    self.micronum = len(active)

    self.P = self.hmm.transition_matrix
    self.macronum = np.size(self.P, 0)

    # Fixing pyemma macrostates
    hmm_assignments = self.hmm.metastable_assignments
    self.macronum = len(set(hmm_assignments))
    relabel = np.full(macronum, -1, dtype=int)
    # NOTE(review): the labels indexed here come from self.msm, while the
    # count above and the lookup below use self.hmm -- confirm intended.
    relabel[list(set(self.msm.metastable_assignments))] = range(self.macronum)
    self.macro_ofmicro = relabel[self.hmm.metastable_assignments]
    self.macro_ofcluster = np.full(n_clusters, -1, dtype=int)
    self.macro_ofcluster[active] = self.macro_ofmicro

    self._modelid = random.random()
def test_time_units(self):
    """Check MFPT and flux consistency of TPT for an MSM with a time unit.

    The MSM is estimated from a random 4-state trajectory with lag 12 and a
    trajectory time step of 0.456 ns.
    """
    dtraj = np.random.randint(0, 4, 1000)
    tau = 12
    dt = 0.456
    msmobj = estimate_markov_model(dtraj, lag=tau, dt_traj='%f ns' % dt)

    # check MFPT consistency
    mfpt_ref = msmobj.mfpt([0], [1])
    tptobj = tpt(msmobj, [0], [1])
    assert_allclose(tptobj.mfpt, mfpt_ref)
    scaled_mfpt = msmana.mfpt(msmobj.P, [1], [0], tau=tau) * dt
    assert_allclose(scaled_mfpt, mfpt_ref)
    committor_mfpt = np.dot(msmobj.stationary_distribution,
                            tptobj.backward_committor) / tptobj.total_flux
    assert_allclose(committor_mfpt, mfpt_ref)

    # check flux consistency
    total_flux_ref = tptobj.total_flux
    A, B, I = tptobj.A, tptobj.B, tptobj.I

    def flux_leaving_A(flux_matrix):
        # Flux from A into B plus flux from A into the intermediate set.
        from_A = flux_matrix[A, :]
        return from_A[:, B].sum() + from_A[:, I].sum()

    assert_allclose(flux_leaving_A(tptobj.gross_flux), total_flux_ref)
    assert_allclose(flux_leaving_A(tptobj.net_flux), total_flux_ref)
    assert_allclose(flux_leaving_A(tptobj.flux), total_flux_ref)
    assert_allclose(flux_leaving_A(tptobj.major_flux(1.0)), total_flux_ref)

    # check that the coarse-grained version is consistent too
    _, coarse_tpt = tptobj.coarse_grain([A, I, B])
    assert_allclose(coarse_tpt.total_flux, total_flux_ref)
    assert_allclose(coarse_tpt.mfpt, mfpt_ref)
def estimateDG(data, nruns, cl, lag, ntraj, len_traj, skipFirstSnaphots, cluster_each_iteration):
    """Estimate a free-energy value over ``nruns`` resampled data subsets.

    Each run selects trajectories with ``select_iteration_data``, trims them
    to ``[skipFirstSnaphots:len_traj]``, discretizes them (re-clustering per
    run when ``cluster_each_iteration`` is true, otherwise assigning to a
    clustering computed once on all data), estimates an MSM and parses the
    value from the second whitespace-separated field of the string returned
    by ``compute.calculate_pmf``. Runs whose MSM estimation raises are
    skipped.

    Returns
    -------
    (mean, std) of the collected values.
    """
    box_size = 0.75
    values = []

    if not cluster_each_iteration:
        clustering = coor.cluster_kmeans(data=data, k=cl, max_iter=500, stride=1)

    for _ in range(nruns):
        selected = select_iteration_data(data, ntraj)
        data_it = [data[idx][skipFirstSnaphots:len_traj] for idx in selected]

        if cluster_each_iteration:
            clustering = coor.cluster_kmeans(data=data_it, k=cl, max_iter=500, stride=1)
            dtrajs = clustering.dtrajs
        else:
            dtrajs = clustering.assign(data_it)

        try:
            MSM = msm.estimate_markov_model(dtrajs, lag)
            print("MSM estimated on %d states" % MSM.nstates)
        except Exception:
            print("Estimation error in %d clusters, %d lagtime, %d trajectories of %d steps" % (cl, lag, ntraj, len_traj))
            continue

        pi, cl_centers = compute.ensure_connectivity(MSM, clustering.clustercenters)
        bins = compute.create_box(cl_centers, data_it, box_size)
        microstateVolume = compute.calculate_microstate_volumes_new(cl_centers, data_it, bins, box_size)
        _, pmf_line = compute.calculate_pmf(microstateVolume, pi)
        values.append(float(pmf_line.split()[1]))

    return np.mean(values), np.std(values)
from pyemma.msm import estimate_markov_model
import numpy as np

# method 1 (kept for reference, not executed)
# msmrev=OOMReweightedMSM(lag=150,sparse=True,reversible=False,rank_Ct='bootstrap_trajs')
# msmrev=OOMReweightedMSM(lag=150,sparse=True,reversible=False)
# tol_rank=10.0 or smaller?
# sparse=True/False
# reversible=True

# method 2: estimate an OOM-weighted MSM at lag 200 and save its timescales
sequence = np.load('all_faked_trajs_0.npy')
# One discrete trajectory per entry along the first axis of the loaded array.
dtrajs = list(sequence)
# msmrev_fit=msmrev.fit(dtrajs)
msm = estimate_markov_model(dtrajs, lag=200, weights='oom')
np.save('msm_timescales.npy', msm.timescales())
# msm.stationary_distribution
config.show_progress_bars = False lag = args.tica_lag feat = coor.featurizer(topfile) feat.add_backbone_torsions() inp = coor.source(trajfiles, feat) dim = args.tica_dim tica_obj = coor.tica(inp, lag=lag, dim=dim, kinetic_map=False) Y = tica_obj.get_output() cl = coor.cluster_kmeans(data=Y, k=args.msm_states, stride=args.stride) M = msm.estimate_markov_model(cl.dtrajs, args.msm_lag) # with open("model.dtraj", "w") as f: # f.write("\n".join(" ".join(map(str, x)) for x in cl.dtrajs)) # # # np.savetxt("model.dtraj", cl.dtrajs, delimiter=" ", fmt='%d') # np.savetxt("model.msm", M.P, delimiter=",") data = { 'input': { 'frames': inp.n_frames_total(), 'dimension': inp.dimension(), 'trajectories': inp.number_of_trajectories(), 'lengths': inp.trajectory_lengths().tolist(), }, 'tica': {
def _find_omega_msm(self) -> np.ndarray:
    """Return the eigenvalues of a non-reversible MSM built on the binned CV.

    ``self._newcv`` is histogrammed into ``self._size`` bins, each value is
    replaced by its bin index via ``np.digitize``, and an MSM is estimated
    on that index sequence at lag ``self._lag_time``.
    """
    # Only the bin edges of the histogram are needed.
    bin_edges = np.histogram(self._newcv, self._size)[1]
    discretized = np.digitize(self._newcv, bin_edges)
    model = msm.estimate_markov_model(discretized, self._lag_time, reversible=False)
    return model.eigenvalues()
if args.display: pp.show() pp.clf() pp.close() fig, (ax1, ax2) = pp.subplots(1,2) ax1.scatter(cc_x, cc_y, marker='o', color='black') ax2 = mplt.plot_free_energy(np.vstack(Y)[:,0], np.vstack(Y)[:,1], cbar_label=None) if args.save: pp.savefig(os.path.join(args.save_destination, 'msm_tica_all.png')) if args.display: pp.show() pp.clf() pp.close() ### #actually generate MSM from data msm_from_data = msm.estimate_markov_model(dtrajs=mapped_data, lag=lagtime) #plot and/or save implied timescales, if specified if args.timescales: its = msm.timescales_msm(dtrajs=mapped_data, lags=500) mplt.plot_implied_timescales(its, show_mean=False, ylog=True, dt=25, units='ps', linewidth=2) if args.save: pp.savefig(os.path.join(args.save_destination, 'msm_its.png')) if args.display: pp.show() pp.clf() pp.close() #### #pcca cluster using specified n_sets msm_from_data.pcca(n_sets)
def setUp(self):
    """Build the MSM under test and the expected set-to-set probabilities.

    A trajectory of a 7-state birth-death chain is generated with a fixed
    seed. ``p_MSM`` holds, for k = 0..K-1, the probability of staying in set
    A (column 0) and set B (column 1) according to powers of the lag-1 MSM;
    ``p_MD`` and ``eps_MD`` hold the same quantity and an error estimate
    obtained from a transition matrix estimated directly at lag k*tau.
    """
    # Store state of the rng
    self.state = np.random.mtrand.get_state()

    # Reseed the rng to enforce 'deterministic' behavior
    np.random.mtrand.seed(42)

    # Meta-stable birth-death chain
    b = 2
    small = 10 ** (-b)
    q = np.zeros(7)
    p = np.zeros(7)
    q[1:] = 0.5
    p[0:-1] = 0.5
    q[2] = 1.0 - small
    q[4] = small
    p[2] = small
    p[4] = 1.0 - small

    bdc = BirthDeathChain(q, p)
    P = bdc.transition_matrix()
    dtraj = generate_traj(P, 10000, start=0)
    tau = 1

    # Estimate MSM
    MSM = estimate_markov_model(dtraj, tau)
    C_MSM = MSM.count_matrix_full
    lcc_MSM = MSM.largest_connected_set
    Ccc_MSM = MSM.count_matrix_active
    P_MSM = MSM.transition_matrix
    mu_MSM = MSM.stationary_distribution

    # Meta-stable sets
    A = [0, 1, 2]
    B = [4, 5, 6]
    sets = (A, B)

    # Row i is the stationary distribution restricted to set i, normalised.
    w_MSM = np.zeros((2, mu_MSM.shape[0]))
    for row, members in enumerate(sets):
        w_MSM[row, members] = mu_MSM[members] / mu_MSM[members].sum()

    K = 10
    P_MSM_dense = P_MSM

    p_MSM = np.zeros((K, 2))
    w_MSM_k = 1.0 * w_MSM
    for k in range(1, K):
        w_MSM_k = np.dot(w_MSM_k, P_MSM_dense)
        for row, members in enumerate(sets):
            p_MSM[k, row] = w_MSM_k[row, members].sum()

    # Assume that sets are equal, A(tau)=A(k tau) for all k
    w_MD = 1.0 * w_MSM
    p_MD = np.zeros((K, 2))
    eps_MD = np.zeros((K, 2))
    p_MSM[0, :] = 1.0
    p_MD[0, :] = 1.0
    eps_MD[0, :] = 0.0

    for k in range(1, K):
        # Build MSM at lagtime k*tau
        C_MD = cmatrix(dtraj, k * tau, sliding=True) / (k * tau)
        lcc_MD = largest_connected_set(C_MD)
        Ccc_MD = connected_cmatrix(C_MD, lcc=lcc_MD)
        c_MD = Ccc_MD.sum(axis=1)
        P_MD = tmatrix(Ccc_MD).toarray()
        w_MD_k = np.dot(w_MD, P_MD)

        # Set A (column 0), then set B (column 1)
        for row, members in enumerate(sets):
            prob_MD = w_MD_k[row, members].sum()
            c = c_MD[members].sum()
            p_MD[k, row] = prob_MD
            eps_MD[k, row] = np.sqrt(k * (prob_MD - prob_MD ** 2) / c)

    # Input
    self.MSM = MSM
    self.K = K
    self.A = A
    self.B = B

    # Expected results
    self.p_MSM = p_MSM
    self.p_MD = p_MD
    self.eps_MD = eps_MD
def initial_model_discrete(observations, nstates, lag=1, reversible=True):
    """Generate an initial model with discrete output densities

    A reversible MSM is estimated on the largest connected set, coarse-grained
    with PCCA into `nstates` metastable states, and the result is turned into
    an HMM with a discrete output model.

    Parameters
    ----------
    observations : list of ndarray((T_i), dtype=int)
        list of arrays of length T_i with observation data
    nstates : int
        The number of states.
    lag : int, optional, default=1
        The lag time to use for initializing the model.
    reversible : bool, optional, default=True
        Only reversible initialization is supported. If False, a warning is
        issued and a reversible matrix is used anyway.

    Returns
    -------
    model : HMM
        The initial model.

    TODO
    ----
    * Why do we have a `lag` option?  Isn't the HMM model, by definition, lag=1 everywhere?  Why would this be useful instead of just having the user subsample the data?

    Examples
    --------

    Generate initial model for a discrete output model.

    >>> from bhmm import testsystems
    >>> [model, observations, states] = testsystems.generate_synthetic_observations(output_model_type='discrete')
    >>> initial_model = initial_model_discrete(observations, model.nstates)

    """
    # check input
    if not reversible:
        warnings.warn("nonreversible initialization of discrete HMM currently not supported. Using a reversible matrix for initialization.")
        reversible = True

    # import emma inside function in order to avoid dependency loops
    from pyemma import msm

    # estimate Markov model and coarse-grain it with PCCA
    MSM = msm.estimate_markov_model(observations, lag, reversible=True, connectivity='largest')
    pcca = MSM.pcca(nstates)

    # HMM output matrix on the active (connected) set
    active_output = MSM.metastable_distributions

    # Full state space output matrix: start from a small default output
    # probability in order to avoid zero columns, then copy in the active part.
    n_full = MSM.count_matrix_full.shape[0]
    eps = 0.01 * (1.0 / n_full)
    B = np.full((nstates, n_full), eps, dtype=np.float64)
    B[:, MSM.active_set] = active_output[:, :]
    # renormalize B to make it row-stochastic
    B /= B.sum(axis=1)[:, None]

    # coarse-grained transition matrix: inv(M^T M) (M^T P M)
    memberships = pcca.memberships
    overlap_inverse = np.linalg.inv(np.dot(memberships.T, memberships))
    projected = np.dot(np.dot(memberships.T, MSM.transition_matrix), memberships)
    P_coarse = np.dot(overlap_inverse, projected)

    # symmetrize and renormalize to eliminate numerical errors
    flux = np.dot(np.diag(pcca.coarse_grained_stationary_probability), P_coarse)
    flux = 0.5 * (flux + flux.T)
    # every entry is raised to at least eps, which also removes values < 0
    flux = np.maximum(flux, eps)
    # turn into coarse-grained transition matrix
    A = flux / flux.sum(axis=1)[:, None]

    logger().info('Initial model: ')
    logger().info('transition matrix = \n'+str(A))
    logger().info('output matrix = \n'+str(B.T))

    # initialize HMM
    # --------------
    return HMM(A, DiscreteOutputModel(B))