def plotTimescales(self, lags=None, errors=None, nits=None, results=False, plot=True):
    """ Estimate MSMs at several lag times and plot their implied timescales

    Parameters
    ----------
    lags : list
        Lag times at which the timescales are computed. When omitted, the
        value returned by ``self._defaultLags()`` is used.
    errors : errors
        Forwarded unchanged to ``pyemma.msm.its`` (e.g. Bayesian error
        estimation; refer to the pyEMMA documentation).
    nits : int
        Number of implied timescales to calculate. When omitted, the smaller
        of ``self.data.K`` and 20 is used.
    results : bool
        If True, the calculated implied timescales are returned.
    plot : bool
        If True, the implied timescales plot is displayed.

    Returns
    -------
    Only when `results` is True:

    its : np.ndarray
        The value of ``get_timescales()`` of the pyEMMA estimate.
    lags : np.ndarray
        The ``lags`` attribute of the pyEMMA estimate.

    Examples
    --------
    >>> model = Model(data)
    >>> model.plotTimescales()
    >>> model.plotTimescales(lags=list(range(1,100,5)))
    """
    import pyemma.msm as msm
    import pyemma.plots as mplt

    self._integrityCheck()
    lag_times = self._defaultLags() if lags is None else lags
    n_timescales = np.min((self.data.K, 20)) if nits is None else nits

    from htmd.config import _config
    estimate = msm.its(
        self.data.St.tolist(),
        lags=lag_times,
        errors=errors,
        nits=n_timescales,
        n_jobs=_config['ncpus'],
    )

    if plot:
        plt.ion()
        plt.figure()
        # The time axis is scaled by the frame step stored on the data object.
        mplt.plot_implied_timescales(estimate, dt=self.data.fstep, units='ns')
        plt.show()

    if not results:
        return None
    return estimate.get_timescales(), estimate.lags
def plotTimescales(self, lags=None, units='frames', errors=None, nits=None, results=False, plot=True):
    """ Estimate MSMs at several lag times and plot their implied timescales

    Parameters
    ----------
    lags : list
        Lag times at which the timescales are computed. When omitted, the
        value returned by ``self._defaultLags()`` is used.
    units : str
        Unit in which `lags` is expressed. Explicitly given lags are converted
        to frames with ``unitconvert`` using ``self.data.fstep``. Not used
        when `lags` is None.
    errors : errors
        Forwarded unchanged to ``pyemma.msm.its`` (e.g. Bayesian error
        estimation; refer to the pyEMMA documentation).
    nits : int
        Number of implied timescales to calculate. When omitted, the smaller
        of ``self.data.K`` and 20 is used.
    results : bool
        If True, the calculated implied timescales are returned.
    plot : bool
        If True, the implied timescales plot is displayed.

    Returns
    -------
    Only when `results` is True:

    its : np.ndarray
        The value of ``get_timescales()`` of the pyEMMA estimate.
    lags : np.ndarray
        The ``lags`` attribute of the pyEMMA estimate.

    Examples
    --------
    >>> model = Model(data)
    >>> model.plotTimescales()
    >>> model.plotTimescales(lags=list(range(1,100,5)))
    """
    import pyemma.msm as msm
    import pyemma.plots as mplt

    self._integrityCheck()

    if lags is not None:
        # User supplied lags may be in any unit; the estimator works in frames.
        lag_frames = unitconvert(units, 'frames', lags, fstep=self.data.fstep).tolist()
    else:
        lag_frames = self._defaultLags()

    n_timescales = np.min((self.data.K, 20)) if nits is None else nits

    from htmd.config import _config
    estimate = msm.its(
        self.data.St.tolist(),
        lags=lag_frames,
        errors=errors,
        nits=n_timescales,
        n_jobs=_config['ncpus'],
    )

    if plot:
        from matplotlib import pylab as plt
        plt.ion()
        plt.figure()
        mplt.plot_implied_timescales(estimate, dt=self.data.fstep, units='ns')
        plt.show()

    if not results:
        return None
    return estimate.get_timescales(), estimate.lags
def setUpClass(cls):
    """Build the shared implied-timescales fixture for the test class.

    A 4-state transition matrix is used to generate a trajectory (via
    ``generate_traj``), from which an implied-timescales object with Bayesian
    errors is estimated. The object is stored as ``cls.its`` and the last row
    of its ``timescales`` as ``cls.refs``. The class itself is returned.
    """
    transition_matrix = np.array([
        [0.5, 0.25, 0.25, 0.],
        [0., 0.25, 0.5, 0.25],
        [0.25, 0.25, 0.5, 0],
        [0.25, 0.25, 0.25, 0.25],
    ])
    lag_times = [1, 2, 3, 5, 10]

    # bogus its object: only needed so that there is something to work with
    trajectory = generate_traj(transition_matrix, 100)
    cls.its = its(trajectory, lags=lag_times, errors='bayes')
    cls.refs = cls.its.timescales[-1]
    return cls
def test_its_bmsm(self):
    """Bayesian implied timescales of the double-well data agree with reference values.

    Checks (1) rough agreement of the estimated timescales with stored
    reference numbers and (2) that every estimated timescale lies strictly
    inside the 99.9 % sample confidence interval.
    """
    expected = np.array([
        [284.87479737, 6.68390402, 3.0375248, 2.65314172, 1.93066562],
        [320.08583492, 11.14612743, 10.3450663, 9.42799075, 8.2109752],
        [351.41541961, 42.87427869, 41.17841657, 37.35485197, 23.24254608],
    ])
    dtrajs = [self.double_well_data.dtraj_T100K_dt10_n6good]
    estimator = msm.its(dtrajs, lags=[10, 50, 200], errors='bayes', nsamples=1000)
    timescales = estimator.timescales

    # rough agreement with MLE
    assert np.allclose(timescales, expected, rtol=0.1, atol=10.0)

    # within left / right intervals. This test should fail only 1 out of 1000 times.
    lower, upper = estimator.get_sample_conf(conf=0.999)
    np.testing.assert_array_less(lower, timescales)
    np.testing.assert_array_less(timescales, upper)
def test_its_bmsm(self):
    """Bayesian implied timescales (estimated with two jobs) agree with reference values.

    Checks (1) rough agreement of all estimated timescales with stored
    reference numbers and (2) that row 0 of the estimate lies strictly inside
    row 0 of the 99.9 % sample confidence interval.
    """
    expected = np.array([
        [284.87479737, 6.68390402, 3.0375248, 2.65314172, 1.93066562],
        [320.08583492, 11.14612743, 10.3450663, 9.42799075, 8.2109752],
        [351.41541961, 42.87427869, 41.17841657, 37.35485197, 23.24254608],
    ])
    dtrajs = [self.double_well_data.dtraj_T100K_dt10_n6good]
    estimator = msm.its(dtrajs, lags=[10, 50, 200], errors='bayes', nsamples=1000, n_jobs=2)
    timescales = estimator.timescales

    # rough agreement with MLE
    assert np.allclose(timescales, expected, rtol=0.1, atol=10.0)

    # within left / right intervals. This test should fail only 1 out of 1000 times.
    lower, upper = estimator.get_sample_conf(conf=0.999)
    # Only index [0] is compared: per the original authors the following
    # entries are already ambiguous (deviations after the first place), which
    # made the full comparison fail stochastically.
    np.testing.assert_array_less(lower[0], timescales[0])
    np.testing.assert_array_less(timescales[0], upper[0])
def _calculateITS(self):
    """Interactively choose the lagtime for the MSM from implied time-scale plots.

    On every round the implied time-scales are estimated for ``self.lagtimes``
    (when that list is non-empty), plotted and saved to ``its.png``. If
    ``self.lagtime`` is already set it is returned right away without
    prompting. Otherwise the user is asked whether the plot has converged:

    * yes (default on empty input): the user is asked for the lagtime, which
      is returned as an ``int``;
    * no: the user may add lagtimes to ``self.lagtimes`` or replace them, and
      the estimation is repeated with the sorted list.

    Returns
    -------
    lagtime : int
        ``self.lagtime`` when it is not None, else the value typed by the user.

    Raises
    ------
    ValueError
        If a lagtime typed by the user cannot be converted to ``int``.
    """
    # Works on Python 2 (raw_input) as well as Python 3 (input).
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    print(("Calculating implied time-scales, when it's done will prompt "
           "for confirmation on the validity of the lagtimes..."))

    its_errors = "bayes" if self.error else None
    is_converged = False
    while not is_converged:
        if self.lagtimes:
            # workaround to get new its plot at each iteration, the
            # plot_implied_timescales function is calling plt.gca() and
            # recovers the previous plot's axes, by creating a new figure
            # gca gets a set of empty axes and plots are fine
            plt.figure()
            its_object = msm.its(self.dtrajs, lags=self.lagtimes, errors=its_errors)
            mplt.plot_implied_timescales(its_object, outfile=self.itsOutput, nits=self.numberOfITS)
            plt.savefig("its.png")
        if self.lagtime is not None:
            return self.lagtime

        while True:
            plt.show()
            # The stripped value is actually used now (the original discarded
            # the result of rstrip()); empty input means "yes".
            answer = read_line("Has the ITS plot converged?[y/n] ").strip().lower() or "y"
            if answer in ("y", "yes"):
                is_converged = True
                lagtime = int(read_line("Please input the lagtime to construct the MSM: ").strip())
                break
            elif answer in ("n", "no"):
                break
            else:
                print("Answer not valid. Please answer yes or no")

        if not is_converged:
            choice = read_line("Do you want to define new lagtimes or add to the previous?[add(a)/new(n)] ").strip().lower()
            if choice in ("add", "a"):
                lag_list = read_line("Please input the lagtimes you want to add separated by a space: ")
                if self.lagtimes is None:
                    self.lagtimes = []
                # split() without argument tolerates repeated/trailing blanks
                self.lagtimes.extend(int(token) for token in lag_list.split())
            elif choice in ("new", "n"):
                lag_list = read_line("Please input the new lagtimes separated by a space: ")
                # A real list is needed: map() is a lazy iterator on Python 3
                self.lagtimes = [int(token) for token in lag_list.split()]
            if self.lagtimes:
                self.lagtimes.sort()
    return lagtime
def _calculateITS(self):
    """Estimate and plot the implied time-scales, then hand back ``self.lagtime``.

    When ``self.lagtimes`` is non-empty the implied time-scales are estimated
    at those lagtimes (with Bayesian errors if ``self.error`` is truthy),
    plotted and saved to ``its.png``.

    Returns
    -------
    ``self.lagtime`` (which is None when no lagtime has been set).
    """
    print("Calculating implied time-scales")
    its_errors = "bayes" if self.error else None

    if self.lagtimes:
        # plot_implied_timescales draws on plt.gca(), which would be the axes
        # of a previous plot; opening a new figure first gives it empty axes.
        plt.figure()
        estimate = msm.its(self.dtrajs, lags=self.lagtimes, errors=its_errors)
        mplt.plot_implied_timescales(estimate, outfile=self.itsOutput, nits=self.numberOfITS)
        plt.savefig("its.png")

    return self.lagtime
def plot_implied_timescales(dtrajs, nits=15, model_name=""):
    """
    Estimate implied timescales with Bayesian errors and plot them.

    The lag times are NOT a parameter: they are read from a variable named
    ``lags`` that must exist in the enclosing (module) scope.

    Parameters
    ----------
    dtrajs
        Discrete trajectories, passed to ``msm.its``.
    nits
        Number of implied timescales, passed to ``msm.its``.
    model_name
        Text used as the title of the plot.

    Returns
    -------
    its : :class:`ImpliedTimescales <pyemma.msm.estimators.implied_timescales.ImpliedTimescales>` object
    """
    estimate = msm.its(dtrajs, lags=lags, nits=nits, errors="bayes")
    # Time axis: 0.25 ns per step.
    mplt.plot_implied_timescales(estimate, dt=0.25, units="ns")
    plt.title(model_name)
    return estimate
def plot_implied_timescales(dtrajs, nits=15, model_name=''):
    '''
    Estimate implied timescales, plot them and save the plot as a PDF.

    The lag times are NOT a parameter: they are read from a variable named
    ``lags`` that must exist in the enclosing (module) scope. The figure is
    written to ``<model_name>_implied_timescales.pdf`` and closed afterwards.

    Parameters
    ----------
    dtrajs
        Discrete trajectories, passed to ``msm.its``.
    nits
        Number of implied timescales, passed to ``msm.its``.
    model_name
        Text used as the plot title and as prefix of the output file name.

    Returns
    -------
    its : :class:`ImpliedTimescales <pyemma.msm.estimators.implied_timescales.ImpliedTimescales>` object
    '''
    estimate = msm.its(dtrajs, lags=lags, nits=nits)
    # Time axis: 0.25 ns per step.
    mplt.plot_implied_timescales(estimate, dt=0.25, units='ns')
    plt.title(model_name)

    output_file = '{0}_implied_timescales.pdf'.format(model_name)
    plt.savefig(output_file)
    plt.close()
    return estimate
def maxConnectedLag(self, lags):
    """ Heuristic for getting the lagtime before a timescale drops.

    It calculates the last lagtime before a drop occurs in the first implied timescale due to disconnected states.
    If the top timescale is closer to the second top timescale at the previous lagtime than to itself at the previous
    lagtime it means that a drop occurred. The lagtime before the drop is returned.

    Parameters
    ----------
    lags : np.ndarray or list
        A list of lag times for which to calculate the implied timescales

    Returns
    -------
    If `lags` contains exactly one element, `lags` itself is returned unchanged
    (no timescales are computed). Otherwise a tuple:

    ml : int
        The maximum lagtime before a drop occurs in the top timescale. If no drop is detected this is the lag
        at the index of the last computed row of `itime`.
    itime : np.ndarray
        The implied timescales (two per lag) returned by pyEMMA

    Examples
    --------
    >>> model = Model(data)
    >>> model.maxConnectedLag(list(range(1, 100, 5)))
    """
    if len(lags) == 1:
        # A single lag leaves nothing to compare; kept as-is for existing callers.
        return lags
    if isinstance(lags, np.ndarray):
        lags = lags.astype(int)

    import pyemma.msm as msm
    itime = msm.its(self.data.St.tolist(), lags=lags, nits=2).get_timescales()

    n_rows = np.size(itime, 0)
    # Default: no drop found -> last computed row. Initialising here also
    # avoids an unbound `lagidx` when fewer than two rows were computed.
    lagidx = max(n_rows - 1, 0)
    for i in range(1, n_rows):
        to_previous_second = abs(itime[i, 0] - itime[i - 1, 1])
        to_previous_top = abs(itime[i, 0] - itime[i - 1, 0])
        if to_previous_second < to_previous_top:
            # The top timescale fell onto the previous second one: a drop.
            lagidx = i - 1
            break
    return lags[lagidx], itime
def maxConnectedLag(self, lags):
    """ Heuristic for getting the lagtime before a timescale drops.

    It calculates the last lagtime before a drop occurs in the first implied timescale due to disconnected states.
    If the top timescale is closer to the second top timescale at the previous lagtime than to itself at the previous
    lagtime it means that a drop occurred. The lagtime before the drop is returned.

    Parameters
    ----------
    lags : np.ndarray or list
        A list of lag times for which to calculate the implied timescales

    Returns
    -------
    If `lags` contains exactly one element, `lags` itself is returned unchanged
    (no timescales are computed). Otherwise a tuple:

    ml : int
        The maximum lagtime before a drop occurs in the top timescale. If no drop is detected this is the lag
        at the index of the last computed row of `itime`.
    itime : np.ndarray
        The implied timescales (two per lag) returned by pyEMMA

    Examples
    --------
    >>> model = Model(data)
    >>> model.maxConnectedLag(list(range(1, 100, 5)))
    """
    if len(lags) == 1:
        # A single lag leaves nothing to compare; kept as-is for existing callers.
        return lags
    if isinstance(lags, np.ndarray):
        lags = lags.astype(int)

    import pyemma.msm as msm
    itime = msm.its(self.data.St.tolist(), lags=lags, nits=2).get_timescales()

    n_rows = np.size(itime, 0)
    # Default: no drop found -> last computed row. Initialising here also
    # avoids an unbound `lagidx` when fewer than two rows were computed.
    lagidx = max(n_rows - 1, 0)
    for i in range(1, n_rows):
        to_previous_second = abs(itime[i, 0] - itime[i - 1, 1])
        to_previous_top = abs(itime[i, 0] - itime[i - 1, 0])
        if to_previous_second < to_previous_top:
            # The top timescale fell onto the previous second one: a drop.
            lagidx = i - 1
            break
    return lags[lagidx], itime
if i == 3: for j in range(4): axes[i][j].set_xlabel("TIC " + str(j + 2), fontsize=20) axes[0][0].annotate("TICA " + f_str, fontsize=24, xy=(0, 0), xytext=(1.8, 1.1), xycoords="axes fraction", textcoords="axes fraction") fig.savefig(msm_savedir + "/tic_hist_grid.pdf") n_clusters = 300 msm_lags = [1, 10, 20, 50, 100, 200] cluster = coor.cluster_kmeans(k=n_clusters) coor.pipeline([reader, tica, cluster]) its = msm.its(cluster.dtrajs, lags=msm_lags) plt.figure() mplt.plot_implied_timescales(its) plt.title(msm_savedir) plt.savefig(msm_savedir + "/its_vs_lag_ylog.pdf") #plt.figure() #plt.plot(np.arange(1,21), M.timescales()[:20], 'o') #ymin, ymax = plt.ylim() #plt.ylim(0, ymax) #plt.savefig("msm_ti.pdf")
import matplotlib.pyplot as plt
import numpy as np
import pyemma.coordinates as coor
import pyemma.msm as msm
import pyemma.plots as pyemma_plots

# Settings: system label, cluster count and the maximum lag handed to msm.its.
sys = 'fdis'
n_clusters = 100
max_lag = 80

# Load the clustered discrete trajectories of the selected system.
dtrajs = coor.load(f'cluster_data/{sys}_{n_clusters}_cluster_dtrajs.h5')

# Cast every trajectory to integers and flatten it to one dimension
# (its length is the first dimension of the loaded array).
dt3 = [traj.astype(np.int_).reshape(traj.shape[0]) for traj in dtrajs]

# Implied timescales with Bayesian errors (8 timescales, 200 samples).
its = msm.its(dt3, lags=max_lag, nits=8, errors='bayes', nsamples=200)

fig, ax = plt.subplots()
pyemma_plots.plot_implied_timescales(its, units='ns', ax=ax)
fig.savefig(f'{sys}_implied_timescale_{max_lag}.pdf')
# Report the time between the two timestamps taken earlier in the script
# (printed as seconds).
elapsed_time = final_time - initial_time
print('Elapsed time %.3f s' % elapsed_time)

# Save cluster centers
np.save('clustercenters', clustering.clustercenters)

# Save discrete trajectories.
dtrajs = clustering.dtrajs
dtrajs_dir = 'dtrajs'
clustering.save_dtrajs(output_dir=dtrajs_dir, output_format='npy', extension='.npy')

################################################################################
# Make timescale plots
################################################################################

# The backend is selected before pyplot is imported so that no display is needed.
import matplotlib as mpl
mpl.use('Agg') # Don't use display
import matplotlib.pyplot as plt

from pyemma import msm
from pyemma import plots

# Lag times at which the implied timescales are estimated.
lags = [1, 2, 5, 10, 20, 50]
# Disabled alternative that additionally requests Bayesian error estimates:
#its = msm.its(dtrajs, lags=lags, errors='bayes')
its = msm.its(dtrajs, lags=lags)
plots.plot_implied_timescales(its)

# Write the current figure to disk.
plt.savefig('plot.pdf')
# Report the time between the two timestamps taken earlier in the script
# (printed as seconds).
elapsed_time = final_time - initial_time
print('Elapsed time %.3f s' % elapsed_time)

# Save cluster centers
np.save('clustercenters', clustering.clustercenters)

# Save discrete trajectories.
dtrajs = clustering.dtrajs
dtrajs_dir = 'dtrajs'
clustering.save_dtrajs(output_dir=dtrajs_dir, output_format='npy', extension='.npy')

################################################################################
# Make timescale plots
################################################################################

# The backend is selected before pyplot is imported so that no display is needed.
import matplotlib as mpl
mpl.use('Agg') # Don't use display
import matplotlib.pyplot as plt

from pyemma import msm
from pyemma import plots

# Lag times at which the implied timescales are estimated.
lags = [1,2,5,10,20,50]
# Disabled alternative that additionally requests Bayesian error estimates:
#its = msm.its(dtrajs, lags=lags, errors='bayes')
its = msm.its(dtrajs, lags=lags)
plots.plot_implied_timescales(its)

# Write the current figure to disk.
plt.savefig('plot.pdf')
# Test script for MSM construction with coring after clustering, using PyEmma.
#
# Please refer to the PyEmma documentation for more information.
import pyemma
import pyemma.msm as msm
import numpy as np
from udpclust import UDPClust as dp

# Load the four test trajectories DATA/test-its-traj0.dat ... traj3.dat.
tica_traj = [np.loadtxt('DATA/test-its-traj' + str(idx) + '.dat') for idx in range(4)]

# Cluster the trajectories and extract the core trajectories.
cl_dpa = dp.cluster_UDP(dim=6, trj_tot=tica_traj, stride=10)
ctrajs = cl_dpa.get_core_traj()

# Implied timescales at lag times 1..9.
its = msm.its(ctrajs, lags=range(1, 10))

# Store the `rho` attribute of the clustering and the timescales as text.
np.savetxt('out-msm_rho.dat', cl_dpa.rho, fmt="%.6e")
np.savetxt('out-msm_its.dat', its.timescales, fmt="%.6e")
def calculateITS(trajectories, lagtimes, errors=None):
    """Calculate the implied time-scales at the given lagtimes.

    Thin wrapper around ``MSM.its``.

    :param trajectories: Trajectories handed to ``MSM.its``
    :param lagtimes: Lagtimes, passed as the ``lags`` argument
    :param errors: Error-estimation mode, passed through unchanged (default None)
    :returns: The object returned by ``MSM.its``
    """
    return MSM.its(trajectories, lags=lagtimes, errors=errors)
def plotTimescales(self, lags=None, units='frames', errors=None, nits=None, results=False, plot=True, save=None):
    """ Estimate MSMs at several lag times and plot their implied timescales

    Parameters
    ----------
    lags : list
        Lag times at which the timescales are computed. When omitted, the
        value returned by ``self._defaultLags()`` is used.
    units : str
        Unit in which `lags` is expressed. Explicitly given lags are converted
        to frames with ``unitconvert`` using ``self.data.fstep``. Not used
        when `lags` is None.
    errors : errors
        Forwarded unchanged to ``pyemma.msm.its`` (e.g. Bayesian error
        estimation; refer to the pyEMMA documentation).
    nits : int
        Number of implied timescales to calculate. When omitted, the smaller
        of ``self.data.K`` and 20 is used.
    results : bool
        If True, the calculated implied timescales are returned.
    plot : bool
        If True, the implied timescales plot is displayed.
    save : str
        Path of the file in which to save the figure. The figure is drawn
        whenever this is given, even if `plot` is False.

    Returns
    -------
    Only when `results` is True:

    its : np.ndarray
        The value of ``get_timescales()`` of the pyEMMA estimate.
    lags : np.ndarray
        The ``lags`` attribute of the pyEMMA estimate.

    Raises
    ------
    ValueError
        If pyEMMA's plotting raises a ValueError; the message is extended
        with a hint about ``model.data.fstep``.

    Examples
    --------
    >>> model = Model(data)
    >>> model.plotTimescales()
    >>> model.plotTimescales(lags=list(range(1,100,5)))
    """
    import pyemma.msm as msm
    import pyemma.plots as mplt

    self._integrityCheck()

    if lags is not None:
        # User supplied lags may be in any unit; the estimator works in frames.
        lag_frames = unitconvert(units, 'frames', lags, fstep=self.data.fstep).tolist()
    else:
        lag_frames = self._defaultLags()

    n_timescales = np.min((self.data.K, 20)) if nits is None else nits

    from htmd.config import _config
    estimate = msm.its(
        self.data.St.tolist(),
        lags=lag_frames,
        errors=errors,
        nits=n_timescales,
        n_jobs=_config['ncpus'],
    )

    needs_figure = plot or (save is not None)
    if needs_figure:
        from matplotlib import pylab as plt
        plt.ion()
        plt.figure()
        try:
            mplt.plot_implied_timescales(estimate, dt=self.data.fstep, units='ns')
        except ValueError as ve:
            # Do not leave the half-built figure behind.
            plt.close()
            hint = '{} This is probably caused by badly set fstep in the data ({}). '.format(ve, self.data.fstep)
            hint += 'Please correct the model.data.fstep to correspond to the simulation frame step in nanoseconds.'
            raise ValueError(hint)
        if save is not None:
            plt.savefig(save, dpi=300, bbox_inches='tight', pad_inches=0.2)
        if plot:
            plt.show()

    if not results:
        return None
    return estimate.get_timescales(), estimate.lags
def plotTimescales(self, lags=None, units='frames', errors=None, nits=None, results=False, plot=True, save=None):
    """ Plot the implied timescales of MSMs of various lag times

    Parameters
    ----------
    lags : list
        The lag times at which to compute the timescales. When omitted, the
        value returned by ``self._defaultLags()`` is used.
    units : str
        The units of `lags`. Explicitly given lags are converted to frames
        with ``unitconvert`` using ``self.data.fstep``. Not used when `lags`
        is None.
    errors : errors
        Calculate errors using Bayes (Refer to pyEMMA documentation)
    nits : int
        Number of implied timescales to calculate. When omitted, the smaller
        of ``self.data.K`` and 20 is used.
    results : bool
        If the method should return the calculated implied timescales
    plot : bool
        If the method should display the plot of implied timescales
    save : str
        Path of the file in which to save the figure. The figure is drawn
        whenever this is given, even if `plot` is False.

    Returns
    -------
    If given results=True this method will return the following data

    its : np.ndarray
        The value of ``get_timescales()`` of the pyEMMA estimate.
    lags : np.ndarray
        The ``lags`` attribute of the pyEMMA estimate.

    Raises
    ------
    ValueError
        If pyEMMA's plotting raises a ValueError; the message is extended
        with a hint about ``model.data.fstep`` and the original error is
        attached as the cause.

    Examples
    --------
    >>> model = Model(data)
    >>> model.plotTimescales()
    >>> model.plotTimescales(lags=list(range(1,100,5)))
    """
    import pyemma.plots as mplt
    import pyemma.msm as msm
    self._integrityCheck()
    if lags is None:
        lags = self._defaultLags()
    else:
        lags = unitconvert(units, 'frames', lags, fstep=self.data.fstep).tolist()

    if nits is None:
        nits = np.min((self.data.K, 20))

    from htmd.config import _config
    its = msm.its(self.data.St.tolist(), lags=lags, errors=errors, nits=nits, n_jobs=_config['ncpus'])
    if plot or (save is not None):
        from matplotlib import pylab as plt
        plt.ion()
        plt.figure()
        try:
            mplt.plot_implied_timescales(its, dt=self.data.fstep, units='ns')
        except ValueError as ve:
            # Do not leave the half-built figure behind.
            plt.close()
            # The message literal is kept on one line: a single-quoted string
            # must not span a physical line break.
            message = (
                '{} This is probably caused by badly set fstep in the data ({}). '.format(ve, self.data.fstep)
                + 'Please correct the model.data.fstep to correspond to the simulation frame step in nanoseconds.'
            )
            raise ValueError(message) from ve
        if save is not None:
            plt.savefig(save, dpi=300, bbox_inches='tight', pad_inches=0.2)
        if plot:
            plt.show()
    if results:
        return its.get_timescales(), its.lags
plt.figure(figsize=(5, 3)) plt.plot(time_scale_sep, linewidth=0, marker='o') #plt.axvline(x=last_slow_index+0.5,color='r') plt.axvline(x=0.5, color='g') plt.xlabel('index') plt.ylabel('timescale separation') plt.xlim(0, 30) plt.savefig('timescale_separation.png') #print('%s macrostates chosen from timescale separation' %n_macrostates_timescales) print('%s macrostates chosen because thats what we want' % n_macrostates) plt.clf() lags = [1, 2, 5, 10, 20, 50, 100, 200, 400] its = msm.its(clkmeans.dtrajs, lags=lags) mplt.plot_implied_timescales(its) plt.savefig('implied_timescale_plot.png') plt.clf() print('fraction of states used = ', MSM.active_state_fraction) print('fraction of counts used = ', MSM.active_count_fraction) mplt.plot_cktest(MSM.cktest(3)) plt.savefig('cktest_msm.png') plt.clf() plt.figure(figsize=(8, 5)) mplt.plot_free_energy(np.hstack(Y1),
print('Elapsed time %.3f s' % elapsed_time) # Save cluster centers #import cPickle as pickle #pickle.dump(clustering.clustercenters, open('clustercenters.p', 'wb')) np.save('clustercenters.npy', clustering.clustercenters) # Save discrete trajectories. dtrajs = clustering.dtrajs dtrajs_dir = 'dtrajs' clustering.save_dtrajs(output_dir=dtrajs_dir, output_format='npy', extension='.npy') ################################################################################ # Make timescale plots ################################################################################ import matplotlib as mpl mpl.use('Agg') # Don't use display import matplotlib.pyplot as plt from pyemma import msm from pyemma import plots lags = [1,2,5,10,20,50] its = msm.its(dtrajs, lags=lags, errors='bayes') plots.plot_implied_timescales(its) plt.savefig('plot.pdf')