Beispiel #1
0
    def plotTimescales(self,
                       lags=None,
                       errors=None,
                       nits=None,
                       results=False,
                       plot=True):
        """ Estimate implied timescales at several lag times and optionally plot them

        Parameters
        ----------
        lags : list
            Lag times at which the timescales are estimated. When omitted, the
            value returned by `self._defaultLags()` is used.
        errors : str
            Passed unchanged as the `errors` argument of `pyemma.msm.its`
            (refer to the pyEMMA documentation, e.g. 'bayes').
        nits : int
            Number of implied timescales to estimate. When omitted, the smaller
            of `self.data.K` and 20 is used.
        results : bool
            If True, the estimated timescales and the lag times are returned.
        plot : bool
            If True, the implied timescales are drawn in a new figure.

        Returns
        -------
        Only when `results` is True (otherwise None is returned):
        its : np.ndarray
            The timescales as given by `get_timescales()` of the pyEMMA estimate
        lags : np.ndarray
            The lag times stored on the pyEMMA estimate

        Examples
        --------
        >>> model = Model(data)
        >>> model.plotTimescales()
        >>> model.plotTimescales(lags=list(range(1,100,5)))
        """
        import pyemma.plots as mplt
        import pyemma.msm as msm
        self._integrityCheck()

        # Fall back to the model defaults for whatever the caller left out.
        lag_times = self._defaultLags() if lags is None else lags
        num_its = np.min((self.data.K, 20)) if nits is None else nits

        from htmd.config import _config
        estimate = msm.its(self.data.St.tolist(), lags=lag_times, errors=errors,
                           nits=num_its, n_jobs=_config['ncpus'])

        if plot:
            plt.ion()
            plt.figure()
            # The lag axis is scaled by the frame step of the data and labelled in ns.
            mplt.plot_implied_timescales(estimate, dt=self.data.fstep, units='ns')
            plt.show()

        if not results:
            return None
        return estimate.get_timescales(), estimate.lags
Beispiel #2
0
    def plotTimescales(self, lags=None, units='frames', errors=None, nits=None, results=False, plot=True):
        """ Estimate implied timescales at several lag times and optionally plot them

        Parameters
        ----------
        lags : list
            Lag times at which the timescales are estimated. When omitted, the
            value returned by `self._defaultLags()` is used.
        units : str
            Units in which `lags` is expressed. Explicitly given lags are
            converted from `units` to frames with `unitconvert`, using
            `self.data.fstep`. Can be 'frames' or a time unit given as a string.
        errors : str
            Passed unchanged as the `errors` argument of `pyemma.msm.its`
            (refer to the pyEMMA documentation, e.g. 'bayes').
        nits : int
            Number of implied timescales to estimate. When omitted, the smaller
            of `self.data.K` and 20 is used.
        results : bool
            If True, the estimated timescales and the lag times are returned.
        plot : bool
            If True, the implied timescales are drawn in a new figure.

        Returns
        -------
        Only when `results` is True (otherwise None is returned):
        its : np.ndarray
            The timescales as given by `get_timescales()` of the pyEMMA estimate
        lags : np.ndarray
            The lag times stored on the pyEMMA estimate

        Examples
        --------
        >>> model = Model(data)
        >>> model.plotTimescales()
        >>> model.plotTimescales(lags=list(range(1,100,5)))
        """
        import pyemma.plots as mplt
        import pyemma.msm as msm
        self._integrityCheck()

        if lags is not None:
            # User supplied lags may be in any unit; the estimator works in frames.
            frame_lags = unitconvert(units, 'frames', lags, fstep=self.data.fstep).tolist()
        else:
            frame_lags = self._defaultLags()

        num_its = np.min((self.data.K, 20)) if nits is None else nits

        from htmd.config import _config
        estimate = msm.its(self.data.St.tolist(),
                           lags=frame_lags,
                           errors=errors,
                           nits=num_its,
                           n_jobs=_config['ncpus'])

        if plot:
            from matplotlib import pylab as plt
            plt.ion()
            plt.figure()
            # The lag axis is scaled by the frame step of the data and labelled in ns.
            mplt.plot_implied_timescales(estimate, dt=self.data.fstep, units='ns')
            plt.show()

        if not results:
            return None
        return estimate.get_timescales(), estimate.lags
Beispiel #3
0
 def setUpClass(cls):
     """Create the shared implied-timescales fixture used by the tests.

     Stores on the class:
     - ``cls.its``: the result of ``its(...)`` with ``errors='bayes'`` for the
       trajectory produced by ``generate_traj(P, 100)``.
     - ``cls.refs``: the last row of ``cls.its.timescales``.

     Returns the class itself.
     """
     # 4x4 row-stochastic matrix handed to generate_traj
     transition_matrix = np.array([
         [0.5, 0.25, 0.25, 0.0],
         [0.0, 0.25, 0.5, 0.25],
         [0.25, 0.25, 0.5, 0.0],
         [0.25, 0.25, 0.25, 0.25],
     ])
     # bogus its object: only needs to exist, its content is not meaningful
     lag_times = [1, 2, 3, 5, 10]
     trajectory = generate_traj(transition_matrix, 100)
     cls.its = its(trajectory, lags=lag_times, errors='bayes')
     cls.refs = cls.its.timescales[-1]
     return cls
Beispiel #4
0
 def test_its_bmsm(self):
     """Bayesian ITS on the double-well data: check values and confidence bounds."""
     dtrajs = [self.double_well_data.dtraj_T100K_dt10_n6good]
     estimator = msm.its(dtrajs, lags=[10, 50, 200], errors='bayes', nsamples=1000)
     # One row per lag time, one column per timescale.
     expected = np.array([
         [284.87479737, 6.68390402, 3.0375248, 2.65314172, 1.93066562],
         [320.08583492, 11.14612743, 10.3450663, 9.42799075, 8.2109752],
         [351.41541961, 42.87427869, 41.17841657, 37.35485197, 23.24254608],
     ])
     # rough agreement with MLE
     assert np.allclose(estimator.timescales, expected, rtol=0.1, atol=10.0)
     # The estimates must lie strictly inside the sampled 99.9% interval.
     # This test should fail only 1 out of 1000 times.
     lower, upper = estimator.get_sample_conf(conf=0.999)
     np.testing.assert_array_less(lower, estimator.timescales)
     np.testing.assert_array_less(estimator.timescales, upper)
Beispiel #5
0
 def test_its_bmsm(self):
     """Bayesian ITS on the double-well data with two jobs: values and bounds."""
     dtrajs = [self.double_well_data.dtraj_T100K_dt10_n6good]
     estimator = msm.its(dtrajs, lags=[10, 50, 200], errors='bayes',
                         nsamples=1000, n_jobs=2)
     # One row per lag time, one column per timescale.
     expected = np.array([
         [284.87479737, 6.68390402, 3.0375248, 2.65314172, 1.93066562],
         [320.08583492, 11.14612743, 10.3450663, 9.42799075, 8.2109752],
         [351.41541961, 42.87427869, 41.17841657, 37.35485197, 23.24254608],
     ])
     # rough agreement with MLE
     assert np.allclose(estimator.timescales, expected, rtol=0.1, atol=10.0)
     # Sampled 99.9% interval. This test should fail only 1 out of 1000 times.
     lower, upper = estimator.get_sample_conf(conf=0.999)
     # Only the first row is compared: later entries are already ambiguous
     # (deviations after the first place), which makes this test fail stochastically.
     first_estimate = estimator.timescales[0]
     np.testing.assert_array_less(lower[0], first_estimate)
     np.testing.assert_array_less(first_estimate, upper[0])
Beispiel #6
0
 def _calculateITS(self):
     """Interactively pick the MSM lagtime from implied time-scale plots.

     On every round the implied time-scales are estimated at ``self.lagtimes``
     (when that list is non-empty), plotted and saved to "its.png". If
     ``self.lagtime`` is already set it is returned right away, without
     prompting. Otherwise the user is asked whether the plot has converged:
     on "yes" the lagtime is read from the prompt and returned; on "no" the
     user may extend or replace ``self.lagtimes`` and a new round starts.

     Returns
     -------
     The preset ``self.lagtime``, or the integer lagtime typed by the user.

     Raises
     ------
     ValueError
         If a value typed at a lagtime prompt is not an integer.
     """
     print(("Calculating implied time-scales, when it's done will prompt "
            "for confirmation on the validity of the lagtimes..."))
     # self.error switches on Bayesian error estimation for the ITS
     itsErrors = "bayes" if self.error else None
     is_converged = False
     while not is_converged:
         if self.lagtimes:
             # workaround to get new its plot at each iteration, the
             # plot_implied_timescales function is calling plt.gca() and
             # recovers the previous plot's axes, by creating a new figure
             # gca gets a set of empty axes and plots are fine
             plt.figure()
             its_object = msm.its(self.dtrajs, lags=self.lagtimes, errors=itsErrors)
             mplt.plot_implied_timescales(its_object, outfile=self.itsOutput, nits=self.numberOfITS)
             plt.savefig("its.png")
         if self.lagtime is not None:
             return self.lagtime
         while True:
             plt.show()
             # str.strip() returns a new string, so its result has to be kept;
             # otherwise answers such as "y " are rejected as invalid.
             convergence_answer = raw_input("Has the ITS plot converged?[y/n] ").strip().lower()
             convergence_answer = convergence_answer or "y"  # Making yes the default answer
             if convergence_answer in ("y", "yes"):
                 is_converged = True
                 lagtime_str = raw_input("Please input the lagtime to construct the MSM: ")
                 lagtime = int(lagtime_str.strip())
                 break
             elif convergence_answer in ("n", "no"):
                 break
             else:
                 print("Answer not valid. Please answer yes or no")
         if not is_converged:
             new_lagtimes = raw_input("Do you want to define new lagtimes or add to the previous?[add(a)/new(n)] ")
             new_lagtimes = new_lagtimes.strip().lower()
             if new_lagtimes in ("add", "a"):
                 lag_list = raw_input("Please input the lagtimes you want to add separated by a space: ")
                 if self.lagtimes is None:
                     # nothing to add to yet: start from an empty list
                     self.lagtimes = []
                 # split() without a separator ignores repeated/trailing blanks,
                 # which would otherwise produce int('') errors
                 self.lagtimes.extend(int(lag) for lag in lag_list.split())
             elif new_lagtimes in ("new", "n"):
                 lag_list = raw_input("Please input the new lagtimes separated by a space: ")
                 # build a real list so that .sort() below works even where
                 # map() returns an iterator
                 self.lagtimes = [int(lag) for lag in lag_list.split()]
             if self.lagtimes is not None:
                 self.lagtimes.sort()
     return lagtime
 def _calculateITS(self):
     """Plot the implied time-scales and hand back the configured lagtime.

     When ``self.lagtimes`` is non-empty, the implied time-scales are
     estimated at those lagtimes (with Bayesian errors if ``self.error`` is
     truthy), plotted through pyEMMA and saved to "its.png".

     Returns ``self.lagtime``, which is None when no lagtime was configured.
     """
     print("Calculating implied time-scales")
     itsErrors = "bayes" if self.error else None
     if self.lagtimes:
         # A fresh figure is opened first: plot_implied_timescales draws on
         # plt.gca(), which would otherwise reuse the axes of an earlier plot.
         plt.figure()
         its_object = msm.its(self.dtrajs, lags=self.lagtimes, errors=itsErrors)
         mplt.plot_implied_timescales(its_object,
                                      outfile=self.itsOutput,
                                      nits=self.numberOfITS)
         plt.savefig("its.png")
     # Falling off the end of the function would also give None
     return self.lagtime
Beispiel #8
0
def plot_implied_timescales(dtrajs, nits=15, model_name=""):
    """ Estimate implied timescales with Bayesian errors and plot them.

    The lag times are not a parameter: they are read from a variable named
    ``lags`` that must exist in the enclosing (module) scope.

    Parameters
    ----------
    dtrajs
        Discrete trajectories handed to ``msm.its``.
    nits
        Number of implied timescales handed to ``msm.its``.
    model_name
        Text used as the title of the plot.

    Returns
    -------
    its : :class:`ImpliedTimescales <pyemma.msm.estimators.implied_timescales.ImpliedTimescales>` object
    """
    its_options = dict(lags=lags, nits=nits, errors="bayes")
    timescales = msm.its(dtrajs, **its_options)

    # Lag axis scaled with dt=0.25 and labelled in ns.
    mplt.plot_implied_timescales(timescales, dt=0.25, units="ns")
    plt.title(model_name)
    return timescales
Beispiel #9
0
def plot_implied_timescales(dtrajs,nits=15,model_name=''):
   ''' Estimate implied timescales, plot them and save the figure as a PDF.

   The lag times are not a parameter: they are read from a variable named
   ``lags`` that must exist in the enclosing (module) scope. No error
   estimation is requested from ``msm.its``.

   Parameters
   ----------
   dtrajs
       Discrete trajectories handed to ``msm.its``.
   nits
       Number of implied timescales handed to ``msm.its``.
   model_name
       Text used as the plot title and as the prefix of the output file name.

   Returns
   -------
   its : :class:`ImpliedTimescales <pyemma.msm.estimators.implied_timescales.ImpliedTimescales>` object
   '''
   its_options = dict(lags=lags, nits=nits)
   timescales = msm.its(dtrajs, **its_options)

   # Lag axis scaled with dt=0.25 and labelled in ns.
   mplt.plot_implied_timescales(timescales, dt=0.25, units='ns')
   plt.title(model_name)

   # Write the figure next to the script, then release it.
   output_name = '{0}_implied_timescales.pdf'.format(model_name)
   plt.savefig(output_name)
   plt.close()
   return timescales
Beispiel #10
0
    def maxConnectedLag(self, lags):
        """ Heuristic for getting the lagtime before a timescale drops.

        The two slowest implied timescales are estimated at every lag time. Going
        through the lag times in order, a drop is detected as soon as the top
        timescale is closer to the second timescale of the previous lag time
        than to the top timescale of the previous lag time (attributed to
        disconnected states). The lag time just before the drop is selected; if no
        drop is found the last lag time is selected.

        Parameters
        ----------
        lags : np.ndarray or list
            A list of lag times for which to calculate the implied timescales

        Returns
        -------
        ml : int
            The maximum lagtime before a drop occurs in the top timescale
        itime : np.ndarray
            The implied timescales that were estimated, one row per lag time

        NOTE: when `lags` contains a single element, `lags` itself is returned
        unchanged and no timescales are estimated.

        Examples
        --------
        >>> model = Model(data)
        >>> model.maxConnectedLag(list(range(1, 100, 5)))
        """
        if len(lags) == 1:
            return lags
        if isinstance(lags, np.ndarray):
            lags = lags.astype(int)

        import pyemma.msm as msm
        estimate = msm.its(self.data.St.tolist(), lags=lags, nits=2)
        itime = estimate.get_timescales()

        num_lags = np.size(itime, 0)
        for row in range(1, num_lags):
            top_now = itime[row, 0]
            gap_to_prev_second = abs(top_now - itime[row - 1, 1])
            gap_to_prev_top = abs(top_now - itime[row - 1, 0])
            if gap_to_prev_second < gap_to_prev_top:
                # Drop detected: keep the lag time right before it.
                lagidx = row - 1
                break
            lagidx = row
        return lags[lagidx], itime
Beispiel #11
0
    def maxConnectedLag(self, lags):
        """ Heuristic for getting the lagtime before a timescale drops.

        Estimates the two slowest implied timescales for each lag time and scans
        them in order. A drop of the top timescale (attributed to disconnected
        states) is assumed once the top timescale lies closer to the previous
        second timescale than to the previous top timescale. The lag time
        preceding the drop is chosen; without a drop the last lag time is chosen.

        Parameters
        ----------
        lags : np.ndarray or list
            A list of lag times for which to calculate the implied timescales

        Returns
        -------
        ml : int
            The maximum lagtime before a drop occurs in the top timescale
        itime : np.ndarray
            The implied timescales that were estimated, one row per lag time

        NOTE: when `lags` contains a single element, `lags` itself is returned
        unchanged and no timescales are estimated.

        Examples
        --------
        >>> model = Model(data)
        >>> model.maxConnectedLag(list(range(1, 100, 5)))
        """
        if len(lags) == 1:
            return lags
        if isinstance(lags, np.ndarray):
            lags = lags.astype(int)

        import pyemma.msm as msm
        itime = msm.its(self.data.St.tolist(),
                        lags=lags,
                        nits=2).get_timescales()

        for cur in range(1, np.size(itime, 0)):
            prev = cur - 1
            dropped = abs(itime[cur, 0] - itime[prev, 1]) < abs(itime[cur, 0] - itime[prev, 0])
            if dropped:
                # Keep the lag time right before the drop.
                lagidx = prev
                break
            lagidx = cur
        return lags[lagidx], itime
Beispiel #12
0
        if i == 3:
            for j in range(4):
                axes[i][j].set_xlabel("TIC " + str(j + 2), fontsize=20)

    axes[0][0].annotate("TICA  " + f_str,
                        fontsize=24,
                        xy=(0, 0),
                        xytext=(1.8, 1.1),
                        xycoords="axes fraction",
                        textcoords="axes fraction")
    fig.savefig(msm_savedir + "/tic_hist_grid.pdf")

    n_clusters = 300
    msm_lags = [1, 10, 20, 50, 100, 200]

    cluster = coor.cluster_kmeans(k=n_clusters)
    coor.pipeline([reader, tica, cluster])
    its = msm.its(cluster.dtrajs, lags=msm_lags)

    plt.figure()
    mplt.plot_implied_timescales(its)
    plt.title(msm_savedir)
    plt.savefig(msm_savedir + "/its_vs_lag_ylog.pdf")

    #plt.figure()
    #plt.plot(np.arange(1,21), M.timescales()[:20], 'o')
    #ymin, ymax = plt.ylim()
    #plt.ylim(0, ymax)
    #plt.savefig("msm_ti.pdf")
import pyemma.coordinates as coor
import numpy as np
import pyemma.msm as msm
import pyemma.plots as pyemma_plots
import matplotlib.pyplot as plt

# Run configuration: system label, number of clusters and the lag setting.
sys = 'fdis'
n_clusters = 100
max_lag = 80

# Discrete trajectories previously written for this system / cluster count.
dtrajs = coor.load(f'cluster_data/{sys}_{n_clusters}_cluster_dtrajs.h5')

# Cast every trajectory to integers, then reshape it to one dimension.
dt2 = [traj.astype(np.int_) for traj in dtrajs]
dt3 = [traj.reshape(traj.shape[0]) for traj in dt2]

# Implied timescales with Bayesian error estimates.
its = msm.its(dt3,
              lags=max_lag,
              nits=8,
              errors='bayes',
              nsamples=200)

# Draw on a dedicated axes object and store the figure as a PDF.
fig, ax = plt.subplots()
pyemma_plots.plot_implied_timescales(its, units='ns', ax=ax)
fig.savefig(f'{sys}_implied_timescale_{max_lag}.pdf')
Beispiel #14
0
# Report the duration of the preceding step; final_time and initial_time are
# set earlier in the script (not shown here).
elapsed_time = final_time - initial_time
print('Elapsed time %.3f s' % elapsed_time)

# Save cluster centers
np.save('clustercenters', clustering.clustercenters)

# Save discrete trajectories.
dtrajs = clustering.dtrajs
dtrajs_dir = 'dtrajs'
clustering.save_dtrajs(output_dir=dtrajs_dir,
                       output_format='npy',
                       extension='.npy')

################################################################################
# Make timescale plots
################################################################################

# The 'Agg' backend is selected before pyplot is imported, so the order of
# the next three statements matters.
import matplotlib as mpl
mpl.use('Agg')  # Don't use display
import matplotlib.pyplot as plt

from pyemma import msm
from pyemma import plots

# Lag times at which the implied timescales are estimated.
lags = [1, 2, 5, 10, 20, 50]
# Disabled variant that additionally requests Bayesian error estimates:
#its = msm.its(dtrajs, lags=lags, errors='bayes')
its = msm.its(dtrajs, lags=lags)
plots.plot_implied_timescales(its)

# Write the current figure to disk.
plt.savefig('plot.pdf')
Beispiel #15
0
# Report the duration of the preceding step; final_time and initial_time are
# set earlier in the script (not shown here).
elapsed_time = final_time - initial_time
print('Elapsed time %.3f s' % elapsed_time)

# Save cluster centers
np.save('clustercenters', clustering.clustercenters)

# Save discrete trajectories.
dtrajs = clustering.dtrajs
dtrajs_dir = 'dtrajs'
clustering.save_dtrajs(output_dir=dtrajs_dir, output_format='npy', extension='.npy')

################################################################################
# Make timescale plots
################################################################################

# The 'Agg' backend is selected before pyplot is imported, so the order of
# the next three statements matters.
import matplotlib as mpl
mpl.use('Agg') # Don't use display
import matplotlib.pyplot as plt

from pyemma import msm
from pyemma import plots

# Lag times at which the implied timescales are estimated.
lags = [1,2,5,10,20,50]
# Disabled variant that additionally requests Bayesian error estimates:
#its = msm.its(dtrajs, lags=lags, errors='bayes')
its = msm.its(dtrajs, lags=lags)
plots.plot_implied_timescales(its)

# Write the current figure to disk.
plt.savefig('plot.pdf')


Beispiel #16
0
# This script is used to test the MSM
# construction with coring after clustering
# using PyEmma
#
# Please refer to PyEmma documentation for more information

import pyemma
import pyemma.msm as msm
import numpy as np
from udpclust import UDPClust as dp

# Load the four input trajectories, one text file each.
tica_traj = [np.loadtxt('DATA/test-its-traj' + str(i) + '.dat') for i in range(4)]

# Cluster the trajectories and extract the core trajectories.
cl_dpa = dp.cluster_UDP(dim=6, trj_tot=tica_traj, stride=10)
ctrajs = cl_dpa.get_core_traj()

# Implied timescales at lag times 1..9.
its = msm.its(ctrajs, lags=range(1, 10))

# Write the rho attribute of the clustering and the timescales as text.
np.savetxt('out-msm_rho.dat', cl_dpa.rho, fmt="%.6e")
np.savetxt('out-msm_its.dat', its.timescales, fmt="%.6e")

Beispiel #17
0
def calculateITS(trajectories, lagtimes, errors=None):
    """Calculate the implied time-scales at the given lagtimes.

    Thin wrapper that forwards `trajectories`, `lagtimes` (as `lags`) and
    `errors` to `MSM.its` and returns the object it produces.
    """
    return MSM.its(trajectories, lags=lagtimes, errors=errors)
Beispiel #18
0
    def plotTimescales(self,
                       lags=None,
                       units='frames',
                       errors=None,
                       nits=None,
                       results=False,
                       plot=True,
                       save=None):
        """ Plot the implied timescales of MSMs of various lag times

        Parameters
        ----------
        lags : list
            The lag times at which to compute the timescales. When omitted, the
            value returned by `self._defaultLags()` is used.
        units : str
            The units of lag. Can be 'frames' or any time unit given as a string.
            Explicitly given lags are converted to frames using `self.data.fstep`.
        errors : errors
            Calculate errors using Bayes (Refer to pyEMMA documentation)
        nits : int
            Number of implied timescales to calculate. When omitted, the smaller
            of `self.data.K` and 20 is used.
        results : bool
            If the method should return the calculated implied timescales
        plot : bool
            If the method should display the plot of implied timescales
        save : str
            Path of the file in which to save the figure

        Returns
        -------
        If given results=True this method will return the following data
        (otherwise None is returned)
        its : np.ndarray
            The calculated implied timescales. 2D array with dimensions (len(`lags`), `nits`)
        lags : np.ndarray
            A list of the lag times that were used to calculate the implied timescales

        Raises
        ------
        ValueError
            If pyEMMA raises a ValueError while plotting. The original error is
            chained as the cause and the message points at `self.data.fstep`.

        Examples
        --------
        >>> model = Model(data)
        >>> model.plotTimescales()
        >>> model.plotTimescales(lags=list(range(1,100,5)))
        """
        import pyemma.plots as mplt
        import pyemma.msm as msm
        self._integrityCheck()
        if lags is None:
            lags = self._defaultLags()
        else:
            # User supplied lags may be in any unit; the estimator works in frames.
            lags = unitconvert(units, 'frames', lags,
                               fstep=self.data.fstep).tolist()

        if nits is None:
            nits = np.min((self.data.K, 20))

        from htmd.config import _config
        its = msm.its(self.data.St.tolist(),
                      lags=lags,
                      errors=errors,
                      nits=nits,
                      n_jobs=_config['ncpus'])
        # A figure is needed both for showing and for saving the plot.
        if plot or (save is not None):
            from matplotlib import pylab as plt
            plt.ion()
            plt.figure()
            try:
                mplt.plot_implied_timescales(its,
                                             dt=self.data.fstep,
                                             units='ns')
            except ValueError as ve:
                # Do not leave the empty figure behind.
                plt.close()
                # Chain explicitly so the original traceback is reported as the cause.
                raise ValueError(
                    '{} This is probably caused by badly set fstep in the data ({}). '
                    .format(ve, self.data.fstep) +
                    'Please correct the model.data.fstep to correspond to the simulation frame step in nanoseconds.'
                ) from ve
            if save is not None:
                plt.savefig(save, dpi=300, bbox_inches='tight', pad_inches=0.2)
            if plot:
                plt.show()
        if results:
            return its.get_timescales(), its.lags
Beispiel #19
0
    def plotTimescales(self, lags=None, units='frames', errors=None, nits=None, results=False, plot=True, save=None):
        """ Plot the implied timescales of MSMs of various lag times

        Parameters
        ----------
        lags : list
            The lag times at which to compute the timescales. When omitted, the
            value returned by `self._defaultLags()` is used.
        units : str
            The units of lag. Can be 'frames' or any time unit given as a string.
            Explicitly given lags are converted to frames using `self.data.fstep`.
        errors : errors
            Calculate errors using Bayes (Refer to pyEMMA documentation)
        nits : int
            Number of implied timescales to calculate. When omitted, the smaller
            of `self.data.K` and 20 is used.
        results : bool
            If the method should return the calculated implied timescales
        plot : bool
            If the method should display the plot of implied timescales
        save : str
            Path of the file in which to save the figure

        Returns
        -------
        If given results=True this method will return the following data
        (otherwise None is returned)
        its : np.ndarray
            The calculated implied timescales. 2D array with dimensions (len(`lags`), `nits`)
        lags : np.ndarray
            A list of the lag times that were used to calculate the implied timescales

        Raises
        ------
        ValueError
            If pyEMMA raises a ValueError while plotting. The original error is
            chained as the cause and the message points at `self.data.fstep`.

        Examples
        --------
        >>> model = Model(data)
        >>> model.plotTimescales()
        >>> model.plotTimescales(lags=list(range(1,100,5)))
        """
        import pyemma.plots as mplt
        import pyemma.msm as msm
        self._integrityCheck()
        if lags is None:
            lags = self._defaultLags()
        else:
            # User supplied lags may be in any unit; the estimator works in frames.
            lags = unitconvert(units, 'frames', lags, fstep=self.data.fstep).tolist()

        if nits is None:
            nits = np.min((self.data.K, 20))

        from htmd.config import _config
        its = msm.its(self.data.St.tolist(), lags=lags, errors=errors, nits=nits, n_jobs=_config['ncpus'])
        # A figure is needed both for showing and for saving the plot.
        if plot or (save is not None):
            from matplotlib import pylab as plt
            plt.ion()
            plt.figure()
            try:
                mplt.plot_implied_timescales(its, dt=self.data.fstep, units='ns')
            except ValueError as ve:
                # Do not leave the empty figure behind.
                plt.close()
                # Chain explicitly so the original traceback is reported as the cause.
                raise ValueError('{} This is probably caused by badly set fstep in the data ({}). '.format(ve, self.data.fstep) +
                                 'Please correct the model.data.fstep to correspond to the simulation frame step in nanoseconds.') from ve
            if save is not None:
                plt.savefig(save, dpi=300, bbox_inches='tight', pad_inches=0.2)
            if plot:
                plt.show()
        if results:
            return its.get_timescales(), its.lags
Beispiel #20
0
# Plot the timescale separation values as markers only (no connecting line);
# time_scale_sep is computed earlier in the script (not shown here).
plt.figure(figsize=(5, 3))
plt.plot(time_scale_sep, linewidth=0, marker='o')
# Disabled: red marker line placed from last_slow_index.
#plt.axvline(x=last_slow_index+0.5,color='r')
# Green vertical line at a fixed position.
plt.axvline(x=0.5, color='g')
plt.xlabel('index')
plt.ylabel('timescale separation')
plt.xlim(0, 30)

plt.savefig('timescale_separation.png')

#print('%s macrostates chosen from timescale separation' %n_macrostates_timescales)
print('%s macrostates chosen because thats what we want' % n_macrostates)

# Clear the current figure, then estimate and plot the implied timescales
# at a fixed list of lag times.
plt.clf()
lags = [1, 2, 5, 10, 20, 50, 100, 200, 400]
its = msm.its(clkmeans.dtrajs, lags=lags)
mplt.plot_implied_timescales(its)

plt.savefig('implied_timescale_plot.png')
plt.clf()

# Report the active-set fractions stored on the MSM object.
print('fraction of states used = ', MSM.active_state_fraction)
print('fraction of counts used = ', MSM.active_count_fraction)

# Plot the result of MSM.cktest(3) and save it.
mplt.plot_cktest(MSM.cktest(3))

plt.savefig('cktest_msm.png')

# Clear the figure and open a new, larger one for the next plot.
plt.clf()
plt.figure(figsize=(8, 5))
mplt.plot_free_energy(np.hstack(Y1),
Beispiel #21
0
# Report the duration of the preceding step; elapsed_time is computed earlier
# in the script (not shown here).
print('Elapsed time %.3f s' % elapsed_time)

# Save cluster centers
# Disabled pickle-based variant kept for reference:
#import cPickle as pickle
#pickle.dump(clustering.clustercenters, open('clustercenters.p', 'wb'))
np.save('clustercenters.npy', clustering.clustercenters)

# Save discrete trajectories.
dtrajs = clustering.dtrajs
dtrajs_dir = 'dtrajs'
clustering.save_dtrajs(output_dir=dtrajs_dir, output_format='npy', extension='.npy')

################################################################################
# Make timescale plots
################################################################################

# The 'Agg' backend is selected before pyplot is imported, so the order of
# the next three statements matters.
import matplotlib as mpl
mpl.use('Agg') # Don't use display
import matplotlib.pyplot as plt

from pyemma import msm
from pyemma import plots

# Lag times at which the implied timescales are estimated, here with
# Bayesian error estimates requested.
lags = [1,2,5,10,20,50]
its = msm.its(dtrajs, lags=lags, errors='bayes')
plots.plot_implied_timescales(its)

# Write the current figure to disk.
plt.savefig('plot.pdf')