def __init__(self, data, lag, units='frames', dimensions=None):
    """Fit a pyEMMA TICA estimator on in-memory data or on streamed projections.

    Parameters
    ----------
    data : Metric or data object
        When a `Metric` is given, its projections are produced by
        `_projectionGenerator` and fed to the estimator with `partial_fit`.
        Any other object is treated as in-memory data and must expose
        `dat` and `fstep`.
    lag : int
        The lag time, expressed in `units`.
    units : str
        The units of `lag`. Only 'frames' is accepted for a `Metric`.
    dimensions : optional
        Column selector applied to every trajectory before fitting.
        None fits on all columns.

    Raises
    ------
    RuntimeError
        If `data` is a `Metric` and `units` is not 'frames', or if the
        in-memory lag converts to 0 frames.
    """
    from pyemma.coordinates.transform.tica import TICA as TICApyemma

    self.data = data
    self.dimensions = dimensions

    if not isinstance(data, Metric):
        # In-memory TICA
        lag = unitconvert(units, 'frames', lag, data.fstep)
        if lag == 0:
            raise RuntimeError('Lag time conversion resulted in 0 frames. Please use a larger lag-time for TICA.')
        self.tic = TICApyemma(lag)
        if self.dimensions is not None:
            # Sub-select dimensions for fitting
            datalist = [x[:, self.dimensions].copy() for x in data.dat]
        else:
            datalist = data.dat.tolist()
        self.tic.fit(datalist)
        return

    # Memory efficient TICA projecting trajectories on the fly
    if units != 'frames':
        raise RuntimeError('Cannot use delayed projection TICA with units other than frames for now. Report this to HTMD issues.')
    self.tic = TICApyemma(lag)
    bar = ProgressBar(len(data.simulations))
    for batch in _projectionGenerator(data, _getNcpus()):
        for item in batch:
            if item is None:
                # Nothing to fit for this entry
                continue
            frames = item[0]
            if self.dimensions is not None:
                # Sub-select dimensions for fitting
                frames = frames[:, self.dimensions]
            self.tic.partial_fit(frames)
        bar.progress(len(batch))
    bar.stop()
def __init__(self, data, lag, units='frames'):
    """Fit a pyEMMA TICA model on in-memory data or on streamed projections.

    Parameters
    ----------
    data : Metric or data object
        When a `Metric` is given, its projections are produced by
        `_projectionGenerator` and fed to the estimator with `partial_fit`.
        Any other object is treated as in-memory data and must expose
        `dat` and `fstep`.
    lag : int
        The lag time, expressed in `units`.
    units : str
        The units of `lag`. Only 'frames' is accepted for a `Metric`.

    Raises
    ------
    RuntimeError
        If `data` is a `Metric` and `units` is not 'frames', or if the
        in-memory lag converts to 0 frames.
    """
    self.data = data
    if isinstance(data, Metric):
        if units != 'frames':
            raise RuntimeError(
                'Cannot use delayed projection TICA with units other than frames for now. Report this to HTMD issues.'
            )
        from pyemma.coordinates.transform.tica import TICA
        metr = data
        self.tic = TICA(lag)
        p = ProgressBar(len(metr.simulations))
        for proj in _projectionGenerator(metr, _getNcpus()):
            for pro in proj:
                if pro is None:
                    # A missing projection has nothing to fit; skipping it
                    # avoids a TypeError on `pro[0]`.
                    continue
                self.tic.partial_fit(pro[0])
            p.progress(len(proj))
        p.stop()
    else:
        from pyemma.coordinates import tica
        lag = unitconvert(units, 'frames', lag, data.fstep)
        if lag == 0:
            raise RuntimeError(
                'Lag time conversion resulted in 0 frames. Please use a larger lag-time for TICA.'
            )
        self.tic = tica(data.dat.tolist(), lag=lag)
def markovModel(self, lag, macronum, units='frames', sparse=False, hmm=False):
    """ Build a Markov model at a given lag time and calculate metastable states

    Parameters
    ----------
    lag : int
        The lag time at which to calculate the Markov state model. The units are specified with the `units` argument.
    macronum : int
        The number of macrostates (metastable states) to produce. It is lowered one at a time while PCCA
        returns fewer distinct macrostates than requested.
    units : str
        The units of lag. Can be 'frames' or any time unit given as a string.
    sparse : bool
        Make the transition matrix sparse. Useful if lots (> 4000) states are used for the MSM. Warning: untested.
    hmm : bool
        If True, also store `self.msm.coarse_grain(self.macronum)` in `self.hmm`. Still in development.

    Raises
    ------
    RuntimeError
        If the number of macrostates drops below two.

    Examples
    --------
    >>> model = Model(data)
    >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
    """
    import pyemma.msm as msm
    self._integrityCheck(markov=True)

    lag = unitconvert(units, 'frames', lag, fstep=self.data.fstep)
    self.lag = lag
    self.msm = msm.estimate_markov_model(self.data.St.tolist(), self.lag, sparse=sparse)

    while True:
        self.coarsemsm = self.msm.pcca(macronum)
        settled = len(np.unique(self.msm.metastable_assignments)) == macronum
        if not settled:
            macronum -= 1
            logger.warning(
                'PCCA returned empty macrostates. Reducing the number of macrostates to {}.'.format(macronum))
        # Checked on every pass, including the one that settles
        if macronum < 2:
            raise RuntimeError('Could not create even two macrostates. Please revise your clustering.')
        if settled:
            break

    self._modelid = random.random()

    if hmm:
        # Still in development
        self.hmm = self.msm.coarse_grain(self.macronum)

    used_percent = self.msm.active_count_fraction * 100
    logger.info('{:.1f}% of the data was used'.format(used_percent))

    # NOTE(review): `self.macronum` and `self.macro_ofcluster` are read but not assigned in this
    # method; presumably they are provided elsewhere on the class -- confirm.
    macro_trajs = _macroTrajSt(self.data.St, self.macro_ofcluster)
    _macroTrajectoriesReport(self.macronum, macro_trajs, self.data.simlist)
def _defaultLags(self, minlag=None, maxlag=None, numlags=None, units='frames'):
    """Build the default list of lag times (in frames) for implied-timescale scans.

    Parameters
    ----------
    minlag : int or float, optional
        Smallest lag of the linear spread, in `units`. When None it is 10 if
        the maximum lag exceeds 20 frames and 2 otherwise.
    maxlag : int or float, optional
        Largest lag, in `units`. When None it is the mode of
        `self.trajLengths` minus one.
    numlags : int, optional
        Number of linearly spaced lags between `minlag` and `maxlag`.
        Defaults to 25.
    units : str
        Units of `minlag` and `maxlag`; converted to frames using `self.fstep`.

    Returns
    -------
    lags : np.ndarray
        Integer array starting with lag 1, followed by the `numlags` rounded
        lags spread from `minlag` to `maxlag`.
    """
    def _to_frames(value):
        # Imported lazily: only needed when the caller supplies a lag bound.
        from htmd.units import convert as unitconvert
        return unitconvert(units, 'frames', value, fstep=self.fstep)

    if maxlag is None:
        from scipy import stats
        # Depending on the SciPy version `.mode` is a scalar or a length-1
        # array; normalise it to a plain scalar.
        mode_length = np.atleast_1d(stats.mode(self.trajLengths).mode)[0]
        maxlag = mode_length - 1  # -1 to avoid warnings in timescales calc
    else:
        maxlag = _to_frames(maxlag)

    if minlag is None:
        minlag = 10 if maxlag > 20 else 2
    else:
        minlag = _to_frames(minlag)

    if numlags is None:
        # np.linspace rejects num=None, so fall back to 25 lags.
        numlags = 25

    return np.append(1, np.round(np.linspace(minlag, maxlag, numlags))).astype(int)
def __init__(self, data, lag, units='frames'):
    """Compute autocorrelation-based weights for the concatenated data.

    Parameters
    ----------
    data : data object
        Must expose `dat` (a sequence of arrays that can be concatenated)
        and `fstep`.
    lag : int
        The lag time, expressed in `units`.
    units : str
        The units of `lag`; converted to frames using `data.fstep`.

    Raises
    ------
    RuntimeError
        If the lag converts to 0 frames.
    """
    lag_frames = unitconvert(units, 'frames', lag, data.fstep)
    if lag_frames == 0:
        raise RuntimeError('Lag time conversion resulted in 0 frames. Please use a larger lag-time for TICA.')

    self.data = data
    # All trajectories are stacked into one array before the autocorrelation
    stacked = np.concatenate(self.data.dat)
    self.weights = self._autocorrelation(stacked, lag_frames)
def plotTimescales(self, lags=None, units='frames', errors=None, nits=None, results=False, plot=True):
    """ Plot the implied timescales of MSMs of various lag times

    Parameters
    ----------
    lags : list
        The lag times at which to compute the timescales. By default it spreads out 25 lag times linearly from lag 10
        until the mode length of the trajectories.
    units : str
        The units of lag. Can be 'frames' or any time unit given as a string.
    errors : errors
        Calculate errors using Bayes (Refer to pyEMMA documentation)
    nits : int
        Number of implied timescales to calculate. Default: the smaller of `self.data.K` and 20
    results : bool
        If the method should return the calculated implied timescales
    plot : bool
        If the method should display the plot of implied timescales

    Returns
    -------
    If given `results`=True this method will return the following data
    its : np.ndarray
        The calculated implied timescales. 2D array with dimensions (len(`lags`), `nits`)
    lags : np.ndarray
        A list of the lag times that were used to calculate the implied timescales

    Examples
    --------
    >>> model = Model(data)
    >>> model.plotTimescales()
    >>> model.plotTimescales(lags=list(range(1,100,5)))
    """
    import pyemma.plots as mplt
    import pyemma.msm as msm
    self._integrityCheck()

    if lags is not None:
        lags = unitconvert(units, 'frames', lags, fstep=self.data.fstep).tolist()
    else:
        lags = self._defaultLags()

    if nits is None:
        nits = np.min((self.data.K, 20))

    from htmd.config import _config
    trajectories = self.data.St.tolist()
    its = msm.its(trajectories, lags=lags, errors=errors, nits=nits, n_jobs=_config['ncpus'])

    if plot:
        from matplotlib import pylab as plt
        plt.ion()
        plt.figure()
        mplt.plot_implied_timescales(its, dt=self.data.fstep, units='ns')
        plt.show()

    if not results:
        return None
    return its.get_timescales(), its.lags
def markovModel(self, lag, macronum, units='frames', sparse=False):
    """ Build a Markov model at a given lag time and calculate metastable states

    Parameters
    ----------
    lag : int
        The lag time at which to calculate the Markov state model. The units are specified with the `units` argument.
    macronum : int
        The number of macrostates (metastable states) to produce
    units : str
        The units of lag. Can be 'frames' or any time unit given as a string.
    sparse : bool
        Make the transition matrix sparse. Useful if lots (> 4000) states are used for the MSM. Warning: untested.

    Examples
    --------
    >>> model = Model(data)
    >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
    """
    import pyemma.msm as msm
    self._integrityCheck(markov=True)

    lag = unitconvert(units, 'frames', lag, fstep=self.data.fstep)
    self.lag = lag
    self.msm = msm.estimate_markov_model(self.data.St.tolist(), self.lag, sparse=sparse)
    self.P = self.msm.transition_matrix

    # Cluster -> microstate lookup; clusters outside the active set stay at -1
    active = self.msm.active_set
    n_active = len(active)
    n_slots = self.data.K + 1
    self.micro_ofcluster = np.full(n_slots, -1, dtype=int)
    self.micro_ofcluster[active] = np.arange(n_active)
    self.cluster_ofmicro = active
    self.micronum = n_active

    self.coarsemsm = self.msm.pcca(macronum)

    # Fixing pyemma macrostates: renumber the labels that actually occur to 0..macronum-1
    present = list(set(self.msm.metastable_assignments))
    self.macronum = len(present)
    relabel = np.full(macronum, -1, dtype=int)
    relabel[present] = range(self.macronum)
    self.macro_ofmicro = relabel[self.msm.metastable_assignments]

    # Cluster -> macrostate lookup; clusters outside the active set stay at -1
    self.macro_ofcluster = np.full(n_slots, -1, dtype=int)
    self.macro_ofcluster[active] = self.macro_ofmicro

    used_percent = self.msm.active_count_fraction * 100
    logger.info('{:.1f}% of the data was used'.format(used_percent))

    self._modelid = random.random()

    macro_trajs = _macroTrajSt(self.data.St, self.macro_ofcluster)
    _macroTrajectoriesReport(self.macronum, macro_trajs, self.data.simlist)
def markovModel(self, lag, macronum, units='frames', sparse=False):
    """ Build a Markov model at a given lag time and calculate metastable states

    Parameters
    ----------
    lag : int
        The lag time at which to calculate the Markov state model. The units are specified with the `units` argument.
    macronum : int
        The number of macrostates (metastable states) to produce
    units : str
        The units of lag. Can be 'frames' or any time unit given as a string.
    sparse : bool
        Make the transition matrix sparse. Useful if lots (> 4000) states are used for the MSM. Warning: untested.

    Examples
    --------
    >>> model = Model(data)
    >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
    """
    import pyemma.msm as msm
    self._integrityCheck(markov=True)

    lag = unitconvert(units, 'frames', lag, fstep=self.data.fstep)
    self.lag = lag
    self.msm = msm.estimate_markov_model(self.data.St.tolist(), self.lag, sparse=sparse)
    self.P = self.msm.transition_matrix

    def _unassigned():
        # One slot per cluster index 0..K, all initialised to -1
        return np.full(self.data.K + 1, -1, dtype=int)

    # Microstate index of every cluster in the active set
    self.micro_ofcluster = _unassigned()
    self.micro_ofcluster[self.msm.active_set] = np.arange(len(self.msm.active_set))
    self.cluster_ofmicro = self.msm.active_set
    self.micronum = len(self.msm.active_set)

    self.coarsemsm = self.msm.pcca(macronum)

    # Fixing pyemma macrostates
    occurring = list(set(self.msm.metastable_assignments))
    self.macronum = len(occurring)
    mask = np.full(macronum, -1, dtype=int)
    mask[occurring] = range(self.macronum)
    self.macro_ofmicro = mask[self.msm.metastable_assignments]

    # Macrostate index of every cluster in the active set
    self.macro_ofcluster = _unassigned()
    self.macro_ofcluster[self.msm.active_set] = self.macro_ofmicro

    logger.info('{:.1f}% of the data was used'.format(self.msm.active_count_fraction * 100))

    self._modelid = random.random()

    _macroTrajectoriesReport(
        self.macronum,
        _macroTrajSt(self.data.St, self.macro_ofcluster),
        self.data.simlist,
    )
def markovModel(self, lag, macronum, units='frames', sparse=False, hmm=False):
    """ Build a Markov model at a given lag time and calculate metastable states

    Parameters
    ----------
    lag : int
        The lag time at which to calculate the Markov state model. The units are specified with the `units` argument.
    macronum : int
        The number of macrostates (metastable states) to produce. Reduced by one for every PCCA pass that
        yields a different number of distinct macrostates than requested.
    units : str
        The units of lag. Can be 'frames' or any time unit given as a string.
    sparse : bool
        Make the transition matrix sparse. Useful if lots (> 4000) states are used for the MSM. Warning: untested.
    hmm : bool
        If True, the result of `self.msm.coarse_grain(self.macronum)` is stored in `self.hmm`. Still in development.

    Raises
    ------
    RuntimeError
        If fewer than two macrostates remain.

    Examples
    --------
    >>> model = Model(data)
    >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
    """
    import pyemma.msm as msm
    self._integrityCheck(markov=True)

    lag = unitconvert(units, 'frames', lag, fstep=self.data.fstep)
    self.lag = lag
    self.msm = msm.estimate_markov_model(self.data.St.tolist(), self.lag, sparse=sparse)

    done = False
    while not done:
        self.coarsemsm = self.msm.pcca(macronum)
        n_found = len(np.unique(self.msm.metastable_assignments))
        done = n_found == macronum
        if not done:
            macronum -= 1
            logger.warning('PCCA returned empty macrostates. Reducing the number of macrostates to {}.'.format(macronum))
        # The lower bound is enforced after every pass, successful or not
        if macronum < 2:
            raise RuntimeError('Could not create even two macrostates. Please revise your clustering.')

    self._modelid = random.random()

    if hmm:  # Still in development
        self.hmm = self.msm.coarse_grain(self.macronum)

    logger.info('{:.1f}% of the data was used'.format(self.msm.active_count_fraction * 100))

    # NOTE(review): `self.macronum` / `self.macro_ofcluster` are not assigned in this method;
    # presumably they are defined elsewhere on the class -- confirm.
    _macroTrajectoriesReport(
        self.macronum,
        _macroTrajSt(self.data.St, self.macro_ofcluster),
        self.data.simlist,
    )
def __init__(self, data, lag, units="frames", dimensions=None, njobs=None):
    """Fit a pyEMMA TICA estimator on in-memory data or on streamed projections.

    Parameters
    ----------
    data : Metric or data object
        When a `Metric` is given, its projections are produced by
        `_projectionGenerator` and fed to the estimator with `partial_fit`.
        Any other object is treated as in-memory data and must expose
        `dat` and `fstep`.
    lag : int
        The lag time, expressed in `units`.
    units : str
        The units of `lag`. Only "frames" is accepted for a `Metric`.
    dimensions : optional
        Column selector applied to every trajectory before fitting.
        None fits on all columns.
    njobs : int, optional
        Value handed to `_projectionGenerator`. None falls back to
        `htmd.util._getNjobs()`.

    Raises
    ------
    RuntimeError
        If `data` is a `Metric` and `units` is not "frames", or if the
        in-memory lag converts to 0 frames.
    """
    from pyemma.coordinates.transform.tica import TICA as TICApyemma
    from tqdm import tqdm
    from htmd.util import _getNjobs

    self.data = data
    self.dimensions = dimensions
    self.njobs = _getNjobs() if njobs is None else njobs

    def _pick(frames):
        # Sub-select dimensions for fitting (no-op when none were requested)
        if self.dimensions is None:
            return frames
        return frames[:, self.dimensions]

    streaming = isinstance(data, Metric)

    if streaming:
        # Memory efficient TICA projecting trajectories on the fly
        if units != "frames":
            raise RuntimeError(
                "Cannot use delayed projection TICA with units other than frames for now. Report this to HTMD issues."
            )
        self.tic = TICApyemma(lag)
        progress = tqdm(total=len(data.simulations))
        for chunk in _projectionGenerator(data, self.njobs):
            for entry in chunk:
                if entry is not None:
                    self.tic.partial_fit(_pick(entry[0]))
            progress.update(len(chunk))
        progress.close()
    else:
        # In-memory TICA
        lag = unitconvert(units, "frames", lag, data.fstep)
        if lag == 0:
            raise RuntimeError(
                "Lag time conversion resulted in 0 frames. Please use a larger lag-time for TICA."
            )
        self.tic = TICApyemma(lag)
        if self.dimensions is None:
            datalist = data.dat.tolist()
        else:
            datalist = [_pick(traj).copy() for traj in data.dat]
        self.tic.fit(datalist)
def __init__(self, data, lag, units='frames'):
    """Fit a pyEMMA TICA model on in-memory data or on projections made one simulation at a time.

    Parameters
    ----------
    data : Metric or data object
        When a `Metric` is given, every simulation is projected with
        `data._projectSingle` and fed to the estimator with `partial_fit`.
        Any other object is treated as in-memory data and must expose `dat`.
        Both kinds must expose `fstep`, which is used for the lag conversion.
    lag : int
        The lag time, expressed in `units`.
    units : str
        The units of `lag`. The lag is converted to frames for both kinds of input.

    Raises
    ------
    RuntimeError
        If the lag converts to 0 frames.
    """
    self.data = data

    # Convert once for both branches so `units` is also honoured for in-memory data.
    lag = unitconvert(units, 'frames', lag, data.fstep)
    if lag == 0:
        raise RuntimeError('Lag time conversion resulted in 0 frames. Please use a larger lag-time for TICA.')

    if isinstance(data, Metric):
        from pyemma.coordinates.transform.tica import TICA
        self.tic = TICA(lag)
        p = ProgressBar(len(data.simulations))
        for i in range(len(data.simulations)):
            # Fix for pyemma bug. Remove eventually:
            d, _, _ = data._projectSingle(i)
            if d is None or d.shape[0] < lag:
                # Missing projection, or trajectory shorter than the lag
                continue
            self.tic.partial_fit(d)
            p.progress()
        p.stop()
    else:
        from pyemma.coordinates import tica
        self.tic = tica(data.dat.tolist(), lag=lag)
def plotTimescales(self, lags=None, units='frames', errors=None, nits=None, results=False, plot=True, save=None):
    """ Plot the implied timescales of MSMs of various lag times

    Parameters
    ----------
    lags : list
        The lag times at which to compute the timescales. By default it spreads out 25 lag times linearly from lag 10
        until the mode length of the trajectories.
    units : str
        The units of lag. Can be 'frames' or any time unit given as a string.
    errors : errors
        Calculate errors using Bayes (Refer to pyEMMA documentation)
    nits : int
        Number of implied timescales to calculate. Default: the smaller of `self.data.K` and 20
    results : bool
        If the method should return the calculated implied timescales
    plot : bool
        If the method should display the plot of implied timescales
    save : str
        Path of the file in which to save the figure

    Returns
    -------
    If given results=True this method will return the following data
    its : np.ndarray
        The calculated implied timescales. 2D array with dimensions (len(`lags`), `nits`)
    lags : np.ndarray
        A list of the lag times that were used to calculate the implied timescales

    Raises
    ------
    ValueError
        If plotting the implied timescales fails with a ValueError; the message points at `self.data.fstep`.

    Examples
    --------
    >>> model = Model(data)
    >>> model.plotTimescales()
    >>> model.plotTimescales(lags=list(range(1,100,5)))
    """
    import pyemma.plots as mplt
    import pyemma.msm as msm
    self._integrityCheck()
    if lags is None:
        lags = self._defaultLags()
    else:
        lags = unitconvert(units, 'frames', lags, fstep=self.data.fstep).tolist()

    if nits is None:
        nits = np.min((self.data.K, 20))

    from htmd.config import _config
    its = msm.its(self.data.St.tolist(), lags=lags, errors=errors, nits=nits, n_jobs=_config['ncpus'])

    if plot or (save is not None):
        from matplotlib import pylab as plt
        plt.ion()
        plt.figure()
        try:
            mplt.plot_implied_timescales(its, dt=self.data.fstep, units='ns')
        except ValueError as ve:
            # Drop the empty figure before reporting the problem
            plt.close()
            # The line break in the message is written as an escape so the literal stays on one line.
            raise ValueError(
                '{} This is probably caused by badly set fstep in the data ({}). \n'.format(ve, self.data.fstep) +
                'Please correct the model.data.fstep to correspond to the simulation frame step in nanoseconds.'
            ) from ve
        if save is not None:
            plt.savefig(save, dpi=300, bbox_inches='tight', pad_inches=0.2)
        if plot:
            plt.show()

    if results:
        return its.get_timescales(), its.lags
def plotTimescales(self, lags=None, units='frames', errors=None, nits=None, results=False, plot=True, save=None):
    """ Plot the implied timescales of MSMs of various lag times

    Parameters
    ----------
    lags : list
        The lag times at which to compute the timescales. By default it spreads out 25 lag times linearly from lag 10
        until the mode length of the trajectories.
    units : str
        The units of lag. Can be 'frames' or any time unit given as a string.
    errors : errors
        Calculate errors using Bayes (Refer to pyEMMA documentation)
    nits : int
        Number of implied timescales to calculate. Default: the smaller of `self.data.K` and 20
    results : bool
        If the method should return the calculated implied timescales
    plot : bool
        If the method should display the plot of implied timescales
    save : str
        Path of the file in which to save the figure

    Returns
    -------
    If given results=True this method will return the following data
    its : np.ndarray
        The calculated implied timescales. 2D array with dimensions (len(`lags`), `nits`)
    lags : np.ndarray
        A list of the lag times that were used to calculate the implied timescales

    Raises
    ------
    ValueError
        If plotting the implied timescales fails with a ValueError; the message points at `self.data.fstep`.

    Examples
    --------
    >>> model = Model(data)
    >>> model.plotTimescales()
    >>> model.plotTimescales(lags=list(range(1,100,5)))
    """
    import pyemma.plots as mplt
    import pyemma.msm as msm
    self._integrityCheck()

    if lags is None:
        lags = self._defaultLags()
    else:
        lags = unitconvert(units, 'frames', lags, fstep=self.data.fstep).tolist()

    if nits is None:
        nits = np.min((self.data.K, 20))

    from htmd.config import _config
    its = msm.its(self.data.St.tolist(), lags=lags, errors=errors, nits=nits, n_jobs=_config['ncpus'])

    if plot or (save is not None):
        from matplotlib import pylab as plt
        plt.ion()
        plt.figure()
        try:
            mplt.plot_implied_timescales(its, dt=self.data.fstep, units='ns')
        except ValueError as ve:
            # Discard the figure opened above before re-raising
            plt.close()
            # Explicit escape instead of a raw line break inside the literal
            message = (
                '{} This is probably caused by badly set fstep in the data ({}). \n'.format(ve, self.data.fstep)
                + 'Please correct the model.data.fstep to correspond to the simulation frame step in nanoseconds.'
            )
            raise ValueError(message) from ve
        if save is not None:
            plt.savefig(save, dpi=300, bbox_inches='tight', pad_inches=0.2)
        if plot:
            plt.show()

    if results:
        return its.get_timescales(), its.lags