Beispiel #1
0
    def __init__(self, data, lag, units='frames', dimensions=None):
        """ Fit a pyEMMA TICA estimator on projected data.

        Two code paths exist. If `data` is a `Metric`, projections are produced by
        `_projectionGenerator` and fed to the estimator one at a time with `partial_fit`.
        Otherwise `data.dat` is fitted in a single `fit` call.

        Parameters
        ----------
        data : Metric or object exposing `dat` and `fstep`
            Source of the data on which TICA is fitted. It is stored in `self.data`.
        lag : int
            The lag time. For the in-memory path it is converted from `units` to frames using `data.fstep`.
        units : str
            The units of `lag`. Only 'frames' is accepted when `data` is a `Metric`.
        dimensions : optional
            Column selection applied as `array[:, dimensions]` before fitting. If None all columns are used.

        Raises
        ------
        RuntimeError
            If `data` is a `Metric` and `units` is not 'frames', or if the converted lag equals 0 frames.
        """
        from pyemma.coordinates.transform.tica import TICA as TICApyemma

        self.data = data
        self.dimensions = dimensions

        if not isinstance(data, Metric):  # Everything is already in memory
            lag = unitconvert(units, 'frames', lag, data.fstep)
            if lag == 0:
                raise RuntimeError('Lag time conversion resulted in 0 frames. Please use a larger lag-time for TICA.')

            self.tic = TICApyemma(lag)
            if self.dimensions is not None:  # Fit only on the selected columns
                datalist = [x[:, self.dimensions].copy() for x in data.dat]
            else:
                datalist = data.dat.tolist()
            self.tic.fit(datalist)
            return

        # From here on: projections are generated on the fly and fitted incrementally
        if units != 'frames':
            raise RuntimeError('Cannot use delayed projection TICA with units other than frames for now. Report this to HTMD issues.')
        self.tic = TICApyemma(lag)

        bar = ProgressBar(len(data.simulations))
        for batch in _projectionGenerator(data, _getNcpus()):
            for item in batch:
                if item is None:  # Nothing to fit for this entry
                    continue
                features = item[0]
                if self.dimensions is not None:  # Fit only on the selected columns
                    features = features[:, self.dimensions]
                self.tic.partial_fit(features)
            bar.progress(len(batch))
        bar.stop()
Beispiel #2
0
    def __init__(self, data, lag, units='frames'):
        """ Fit a pyEMMA TICA model on projected data.

        If `data` is a `Metric`, projections are produced by `_projectionGenerator` and fed to a
        `TICA` estimator with `partial_fit`. Otherwise `pyemma.coordinates.tica` is called on
        `data.dat.tolist()`.

        Parameters
        ----------
        data : Metric or object exposing `dat` and `fstep`
            Source of the data on which TICA is fitted. It is stored in `self.data`.
        lag : int
            The lag time. For the in-memory path it is converted from `units` to frames using `data.fstep`.
        units : str
            The units of `lag`. Only 'frames' is accepted when `data` is a `Metric`.

        Raises
        ------
        RuntimeError
            If `data` is a `Metric` and `units` is not 'frames', or if the converted lag equals 0 frames.
        """
        from pyemma.coordinates import tica

        self.data = data
        if isinstance(data, Metric):
            if units != 'frames':
                raise RuntimeError(
                    'Cannot use delayed projection TICA with units other than frames for now. Report this to HTMD issues.'
                )
            metr = data
            from pyemma.coordinates.transform.tica import TICA
            self.tic = TICA(lag)

            p = ProgressBar(len(metr.simulations))
            for proj in _projectionGenerator(metr, _getNcpus()):
                for pro in proj:
                    # The generator can yield None entries (the other TICA constructors in this
                    # code base guard against it); indexing None would raise a TypeError.
                    if pro is None:
                        continue
                    self.tic.partial_fit(pro[0])
                p.progress(len(proj))
            p.stop()
        else:
            lag = unitconvert(units, 'frames', lag, data.fstep)
            if lag == 0:
                raise RuntimeError(
                    'Lag time conversion resulted in 0 frames. Please use a larger lag-time for TICA.'
                )
            self.tic = tica(data.dat.tolist(), lag=lag)
Beispiel #3
0
    def markovModel(self,
                    lag,
                    macronum,
                    units='frames',
                    sparse=False,
                    hmm=False):
        """ Estimate a Markov state model and coarse-grain it into macrostates with PCCA

        PCCA is repeated with one macrostate less each time it yields fewer distinct
        metastable assignments than requested.

        Parameters
        ----------
        lag : int
            The lag time of the Markov state model, expressed in `units`.
        macronum : int
            The number of macrostates (metastable states) requested
        units : str
            The units of lag. Can be 'frames' or any time unit given as a string.
        sparse : bool
            Forwarded to `estimate_markov_model` as `sparse`. Useful if lots (> 4000) states are used for the MSM.
            Warning: untested.
        hmm : bool
            If True, `self.msm.coarse_grain(self.macronum)` is stored in `self.hmm`. Still in development.

        Raises
        ------
        RuntimeError
            If the number of macrostates drops below two.

        Examples
        --------
        >>> model = Model(data)
        >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
        """
        import pyemma.msm as msm
        self._integrityCheck(markov=True)

        self.lag = unitconvert(units, 'frames', lag, fstep=self.data.fstep)
        self.msm = msm.estimate_markov_model(self.data.St.tolist(),
                                             self.lag,
                                             sparse=sparse)

        while True:
            self.coarsemsm = self.msm.pcca(macronum)
            all_populated = len(
                np.unique(self.msm.metastable_assignments)) == macronum
            if not all_populated:
                macronum -= 1
                logger.warning(
                    'PCCA returned empty macrostates. Reducing the number of macrostates to {}.'
                    .format(macronum))
            if macronum < 2:
                raise RuntimeError(
                    'Could not create even two macrostates. Please revise your clustering.'
                )
            if all_populated:
                break

        self._modelid = random.random()

        if hmm:  # Still in development
            self.hmm = self.msm.coarse_grain(self.macronum)

        used_percent = self.msm.active_count_fraction * 100
        logger.info('{:.1f}% of the data was used'.format(used_percent))

        macro_trajs = _macroTrajSt(self.data.St, self.macro_ofcluster)
        _macroTrajectoriesReport(self.macronum, macro_trajs,
                                 self.data.simlist)
Beispiel #4
0
    def _defaultLags(self, minlag=None, maxlag=None, numlags=None, units='frames'):
        """ Build a list of lag times (in frames) spread linearly between a minimum and maximum lag

        Parameters
        ----------
        minlag : optional
            Smallest lag, expressed in `units`. If None, 10 is used when the maximum lag exceeds 20 frames,
            otherwise 2.
        maxlag : optional
            Largest lag, expressed in `units`. If None, the mode of `self.trajLengths` minus one is used.
        numlags : int, optional
            Number of linearly spaced lags between `minlag` and `maxlag`. If None, 25 lags are generated.
        units : str
            The units of `minlag` and `maxlag`. They are converted to frames using `self.fstep`.

        Returns
        -------
        lags : np.ndarray
            Integer array starting with lag 1 followed by the `numlags` rounded, linearly spaced lags.
        """
        if minlag is not None or maxlag is not None:
            # Only needed when a user supplied lag has to be converted to frames
            from htmd.units import convert as unitconvert

        if maxlag is None:
            from scipy import stats
            # Depending on the SciPy version `.mode` is a scalar or a one-element array; normalise to a scalar
            mode_length = np.ravel(stats.mode(self.trajLengths).mode)[0]
            maxlag = mode_length - 1  # -1 to avoid warnings in timescales calc
        else:
            maxlag = unitconvert(units, 'frames', maxlag, fstep=self.fstep)

        if minlag is None:
            minlag = 10 if maxlag > 20 else 2
        else:
            minlag = unitconvert(units, 'frames', minlag, fstep=self.fstep)

        if numlags is None:
            # np.linspace rejects num=None, so the no-argument call needs a concrete default
            numlags = 25

        return np.append(1, np.round(np.linspace(minlag, maxlag, numlags))).astype(int)
Beispiel #5
0
    def __init__(self, data, lag, units='frames'):
        """ Store `data` and compute `self.weights` from the lagged autocorrelation of its concatenated data

        Parameters
        ----------
        data : object exposing `dat` and `fstep`
            Stored in `self.data`. Its `dat` entries are joined with `np.concatenate`.
        lag : int
            The lag time, expressed in `units`. It is converted to frames using `data.fstep`.
        units : str
            The units of `lag`.

        Raises
        ------
        RuntimeError
            If the converted lag equals 0 frames.
        """
        lag_frames = unitconvert(units, 'frames', lag, data.fstep)
        if lag_frames == 0:
            raise RuntimeError('Lag time conversion resulted in 0 frames. Please use a larger lag-time for TICA.')

        self.data = data
        stacked = np.concatenate(self.data.dat)
        self.weights = self._autocorrelation(stacked, lag_frames)
Beispiel #6
0
    def __init__(self, data, lag, units='frames'):
        """ Keep a reference to `data` and derive `self.weights` via `self._autocorrelation`

        Parameters
        ----------
        data : object exposing `dat` and `fstep`
            Stored in `self.data`. All entries of `data.dat` are concatenated before the autocorrelation is computed.
        lag : int
            The lag time in `units`; converted to frames with `data.fstep`.
        units : str
            The units of `lag`.

        Raises
        ------
        RuntimeError
            If the lag converts to 0 frames.
        """
        frames = unitconvert(units, 'frames', lag, data.fstep)
        if frames == 0:
            raise RuntimeError('Lag time conversion resulted in 0 frames. Please use a larger lag-time for TICA.')

        self.data = data
        self.weights = self._autocorrelation(np.concatenate(self.data.dat), frames)
Beispiel #7
0
    def _defaultLags(self, minlag=None, maxlag=None, numlags=None, units='frames'):
        """ Build a list of lag times (in frames) spread linearly between a minimum and maximum lag

        Parameters
        ----------
        minlag : optional
            Smallest lag, expressed in `units`. If None, 10 is used when the maximum lag exceeds 20 frames,
            otherwise 2.
        maxlag : optional
            Largest lag, expressed in `units`. If None, the mode of `self.trajLengths` minus one is used.
        numlags : int, optional
            Number of linearly spaced lags between `minlag` and `maxlag`. If None, 25 lags are generated.
        units : str
            The units of `minlag` and `maxlag`. They are converted to frames using `self.fstep`.

        Returns
        -------
        lags : np.ndarray
            Integer array starting with lag 1 followed by the `numlags` rounded, linearly spaced lags.
        """
        if minlag is not None or maxlag is not None:
            # Only needed when a user supplied lag has to be converted to frames
            from htmd.units import convert as unitconvert

        if maxlag is None:
            from scipy import stats
            # Depending on the SciPy version `.mode` is a scalar or a one-element array; normalise to a scalar
            mode_length = np.ravel(stats.mode(self.trajLengths).mode)[0]
            maxlag = mode_length - 1  # -1 to avoid warnings in timescales calc
        else:
            maxlag = unitconvert(units, 'frames', maxlag, fstep=self.fstep)

        if minlag is None:
            minlag = 10 if maxlag > 20 else 2
        else:
            minlag = unitconvert(units, 'frames', minlag, fstep=self.fstep)

        if numlags is None:
            # np.linspace rejects num=None, so the no-argument call needs a concrete default
            numlags = 25

        return np.append(1, np.round(np.linspace(minlag, maxlag, numlags))).astype(int)
Beispiel #8
0
    def plotTimescales(self, lags=None, units='frames', errors=None, nits=None, results=False, plot=True):
        """ Compute the implied timescales of MSMs over a range of lag times and optionally plot them

        Parameters
        ----------
        lags : list
            The lag times at which to compute the timescales, expressed in `units`. If None, the lags returned by
            `self._defaultLags()` are used.
        units : str
            The units of lag. Can be 'frames' or any time unit given as a string.
        errors : errors
            Forwarded to `pyemma.msm.its` as `errors` (Refer to pyEMMA documentation)
        nits : int
            Number of implied timescales to calculate. If None, the smaller of `self.data.K` and 20 is used.
        results : bool
            If the method should return the calculated implied timescales
        plot : bool
            If the method should display the plot of implied timescales

        Returns
        -------
        Only when `results`=True; otherwise None is returned
        its : np.ndarray
            The calculated implied timescales, as returned by `get_timescales()`
        lags : np.ndarray
            The lag times that were used to calculate the implied timescales

        Examples
        --------
        >>> model = Model(data)
        >>> model.plotTimescales()
        >>> model.plotTimescales(lags=list(range(1,100,5)))
        """
        import pyemma.plots as mplt
        import pyemma.msm as msm
        self._integrityCheck()

        if lags is not None:
            lags = unitconvert(units, 'frames', lags, fstep=self.data.fstep).tolist()
        else:
            lags = self._defaultLags()

        if nits is None:
            nits = np.min((self.data.K, 20))

        from htmd.config import _config
        its = msm.its(
            self.data.St.tolist(),
            lags=lags,
            errors=errors,
            nits=nits,
            n_jobs=_config['ncpus'],
        )

        if plot:
            from matplotlib import pylab as plt
            plt.ion()
            plt.figure()
            mplt.plot_implied_timescales(its, dt=self.data.fstep, units='ns')
            plt.show()

        if not results:
            return None
        return its.get_timescales(), its.lags
Beispiel #9
0
    def markovModel(self, lag, macronum, units='frames', sparse=False):
        """ Estimate a Markov state model and derive the micro/macrostate bookkeeping arrays

        Parameters
        ----------
        lag : int
            The lag time of the Markov state model, expressed in `units`.
        macronum : int
            The number of macrostates (metastable states) requested from PCCA
        units : str
            The units of lag. Can be 'frames' or any time unit given as a string.
        sparse : bool
            Forwarded to `estimate_markov_model` as `sparse`. Useful if lots (> 4000) states are used for the MSM.
            Warning: untested.

        Examples
        --------
        >>> model = Model(data)
        >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
        """
        import pyemma.msm as msm
        self._integrityCheck(markov=True)

        self.lag = unitconvert(units, 'frames', lag, fstep=self.data.fstep)
        self.msm = msm.estimate_markov_model(self.data.St.tolist(),
                                             self.lag,
                                             sparse=sparse)
        self.P = self.msm.transition_matrix

        # Clusters outside the active set keep the marker -1
        active = self.msm.active_set
        active_count = len(active)
        self.micro_ofcluster = -np.ones(self.data.K + 1, dtype=int)
        self.micro_ofcluster[active] = np.arange(active_count)
        self.cluster_ofmicro = active
        self.micronum = active_count
        self.coarsemsm = self.msm.pcca(macronum)

        # Fixing pyemma macrostates: renumber the macrostates that actually occur to 0..macronum-1
        assignments = self.msm.metastable_assignments
        occupied = list(set(assignments))
        self.macronum = len(occupied)
        mask = np.full(macronum, -1, dtype=int)
        mask[occupied] = range(self.macronum)

        self.macro_ofmicro = mask[assignments]
        self.macro_ofcluster = -np.ones(self.data.K + 1, dtype=int)
        self.macro_ofcluster[active] = self.macro_ofmicro

        used_percent = self.msm.active_count_fraction * 100
        logger.info('{:.1f}% of the data was used'.format(used_percent))

        self._modelid = random.random()

        macro_trajs = _macroTrajSt(self.data.St, self.macro_ofcluster)
        _macroTrajectoriesReport(self.macronum, macro_trajs,
                                 self.data.simlist)
Beispiel #10
0
    def markovModel(self, lag, macronum, units='frames', sparse=False):
        """ Build a Markov model at the given lag time and fill in the cluster/micro/macro mapping arrays

        Parameters
        ----------
        lag : int
            The lag time at which the Markov state model is estimated. Its units are given by `units`.
        macronum : int
            The number of macrostates (metastable states) passed to PCCA
        units : str
            The units of lag. Can be 'frames' or any time unit given as a string.
        sparse : bool
            Passed on to `estimate_markov_model`. Useful if lots (> 4000) states are used for the MSM.
            Warning: untested.

        Examples
        --------
        >>> model = Model(data)
        >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
        """
        import pyemma.msm as msm
        self._integrityCheck(markov=True)

        self.lag = unitconvert(units, 'frames', lag, fstep=self.data.fstep)
        self.msm = msm.estimate_markov_model(self.data.St.tolist(), self.lag, sparse=sparse)
        self.P = self.msm.transition_matrix

        n_slots = self.data.K + 1
        active_set = self.msm.active_set

        # Entries of clusters that are not in the active set stay at -1
        self.micro_ofcluster = -np.ones(n_slots, dtype=int)
        self.micro_ofcluster[active_set] = np.arange(len(active_set))
        self.cluster_ofmicro = active_set
        self.micronum = len(active_set)
        self.coarsemsm = self.msm.pcca(macronum)

        # Fixing pyemma macrostates: only macrostates that occur in the assignments get a new index
        found = list(set(self.msm.metastable_assignments))
        self.macronum = len(found)
        mask = np.full(macronum, -1, dtype=int)
        mask[found] = range(self.macronum)

        self.macro_ofmicro = mask[self.msm.metastable_assignments]
        self.macro_ofcluster = -np.ones(n_slots, dtype=int)
        self.macro_ofcluster[active_set] = self.macro_ofmicro
        logger.info('{:.1f}% of the data was used'.format(self.msm.active_count_fraction * 100))

        self._modelid = random.random()

        macro_st = _macroTrajSt(self.data.St, self.macro_ofcluster)
        _macroTrajectoriesReport(self.macronum, macro_st, self.data.simlist)
Beispiel #11
0
    def markovModel(self, lag, macronum, units='frames', sparse=False, hmm=False):
        """ Build a Markov model at a given lag time and coarse-grain it with PCCA

        Whenever PCCA yields fewer distinct metastable assignments than requested, the number of
        macrostates is decreased by one and PCCA is run again.

        Parameters
        ----------
        lag : int
            The lag time at which to calculate the Markov state model. The units are specified with the `units` argument.
        macronum : int
            The number of macrostates (metastable states) requested
        units : str
            The units of lag. Can be 'frames' or any time unit given as a string.
        sparse : bool
            Passed on to `estimate_markov_model`. Useful if lots (> 4000) states are used for the MSM.
            Warning: untested.
        hmm : bool
            If True, the result of `self.msm.coarse_grain(self.macronum)` is stored in `self.hmm`.
            Still in development.

        Raises
        ------
        RuntimeError
            If fewer than two macrostates remain.

        Examples
        --------
        >>> model = Model(data)
        >>> model.markovModel(150, 4)  # 150 frames lag, 4 macrostates
        """
        import pyemma.msm as msm
        self._integrityCheck(markov=True)

        self.lag = unitconvert(units, 'frames', lag, fstep=self.data.fstep)
        self.msm = msm.estimate_markov_model(self.data.St.tolist(), self.lag, sparse=sparse)

        done = False
        while not done:
            self.coarsemsm = self.msm.pcca(macronum)
            n_found = len(np.unique(self.msm.metastable_assignments))
            done = n_found == macronum
            if not done:
                macronum -= 1
                logger.warning('PCCA returned empty macrostates. Reducing the number of macrostates to {}.'.format(macronum))
            if macronum < 2:
                raise RuntimeError('Could not create even two macrostates. Please revise your clustering.')

        self._modelid = random.random()

        if hmm:  # Still in development
            self.hmm = self.msm.coarse_grain(self.macronum)

        logger.info('{:.1f}% of the data was used'.format(self.msm.active_count_fraction * 100))

        macro_st = _macroTrajSt(self.data.St, self.macro_ofcluster)
        _macroTrajectoriesReport(self.macronum, macro_st, self.data.simlist)
Beispiel #12
0
    def __init__(self, data, lag, units="frames", dimensions=None, njobs=None):
        """Fit a pyEMMA TICA estimator on projected data.

        If `data` is a `Metric`, projections are produced by `_projectionGenerator` and fed to the
        estimator one at a time with `partial_fit`. Otherwise `data.dat` is fitted in one `fit` call.

        Parameters
        ----------
        data : Metric or object exposing `dat` and `fstep`
            Source of the data on which TICA is fitted. It is stored in `self.data`.
        lag : int
            The lag time. For the in-memory path it is converted from `units` to frames using `data.fstep`.
        units : str
            The units of `lag`. Only "frames" is accepted when `data` is a `Metric`.
        dimensions : optional
            Column selection applied as `array[:, dimensions]` before fitting. If None all columns are used.
        njobs : int, optional
            Forwarded to `_projectionGenerator`. If None the value of `_getNjobs()` is used.

        Raises
        ------
        RuntimeError
            If `data` is a `Metric` and `units` is not "frames", or if the converted lag equals 0 frames.
        """
        from pyemma.coordinates.transform.tica import TICA as TICApyemma
        from tqdm import tqdm
        from htmd.util import _getNjobs

        self.data = data
        self.dimensions = dimensions
        if njobs is None:
            njobs = _getNjobs()
        self.njobs = njobs

        if not isinstance(data, Metric):  # Everything is already in memory
            lag = unitconvert(units, "frames", lag, data.fstep)
            if lag == 0:
                raise RuntimeError(
                    "Lag time conversion resulted in 0 frames. Please use a larger lag-time for TICA."
                )

            self.tic = TICApyemma(lag)
            if self.dimensions is not None:  # Fit only on the selected columns
                datalist = [x[:, self.dimensions].copy() for x in data.dat]
            else:
                datalist = data.dat.tolist()
            self.tic.fit(datalist)
            return

        # From here on: projections are generated on the fly and fitted incrementally
        if units != "frames":
            raise RuntimeError(
                "Cannot use delayed projection TICA with units other than frames for now. Report this to HTMD issues."
            )
        self.tic = TICApyemma(lag)

        pbar = tqdm(total=len(data.simulations))
        for batch in _projectionGenerator(data, self.njobs):
            for item in batch:
                if item is None:  # Nothing to fit for this entry
                    continue
                features = item[0]
                if self.dimensions is not None:  # Fit only on the selected columns
                    features = features[:, self.dimensions]
                self.tic.partial_fit(features)
            pbar.update(len(batch))
        pbar.close()
Beispiel #13
0
    def __init__(self, data, lag, units='frames'):
        """ Fit a pyEMMA TICA model on projected data.

        If `data` is a `Metric`, every simulation is projected with `data._projectSingle` and fed to a
        `TICA` estimator with `partial_fit`. Otherwise `pyemma.coordinates.tica` is called on
        `data.dat.tolist()`.

        Parameters
        ----------
        data : Metric or object exposing `dat` and `fstep`
            Source of the data on which TICA is fitted. It is stored in `self.data`.
        lag : int
            The lag time, expressed in `units`. It is converted to frames using `data.fstep`.
        units : str
            The units of `lag`.

        Raises
        ------
        RuntimeError
            If `data` is in-memory data and the converted lag equals 0 frames.
        """
        from pyemma.coordinates import tica

        self.data = data
        if isinstance(data, Metric):
            from pyemma.coordinates.transform.tica import TICA
            lag = unitconvert(units, 'frames', lag, data.fstep)
            self.tic = TICA(lag)

            p = ProgressBar(len(data.simulations))
            for i in range(len(data.simulations)):
                d, _, _ = data._projectSingle(i)
                # Fix for pyemma bug. Remove eventually: projections that are missing or
                # shorter than the lag are not passed to partial_fit.
                if d is not None and d.shape[0] >= lag:
                    self.tic.partial_fit(d)
                # Advance for skipped simulations too, otherwise the bar never reaches its total
                p.progress()
            p.stop()
        else:
            # Honour `units` here as well; previously the raw lag was passed on unconverted
            lag = unitconvert(units, 'frames', lag, data.fstep)
            if lag == 0:
                raise RuntimeError(
                    'Lag time conversion resulted in 0 frames. Please use a larger lag-time for TICA.'
                )
            self.tic = tica(data.dat.tolist(), lag=lag)
Beispiel #14
0
    def __init__(self, data, lag, units='frames'):
        """ Fit a pyEMMA TICA model on projected data.

        If `data` is a `Metric`, every simulation is projected with `data._projectSingle` and fed to a
        `TICA` estimator with `partial_fit`. Otherwise `pyemma.coordinates.tica` is called on
        `data.dat.tolist()`.

        Parameters
        ----------
        data : Metric or object exposing `dat` and `fstep`
            Source of the data on which TICA is fitted. It is stored in `self.data`.
        lag : int
            The lag time, expressed in `units`. It is converted to frames using `data.fstep`.
        units : str
            The units of `lag`.

        Raises
        ------
        RuntimeError
            If `data` is in-memory data and the converted lag equals 0 frames.
        """
        from pyemma.coordinates import tica

        self.data = data
        if isinstance(data, Metric):
            from pyemma.coordinates.transform.tica import TICA
            lag = unitconvert(units, 'frames', lag, data.fstep)
            self.tic = TICA(lag)

            p = ProgressBar(len(data.simulations))
            for i in range(len(data.simulations)):
                d, _, _ = data._projectSingle(i)
                # Fix for pyemma bug. Remove eventually: projections that are missing or
                # shorter than the lag are not passed to partial_fit.
                if d is not None and d.shape[0] >= lag:
                    self.tic.partial_fit(d)
                # Advance for skipped simulations too, otherwise the bar never reaches its total
                p.progress()
            p.stop()
        else:
            # Honour `units` here as well; previously the raw lag was passed on unconverted
            lag = unitconvert(units, 'frames', lag, data.fstep)
            if lag == 0:
                raise RuntimeError(
                    'Lag time conversion resulted in 0 frames. Please use a larger lag-time for TICA.'
                )
            self.tic = tica(data.dat.tolist(), lag=lag)
Beispiel #15
0
    def plotTimescales(self, lags=None, units='frames', errors=None, nits=None, results=False, plot=True, save=None):
        """ Plot the implied timescales of MSMs of various lag times

        Parameters
        ----------
        lags : list
            The lag times at which to compute the timescales, expressed in `units`. If None, the lags returned by
            `self._defaultLags()` are used.
        units : str
            The units of lag. Can be 'frames' or any time unit given as a string.
        errors : errors
            Forwarded to `pyemma.msm.its` as `errors` (Refer to pyEMMA documentation)
        nits : int
            Number of implied timescales to calculate. If None, the smaller of `self.data.K` and 20 is used.
        results : bool
            If the method should return the calculated implied timescales
        plot : bool
            If the method should display the plot of implied timescales
        save : str
            Path of the file in which to save the figure

        Returns
        -------
        If given results=True this method will return the following data, otherwise None
        its : np.ndarray
            The calculated implied timescales, as returned by `get_timescales()`
        lags : np.ndarray
            The lag times that were used to calculate the implied timescales

        Raises
        ------
        ValueError
            If plotting the implied timescales fails with a ValueError. The message is extended with a hint
            about `fstep` and the original exception is attached as the cause.

        Examples
        --------
        >>> model = Model(data)
        >>> model.plotTimescales()
        >>> model.plotTimescales(lags=list(range(1,100,5)))
        """
        import pyemma.plots as mplt
        import pyemma.msm as msm
        self._integrityCheck()
        if lags is None:
            lags = self._defaultLags()
        else:
            lags = unitconvert(units, 'frames', lags, fstep=self.data.fstep).tolist()

        if nits is None:
            nits = np.min((self.data.K, 20))

        from htmd.config import _config
        its = msm.its(self.data.St.tolist(), lags=lags, errors=errors, nits=nits, n_jobs=_config['ncpus'])
        if plot or (save is not None):
            from matplotlib import pylab as plt
            plt.ion()
            plt.figure()
            try:
                mplt.plot_implied_timescales(its, dt=self.data.fstep, units='ns')
            except ValueError as ve:
                plt.close()  # Do not leave the empty figure behind
                # Chain explicitly so the traceback shows the plotting error as the direct cause
                raise ValueError('{} This is probably caused by badly set fstep in the data ({}). '.format(ve, self.data.fstep) +
                                 'Please correct the model.data.fstep to correspond to the simulation frame step in nanoseconds.') from ve
            if save is not None:
                plt.savefig(save, dpi=300, bbox_inches='tight', pad_inches=0.2)
            if plot:
                plt.show()
        if results:
            return its.get_timescales(), its.lags
        return None
Beispiel #16
0
    def plotTimescales(self,
                       lags=None,
                       units='frames',
                       errors=None,
                       nits=None,
                       results=False,
                       plot=True):
        """ Compute the implied timescales of MSMs over a range of lag times and optionally plot them

        Parameters
        ----------
        lags : list
            The lag times at which to compute the timescales, expressed in `units`. If None, the lags returned by
            `self._defaultLags()` are used.
        units : str
            The units of lag. Can be 'frames' or any time unit given as a string.
        errors : errors
            Forwarded to `pyemma.msm.its` as `errors` (Refer to pyEMMA documentation)
        nits : int
            Number of implied timescales to calculate. If None, the smaller of `self.data.K` and 20 is used.
        results : bool
            If the method should return the calculated implied timescales
        plot : bool
            If the method should display the plot of implied timescales

        Returns
        -------
        Only when `results`=True; otherwise None is returned
        its : np.ndarray
            The calculated implied timescales, as returned by `get_timescales()`
        lags : np.ndarray
            The lag times that were used to calculate the implied timescales

        Examples
        --------
        >>> model = Model(data)
        >>> model.plotTimescales()
        >>> model.plotTimescales(lags=list(range(1,100,5)))
        """
        import pyemma.plots as mplt
        import pyemma.msm as msm
        self._integrityCheck()

        if lags is not None:
            converted = unitconvert(units, 'frames', lags,
                                    fstep=self.data.fstep)
            lags = converted.tolist()
        else:
            lags = self._defaultLags()

        if nits is None:
            nits = np.min((self.data.K, 20))

        from htmd.config import _config
        its = msm.its(self.data.St.tolist(),
                      lags=lags,
                      errors=errors,
                      nits=nits,
                      n_jobs=_config['ncpus'])

        if plot:
            from matplotlib import pylab as plt
            plt.ion()
            plt.figure()
            mplt.plot_implied_timescales(its, dt=self.data.fstep, units='ns')
            plt.show()

        if not results:
            return None
        return its.get_timescales(), its.lags
Beispiel #17
0
    def plotTimescales(self,
                       lags=None,
                       units='frames',
                       errors=None,
                       nits=None,
                       results=False,
                       plot=True,
                       save=None):
        """ Plot the implied timescales of MSMs of various lag times

        Parameters
        ----------
        lags : list
            The lag times at which to compute the timescales, expressed in `units`. If None, the lags returned by
            `self._defaultLags()` are used.
        units : str
            The units of lag. Can be 'frames' or any time unit given as a string.
        errors : errors
            Forwarded to `pyemma.msm.its` as `errors` (Refer to pyEMMA documentation)
        nits : int
            Number of implied timescales to calculate. If None, the smaller of `self.data.K` and 20 is used.
        results : bool
            If the method should return the calculated implied timescales
        plot : bool
            If the method should display the plot of implied timescales
        save : str
            Path of the file in which to save the figure

        Returns
        -------
        If given results=True this method will return the following data, otherwise None
        its : np.ndarray
            The calculated implied timescales, as returned by `get_timescales()`
        lags : np.ndarray
            The lag times that were used to calculate the implied timescales

        Raises
        ------
        ValueError
            If plotting the implied timescales fails with a ValueError. The message is extended with a hint
            about `fstep` and the original exception is attached as the cause.

        Examples
        --------
        >>> model = Model(data)
        >>> model.plotTimescales()
        >>> model.plotTimescales(lags=list(range(1,100,5)))
        """
        import pyemma.plots as mplt
        import pyemma.msm as msm
        self._integrityCheck()
        if lags is None:
            lags = self._defaultLags()
        else:
            lags = unitconvert(units, 'frames', lags,
                               fstep=self.data.fstep).tolist()

        if nits is None:
            nits = np.min((self.data.K, 20))

        from htmd.config import _config
        its = msm.its(self.data.St.tolist(),
                      lags=lags,
                      errors=errors,
                      nits=nits,
                      n_jobs=_config['ncpus'])
        if plot or (save is not None):
            from matplotlib import pylab as plt
            plt.ion()
            plt.figure()
            try:
                mplt.plot_implied_timescales(its,
                                             dt=self.data.fstep,
                                             units='ns')
            except ValueError as ve:
                plt.close()  # Do not leave the empty figure behind
                # Chain explicitly so the traceback shows the plotting error as the direct cause
                raise ValueError(
                    '{} This is probably caused by badly set fstep in the data ({}). '
                    .format(ve, self.data.fstep) +
                    'Please correct the model.data.fstep to correspond to the simulation frame step in nanoseconds.'
                ) from ve
            if save is not None:
                plt.savefig(save, dpi=300, bbox_inches='tight', pad_inches=0.2)
            if plot:
                plt.show()
        if results:
            return its.get_timescales(), its.lags
        return None