Python _TimeUnit Examples, pyemma.util.units._TimeUnit Python Examples

Example #1

0

Show file

 def __init__(self,
              bias_energies_full,
              lag,
              count_mode='sliding',
              connectivity='largest',
              maxiter=10000,
              maxerr=1E-15,
              dt_traj='1 step',
              save_convergence_info=0,
              init=None):
     """Store the estimator settings and reset the iteration variables.

     Parameters
     ----------
     bias_energies_full : 2-d numeric array
         Passed through ``_types.ensure_ndarray(..., ndim=2, kind='numeric')``.
         The two dimensions of the validated array are stored as
         ``nthermo`` and ``nstates_full``.
     lag
         Stored unchanged as ``self.lag``.
     count_mode : str, optional, default='sliding'
         Only 'sliding' is accepted.
     connectivity : str, optional, default='largest'
         Only 'largest' is accepted.
     maxiter, maxerr, save_convergence_info
         Stored unchanged under the same attribute names.
     dt_traj : str, optional, default='1 step'
         Stored unchanged and additionally wrapped as
         ``self.timestep_traj = _TimeUnit(dt_traj)``.
     init : None or 'wham', optional, default=None
         Only ``None`` and 'wham' are accepted.

     Raises
     ------
     AssertionError
         If ``count_mode``, ``connectivity`` or ``init`` has an unsupported value.
     """
     # set all parameters
     self.bias_energies_full = _types.ensure_ndarray(bias_energies_full,
                                                     ndim=2,
                                                     kind='numeric')
     self.lag = lag
     # NOTE(review): these asserts are skipped when Python runs with -O; they
     # are kept as asserts so callers keep seeing AssertionError.
     assert count_mode == 'sliding', 'Currently the only implemented count_mode is \'sliding\''
     self.count_mode = count_mode
     assert connectivity == 'largest', 'Currently the only implemented connectivity is \'largest\''
     self.connectivity = connectivity
     assert init in (
         None, 'wham'), 'Currently only None and \'wham\' are supported'
     self.init = init
     self.dt_traj = dt_traj
     self.maxiter = maxiter
     self.maxerr = maxerr
     self.save_convergence_info = save_convergence_info
     # set derived quantities
     # Read the shape from the validated attribute, not from the raw argument:
     # the raw argument is not guaranteed to expose ``.shape``.
     self.nthermo, self.nstates_full = self.bias_energies_full.shape
     self.timestep_traj = _TimeUnit(dt_traj)
     # set iteration variables
     self.therm_energies = None
     self.conf_energies = None
     self.log_lagrangian_mult = None

Example #2

0

Show file

File: TRAM_estimator.py Project: yuhangwang/PyEMMA

    def __init__(self, lag, count_mode='sliding', connectivity='summed_count_matrix',
                 ground_state=None, nstates_full=None, equilibrium=None,
                 maxiter=10000, maxerr=1.0E-15, save_convergence_info=0,
                 dt_traj='1 step', nn=None, connectivity_factor=1.0,
                 direct_space=False, N_dtram_accelerations=0, callback=None,
                 init='mbar', init_maxiter=5000, init_maxerr=1.0E-8,
                 overcounting_factor=1.0):
        """Record the estimator configuration; no estimation happens here.

        Every argument is stored unchanged under an attribute of the same
        name. In addition ``dt_traj`` is wrapped as
        ``self.timestep_traj = _TimeUnit(dt_traj)`` and the result attributes
        (``active_set``, ``biased_conf_energies``, ``mbar_therm_energies``,
        ``log_lagrangian_mult``, ``loglikelihoods``) are reset to ``None``.

        Raises
        ------
        AssertionError
            If ``count_mode`` is not 'sliding' or ``init`` is neither ``None``
            nor 'mbar'.
        """
        self.lag = lag

        # counting / connectivity options
        assert count_mode == 'sliding', \
            "Currently the only implemented count_mode is 'sliding'"
        self.count_mode = count_mode
        self.connectivity = connectivity
        self.nn = nn
        self.connectivity_factor = connectivity_factor

        # physical time step: raw description plus its _TimeUnit wrapper
        self.dt_traj = dt_traj
        self.timestep_traj = _TimeUnit(dt_traj)

        # data description
        self.ground_state = ground_state
        self.nstates_full = nstates_full
        self.equilibrium = equilibrium

        # iteration control
        self.maxiter = maxiter
        self.maxerr = maxerr
        self.direct_space = direct_space
        self.N_dtram_accelerations = N_dtram_accelerations
        self.callback = callback
        self.save_convergence_info = save_convergence_info

        # initialization of the iteration
        assert init in (None, 'mbar'), \
            "Currently only None and 'mbar' are supported"
        self.init = init
        self.init_maxiter = init_maxiter
        self.init_maxerr = init_maxerr
        self.overcounting_factor = overcounting_factor

        # result placeholders, assigned left to right
        self.active_set = self.biased_conf_energies = None
        self.mbar_therm_energies = self.log_lagrangian_mult = None
        self.loglikelihoods = None

Example #3

0

Show file

 def __init__(self,
              bias_energies_full,
              maxiter=10000,
              maxerr=1.0E-15,
              save_convergence_info=0,
              dt_traj='1 step',
              stride=1):
     """Store the estimator settings and reset the iteration variables.

     Parameters
     ----------
     bias_energies_full : 2-d numeric array
         Passed through ``_types.ensure_ndarray(..., ndim=2, kind='numeric')``.
         The two dimensions of the validated array are stored as
         ``nthermo`` and ``nstates_full``.
     maxiter, maxerr, save_convergence_info, stride
         Stored unchanged under the same attribute names.
     dt_traj : str, optional, default='1 step'
         Stored unchanged and additionally wrapped as
         ``self.timestep_traj = _TimeUnit(dt_traj)``.
     """
     self.bias_energies_full = _types.ensure_ndarray(bias_energies_full,
                                                     ndim=2,
                                                     kind='numeric')
     self.stride = stride
     self.dt_traj = dt_traj
     self.maxiter = maxiter
     self.maxerr = maxerr
     self.save_convergence_info = save_convergence_info
     # set derived quantities
     # Read the shape from the validated attribute, not from the raw argument:
     # the raw argument is not guaranteed to expose ``.shape``.
     self.nthermo, self.nstates_full = self.bias_energies_full.shape
     self.timestep_traj = _TimeUnit(dt_traj)
     # set iteration variables
     self.therm_energies = None
     self.conf_energies = None

Example #4

0

Show file

    def __init__(self, lag=1, reversible=True, statdist_constraint=None,
                 count_mode='sliding', sparse=False,
                 connectivity='largest', dt_traj='1 step', maxiter=1000000,
                 maxerr=1e-8):
        r"""Maximum likelihood estimator for MSMs given discrete trajectory statistics

        Parameters
        ----------
        lag : int
            lag time at which transitions are counted and the transition matrix is
            estimated.

        reversible : bool, optional, default = True
            If true compute reversible MSM, else non-reversible MSM

        statdist_constraint : (M,) ndarray, optional
            Stationary vector on the full set of states. Estimation will be
            made such that the resulting transition matrix has this distribution
            as an equilibrium distribution. Set probabilities to zero if these
            states should be excluded from the analysis. The vector is
            renormalized to sum to one; the array passed in is not modified.

        count_mode : str, optional, default='sliding'
            mode to obtain count matrices from discrete trajectories. Should be
            one of:

            * 'sliding' : A trajectory of length T will have :math:`T-\tau` counts
              at time indexes

              .. math::

                 (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

            * 'effective' : Uses an estimate of the transition counts that are
              statistically uncorrelated. Recommended when used with a
              Bayesian MSM.
            * 'sample' : A trajectory of length T will have :math:`T/\tau` counts
              at time indexes

              .. math::

                    (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., (((T/\tau)-1) \tau \rightarrow T)

        sparse : bool, optional, default = False
            If true compute count matrix, transition matrix and all derived
            quantities using sparse matrix algebra. In this case python sparse
            matrices will be returned by the corresponding functions instead of
            numpy arrays. This behavior is suggested for very large numbers of
            states (e.g. > 4000) because it is likely to be much more efficient.
        connectivity : str, optional, default = 'largest'
            Connectivity mode. Three methods are intended (currently only 'largest'
            is implemented)

            * 'largest' : The active set is the largest reversibly connected set.
              All estimation will be done on this subset and all quantities
              (transition matrix, stationary distribution, etc) are only defined
              on this subset and are correspondingly smaller than the full set
              of states
            * 'all' : The active set is the full set of states. Estimation will be
              conducted on each reversibly connected set separately. That means
              the transition matrix will decompose into disconnected submatrices,
              the stationary vector is only defined within subsets, etc.
              Currently not implemented.
            * 'none' : The active set is the full set of states. Estimation will
              be conducted on the full set of
              states without ensuring connectivity. This only permits
              nonreversible estimation. Currently not implemented.

        dt_traj : str, optional, default='1 step'
            Description of the physical time of the input trajectories. May be used
            by analysis algorithms such as plotting tools to pretty-print the axes.
            By default '1 step', i.e. there is no physical time unit. Specify by a
            number, whitespace and unit. Permitted units are (* is an arbitrary
            string):

            |  'fs',  'femtosecond*'
            |  'ps',  'picosecond*'
            |  'ns',  'nanosecond*'
            |  'us',  'microsecond*'
            |  'ms',  'millisecond*'
            |  's',   'second*'

        maxiter : int, optional, default = 1000000
            Optional parameter with reversible = True. maximum number of iterations
            before the transition matrix estimation method exits
        maxerr : float, optional, default = 1e-8
            Optional parameter with reversible = True.
            convergence tolerance for transition matrix estimation.
            This specifies the maximum change of the Euclidean norm of relative
            stationary probabilities (:math:`x_i = \sum_k x_{ik}`). The relative
            stationary probability changes
            :math:`e_i = (x_i^{(1)} - x_i^{(2)})/(x_i^{(1)} + x_i^{(2)})` are used
            in order to track changes in small probabilities. The Euclidean norm
            of the change vector, :math:`|e_i|_2`, is compared to maxerr.

        Raises
        ------
        ValueError
            If ``count_mode`` or ``connectivity`` is not one of the listed modes.
        NotImplementedError
            If ``connectivity`` is 'all' or 'none'.

        """
        self.lag = lag

        # set basic parameters
        self.reversible = reversible
        self.statdist_constraint = _types.ensure_ndarray_or_None(statdist_constraint, ndim=None, kind='numeric')
        if self.statdist_constraint is not None:  # renormalize
            # Divide out of place: an in-place ``/=`` would write into the
            # caller's array and cannot store the result in an integer array.
            self.statdist_constraint = self.statdist_constraint / self.statdist_constraint.sum()

        # sparse matrix computation wanted?
        self.sparse = sparse

        # store counting mode (lowercase)
        self.count_mode = str(count_mode).lower()
        if self.count_mode not in ('sliding', 'effective', 'sample'):
            # str() keeps the intended ValueError for non-string input instead
            # of a TypeError from the string concatenation.
            raise ValueError('count mode ' + str(count_mode) + ' is unknown.')

        # store connectivity mode (lowercase); coerced with str() like count_mode
        self.connectivity = str(connectivity).lower()
        if self.connectivity == 'largest':
            pass  # this is the current default. no need to do anything
        elif self.connectivity == 'all':
            raise NotImplementedError('MSM estimation with connectivity=\'all\' is currently not implemented.')
        elif self.connectivity == 'none':
            raise NotImplementedError('MSM estimation with connectivity=\'none\' is currently not implemented.')
        else:
            raise ValueError('connectivity mode ' + str(connectivity) + ' is unknown.')

        # time step
        self.dt_traj = dt_traj
        self.timestep_traj = _TimeUnit(dt_traj)

        # convergence parameters
        self.maxiter = maxiter
        self.maxerr = maxerr

Example #5

0

Show file

File: _msm_estimator_base.py Project: markovmodel/PyEMMA

 def dt_traj(self, value):
     """Store ``value`` as ``_dt_traj`` and rebuild ``timestep_traj`` from it.

     NOTE(review): presumably the setter of a ``dt_traj`` property; the
     decorator lies outside the visible snippet -- confirm.
     """
     self._dt_traj = value
     # Deliberately re-read through ``self.dt_traj`` (not ``value``) so the
     # time unit is built from whatever that attribute returns.
     self.timestep_traj = _TimeUnit(self.dt_traj)

Example #6

0

Show file

    def __init__(self,
                 lag,
                 count_mode='sliding',
                 connectivity='summed_count_matrix',
                 ground_state=None,
                 nstates_full=None,
                 equilibrium=None,
                 maxiter=10000,
                 maxerr=1.0E-15,
                 save_convergence_info=0,
                 dt_traj='1 step',
                 nn=None,
                 connectivity_factor=1.0,
                 direct_space=False,
                 N_dtram_accelerations=0,
                 callback=None,
                 init='mbar',
                 init_maxiter=5000,
                 init_maxerr=1.0E-8,
                 overcounting_factor=1.0):
        r"""Transition(-based) Reweighting Analysis Method

        Parameters
        ----------
        lag : int
            Integer lag time at which transitions are counted.
        count_mode : str, optional, default='sliding'
            mode to obtain count matrices from discrete trajectories. Should be
            one of:

            * 'sliding' : A trajectory of length T will have :math:`T-\tau` counts at time indexes

              .. math::

                 (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

            * 'sample' : A trajectory of length T will have :math:`T/\tau` counts
              at time indexes

              .. math::

                 (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., ((T/\tau-1) \tau \rightarrow T)

            Currently only 'sliding' is supported.
        connectivity : str, optional, default='summed_count_matrix'
            One of 'summed_count_matrix', 'strong_in_every_ensemble',
            'neighbors', 'post_hoc_RE' or 'BAR_variance'.
            Defines what should be considered a connected set in the joint space
            of conformations and thermodynamic ensembles.
            For details see thermotools.cset.compute_csets_TRAM.
        ground_state : int, optional, default=None
            Index of the unbiased thermodynamic state or None if there is no unbiased data available.
        nstates_full : int, optional, default=None
            Number of cluster centers, i.e., the size of the full set of states.
        equilibrium : list of booleans, optional
            For every trajectory triple (ttraj[i], dtraj[i], btraj[i]), indicates
            whether to assume global equilibrium. If true, the triple is not used
            for computing kinetic quantities (but only thermodynamic quantities).
            By default, no trajectory is assumed to be in global equilibrium.
            This is the TRAMMBAR extension.
        maxiter : int, optional, default=10000
            The maximum number of self-consistent iterations before the estimator exits unsuccessfully.
        maxerr : float, optional, default=1E-15
            Convergence criterion based on the maximal free energy change in a self-consistent
            iteration step.
        save_convergence_info : int, optional, default=0
            Every save_convergence_info iteration steps, store the actual increment
            and the actual loglikelihood; 0 means no storage.
        dt_traj : str, optional, default='1 step'
            Description of the physical time corresponding to the lag. May be used by analysis
            algorithms such as plotting tools to pretty-print the axes. By default '1 step', i.e.
            there is no physical time unit.  Specify by a number, whitespace and unit. Permitted
            units are (* is an arbitrary string):

            |  'fs',   'femtosecond*'
            |  'ps',   'picosecond*'
            |  'ns',   'nanosecond*'
            |  'us',   'microsecond*'
            |  'ms',   'millisecond*'
            |  's',    'second*'
        nn : int, optional, default=None
            Only needed if connectivity='neighbors'
            See thermotools.cset.compute_csets_TRAM.
        connectivity_factor : float, optional, default=1.0
            Only needed if connectivity='post_hoc_RE' or 'BAR_variance'. Weakens the connectivity
            requirement, see thermotools.cset.compute_csets_TRAM.
        direct_space : bool, optional, default=False
            Whether to perform the self-consistent iteration with Boltzmann factors
            (direct space) or free energies (log-space). When analyzing data from
            multi-temperature simulations, direct-space is not recommended.
        N_dtram_accelerations : int, optional, default=0
            Convergence of TRAM can be speeded up by interleaving the updates
            in the self-consistent iteration with a dTRAM-like update step.
            N_dtram_accelerations says how many times the dTRAM-like update
            step should be applied in every iteration of the TRAM equations.
            Currently this is only effective if direct_space=True.
        callback : optional, default=None
            Stored unchanged as ``self.callback``.
        init : str, optional, default='mbar'
            Use a specific initialization for self-consistent iteration:

            | None:    use a hard-coded guess for free energies and Lagrangian multipliers
            | 'mbar':  perform a short MBAR estimate to initialize the free energies
        init_maxiter : int, optional, default=5000
            The maximum number of self-consistent iterations during the initialization.
        init_maxerr : float, optional, default=1.0E-8
            Convergence criterion for the initialization.
        overcounting_factor : double, default = 1.0
            Only needed if equilibrium contains True (TRAMMBAR).
            Sets the relative statistical weight of equilibrium and non-equilibrium
            frames. An overcounting_factor of value n means that every
            non-equilibrium frame is counted n times. Values larger than 1 increase
            the relative weight of the non-equilibrium data. Values less than 1
            increase the relative weight of the equilibrium data.

        Raises
        ------
        AssertionError
            If ``count_mode`` is not 'sliding' or ``init`` is neither ``None``
            nor 'mbar'.

        References
        ----------

        .. [1] Wu, H. et al 2016
            Multiensemble Markov models of molecular thermodynamics and kinetics
            Proc. Natl. Acad. Sci. USA 113 E3221--E3230

        """
        self.lag = lag
        # NOTE(review): asserts are skipped when Python runs with -O; kept as
        # asserts so callers keep seeing AssertionError.
        assert count_mode == 'sliding', 'Currently the only implemented count_mode is \'sliding\''
        self.count_mode = count_mode
        self.connectivity = connectivity
        self.nn = nn
        self.connectivity_factor = connectivity_factor
        # keep both the raw description and its _TimeUnit wrapper
        self.dt_traj = dt_traj
        self.timestep_traj = _TimeUnit(dt_traj)
        self.ground_state = ground_state
        self.nstates_full = nstates_full
        self.equilibrium = equilibrium
        self.maxiter = maxiter
        self.maxerr = maxerr
        self.direct_space = direct_space
        self.N_dtram_accelerations = N_dtram_accelerations
        self.callback = callback
        self.save_convergence_info = save_convergence_info
        assert init in (
            None, 'mbar'), 'Currently only None and \'mbar\' are supported'
        self.init = init
        self.init_maxiter = init_maxiter
        self.init_maxerr = init_maxerr
        self.overcounting_factor = overcounting_factor
        # result attributes start out empty
        self.active_set = None
        self.biased_conf_energies = None
        self.mbar_therm_energies = None
        self.log_lagrangian_mult = None
        self.loglikelihoods = None

Example #7

0

Show file

File: TRAM_estimator.py Project: noinil/PyEMMA

def __init__(self,
             lag,
             count_mode='sliding',
             connectivity='post_hoc_RE',
             nstates_full=None,
             equilibrium=None,
             maxiter=10000,
             maxerr=1.0E-15,
             save_convergence_info=0,
             dt_traj='1 step',
             nn=None,
             connectivity_factor=1.0,
             direct_space=False,
             N_dtram_accelerations=0,
             callback=None,
             init='mbar',
             init_maxiter=5000,
             init_maxerr=1.0E-8,
             overcounting_factor=1.0):
    r"""Transition(-based) Reweighting Analysis Method

    Parameters
    ----------
    lag : int
        Integer lag time at which transitions are counted.
    count_mode : str, optional, default='sliding'
        mode to obtain count matrices from discrete trajectories. Should be
        one of:

        * 'sliding' : A trajectory of length T will have :math:`T-\tau` counts at time indexes

          .. math::

             (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

        * 'sample' : A trajectory of length T will have :math:`T/\tau` counts
          at time indexes

          .. math::

             (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., ((T/\tau-1) \tau \rightarrow T)

        Currently only 'sliding' is supported.
    connectivity : str, optional, default='post_hoc_RE'
        One of 'post_hoc_RE', 'BAR_variance', 'reversible_pathways' or
        'summed_count_matrix'. Defines what should be considered a connected set
        in the joint (product) space of conformations and thermodynamic ensembles.

        * 'reversible_pathways' : requires that every state in the connected set
          can be reached by following a pathway of reversible transitions. A
          reversible transition between two Markov states (within the same
          thermodynamic state k) is a pair of Markov states that belong to the
          same strongly connected component of the count matrix (from
          thermodynamic state k). A pathway of reversible transitions is a list of
          reversible transitions [(i_1, i_2), (i_2, i_3),..., (i_(N-2), i_(N-1)),
          (i_(N-1), i_N)]. The thermodynamic state where the reversible
          transitions happen, is ignored in constructing the reversible pathways.
          This is equivalent to assuming that two ensembles overlap at some Markov
          state whenever there exist frames from both ensembles in that Markov
          state.
        * 'post_hoc_RE' : similar to 'reversible_pathways' but with a more strict
          requirement for the overlap between thermodynamic states. It is required
          that every state in the connected set can be reached by following a
          pathway of reversible transitions or jumping between overlapping
          thermodynamic states while staying in the same Markov state. A reversible
          transition between two Markov states (within the same thermodynamic
          state k) is a pair of Markov states that belong to the same strongly
          connected component of the count matrix (from thermodynamic state k).
          Two thermodynamic states k and l are defined to overlap at Markov state
          n if a replica exchange simulation [2]_ restricted to state n would show
          at least one transition from k to l or one transition from l to k.
          The expected number of replica exchanges is estimated from the
          simulation data. The minimal number required of replica exchanges
          per Markov state can be increased by decreasing `connectivity_factor`.
        * 'BAR_variance' : like 'post_hoc_RE' but with a different condition to
          define the thermodynamic overlap based on the variance of the BAR
          estimator [3]_. Two thermodynamic states k and l are defined to overlap
          at Markov state n if the variance of the free energy difference Delta
          f_{kl} computed with BAR (and restricted to conformations from Markov
          state n) is less than or equal to one. The minimally required variance
          can be controlled with `connectivity_factor`.
        * 'summed_count_matrix' : all thermodynamic states are assumed to overlap.
          The connected set is then computed by summing the count matrices over
          all thermodynamic states and taking its largest strongly connected set.
          Not recommended!

        For more details see :func:`thermotools.cset.compute_csets_TRAM`.
    nstates_full : int, optional, default=None
        Number of cluster centers, i.e., the size of the full set of states.
    equilibrium : list of booleans, optional
        For every trajectory triple (ttraj[i], dtraj[i], btraj[i]), indicates
        whether to assume global equilibrium. If true, the triple is not used
        for computing kinetic quantities (but only thermodynamic quantities).
        By default, no trajectory is assumed to be in global equilibrium.
        This is the TRAMMBAR extension.
    maxiter : int, optional, default=10000
        The maximum number of self-consistent iterations before the estimator exits unsuccessfully.
    maxerr : float, optional, default=1E-15
        Convergence criterion based on the maximal free energy change in a self-consistent
        iteration step.
    save_convergence_info : int, optional, default=0
        Every save_convergence_info iteration steps, store the actual increment
        and the actual log-likelihood; 0 means no storage.
    dt_traj : str, optional, default='1 step'
        Description of the physical time corresponding to the lag. May be used by analysis
        algorithms such as plotting tools to pretty-print the axes. By default '1 step', i.e.
        there is no physical time unit. Specify by a number, whitespace and unit. Permitted
        units are (* is an arbitrary string):

        | 'fs', 'femtosecond*'
        | 'ps', 'picosecond*'
        | 'ns', 'nanosecond*'
        | 'us', 'microsecond*'
        | 'ms', 'millisecond*'
        | 's', 'second*'
    nn : optional, default=None
        Stored unchanged as ``self.nn``.
        NOTE(review): not described in the original documentation of this
        version -- confirm its meaning before relying on it.
    connectivity_factor : float, optional, default=1.0
        Only needed if connectivity='post_hoc_RE' or 'BAR_variance'. Values
        greater than 1.0 weaken the connectivity conditions. For 'post_hoc_RE'
        this multiplies the number of hypothetically observed transitions. For
        'BAR_variance' this scales the threshold for the minimal allowed variance
        of free energy differences.
    direct_space : bool, optional, default=False
        Whether to perform the self-consistent iteration with Boltzmann factors
        (direct space) or free energies (log-space). When analyzing data from
        multi-temperature simulations, direct-space is not recommended.
    N_dtram_accelerations : int, optional, default=0
        Convergence of TRAM can be speeded up by interleaving the updates
        in the self-consistent iteration with a dTRAM-like update step.
        N_dtram_accelerations says how many times the dTRAM-like update
        step should be applied in every iteration of the TRAM equations.
        Currently this is only effective if direct_space=True.
    callback : optional, default=None
        Stored unchanged as ``self.callback``.
    init : str, optional, default='mbar'
        Use a specific initialization for self-consistent iteration:

        | None: use a hard-coded guess for free energies and Lagrangian multipliers
        | 'mbar': perform a short MBAR estimate to initialize the free energies
    init_maxiter : int, optional, default=5000
        The maximum number of self-consistent iterations during the initialization.
    init_maxerr : float, optional, default=1.0E-8
        Convergence criterion for the initialization.
    overcounting_factor : double, default = 1.0
        Only needed if equilibrium contains True (TRAMMBAR).
        Sets the relative statistical weight of equilibrium and non-equilibrium
        frames. An overcounting_factor of value n means that every
        non-equilibrium frame is counted n times. Values larger than 1 increase
        the relative weight of the non-equilibrium data. Values less than 1
        increase the relative weight of the equilibrium data.

    Raises
    ------
    AssertionError
        If ``count_mode`` is not 'sliding' or ``init`` is neither ``None``
        nor 'mbar'.

    References
    ----------
    .. [1] Wu, H. et al 2016
        Multiensemble Markov models of molecular thermodynamics and kinetics
        Proc. Natl. Acad. Sci. USA 113 E3221--E3230
    .. [2] Hukushima et al, Exchange Monte Carlo method and application to spin
        glass simulations, J. Phys. Soc. Jpn. 65, 1604 (1996)
    .. [3] Shirts and Chodera, Statistically optimal analysis of samples
        from multiple equilibrium states, J. Chem. Phys. 129, 124105 (2008)

    """
    self.lag = lag
    # NOTE(review): asserts are skipped when Python runs with -O; kept as
    # asserts so callers keep seeing AssertionError.
    assert count_mode == 'sliding', 'Currently the only implemented count_mode is \'sliding\''
    self.count_mode = count_mode
    self.connectivity = connectivity
    self.nn = nn
    self.connectivity_factor = connectivity_factor
    # keep both the raw description and its _TimeUnit wrapper
    self.dt_traj = dt_traj
    self.timestep_traj = _TimeUnit(dt_traj)
    self.nstates_full = nstates_full
    self.equilibrium = equilibrium
    self.maxiter = maxiter
    self.maxerr = maxerr
    self.direct_space = direct_space
    self.N_dtram_accelerations = N_dtram_accelerations
    self.callback = callback
    self.save_convergence_info = save_convergence_info
    assert init in (
        None, 'mbar'), 'Currently only None and \'mbar\' are supported'
    self.init = init
    self.init_maxiter = init_maxiter
    self.init_maxerr = init_maxerr
    self.overcounting_factor = overcounting_factor
    # result attributes start out empty
    self.active_set = None
    self.biased_conf_energies = None
    self.mbar_therm_energies = None
    self.log_lagrangian_mult = None
    self.loglikelihoods = None

Example #8

0

Show file

    def __init__(
        self, bias_energies_full, lag, count_mode='sliding', connectivity='largest',
        maxiter=10000, maxerr=1.0E-15, save_convergence_info=0, dt_traj='1 step',
        init=None, init_maxiter=10000, init_maxerr=1.0E-8):
        r""" Discrete Transition(-based) Reweighting Analysis Method

        Parameters
        ----------
        bias_energies_full : numpy.ndarray(shape=(num_therm_states, num_conf_states)) object
            bias_energies_full[j, i] is the bias energy in units of kT for each discrete state i
            at thermodynamic state j.
        lag : int
            Integer lag time at which transitions are counted.
        count_mode : str, optional, default='sliding'
            Mode to obtain count matrices from discrete trajectories. Should be one of:

            * 'sliding' : a trajectory of length T will have :math:`T-\tau` counts at time indexes

              .. math::

                 (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

            * 'sample' : a trajectory of length T will have :math:`T/\tau` counts at time indexes

              .. math::

                 (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., ((T/\tau-1) \tau \rightarrow T)

            Currently only 'sliding' is supported.
        connectivity : str, optional, default='largest'
            Defines what should be considered a connected set in the joint space of conformations and
            thermodynamic ensembles. Currently only 'largest' is supported.
        maxiter : int, optional, default=10000
            The maximum number of self-consistent iterations before the estimator exits unsuccessfully.
        maxerr : float, optional, default=1.0E-15
            Convergence criterion based on the maximal free energy change in a self-consistent
            iteration step.
        save_convergence_info : int, optional, default=0
            Every save_convergence_info iteration steps, store the actual increment
            and the actual loglikelihood; 0 means no storage.
        dt_traj : str, optional, default='1 step'
            Description of the physical time corresponding to the lag. May be used by analysis
            algorithms such as plotting tools to pretty-print the axes. By default '1 step', i.e.
            there is no physical time unit.  Specify by a number, whitespace and unit. Permitted
            units are (* is an arbitrary string):

            |  'fs',   'femtosecond*'
            |  'ps',   'picosecond*'
            |  'ns',   'nanosecond*'
            |  'us',   'microsecond*'
            |  'ms',   'millisecond*'
            |  's',    'second*'
        init : str, optional, default=None
            Use a specific initialization for self-consistent iteration:

            | None:    use a hard-coded guess for free energies and Lagrangian multipliers
            | 'wham':  perform a short WHAM estimate to initialize the free energies
        init_maxiter : int, optional, default=10000
            The maximum number of self-consistent iterations during the initialization.
        init_maxerr : float, optional, default=1.0E-8
            Convergence criterion for the initialization.

        Raises
        ------
        AssertionError
            If ``count_mode``, ``connectivity`` or ``init`` has an unsupported value.

        Example
        -------
        >>> from pyemma.thermo import DTRAM
        >>> import numpy as np
        >>> B = np.array([[0, 0],[0.5, 1.0]])
        >>> dtram = DTRAM(B, 1)
        >>> ttrajs = [np.array([0,0,0,0,0,0,0,0,0,0]),np.array([1,1,1,1,1,1,1,1,1,1])]
        >>> dtrajs = [np.array([0,0,0,0,1,1,1,0,0,0]),np.array([0,1,0,1,0,1,1,0,0,1])]
        >>> dtram = dtram.estimate((ttrajs, dtrajs))
        >>> dtram.log_likelihood() # doctest: +ELLIPSIS
        -9.805...
        >>> dtram.count_matrices # doctest: +SKIP
        array([[[5, 1],
                [1, 2]],

               [[1, 4],
                [3, 1]]], dtype=int32)
        >>> dtram.stationary_distribution # doctest: +ELLIPSIS
        array([ 0.38...,  0.61...])
        >>> dtram.meval('stationary_distribution') # doctest: +ELLIPSIS
        [array([ 0.38...,  0.61...]), array([ 0.50...,  0.49...])]

        References
        ----------

        .. [1] Wu, H. et al 2014
            Statistically optimal analysis of state-discretized trajectory data from multiple thermodynamic states
            J. Chem. Phys. 141, 214106

        """
        # set all parameters
        self.bias_energies_full = _types.ensure_ndarray(bias_energies_full, ndim=2, kind='numeric')
        self.lag = lag
        # NOTE(review): asserts are skipped when Python runs with -O; kept as
        # asserts so callers keep seeing AssertionError.
        assert count_mode == 'sliding', 'Currently the only implemented count_mode is \'sliding\''
        self.count_mode = count_mode
        assert connectivity == 'largest', 'Currently the only implemented connectivity is \'largest\''
        self.connectivity = connectivity
        self.dt_traj = dt_traj
        self.maxiter = maxiter
        self.maxerr = maxerr
        self.save_convergence_info = save_convergence_info
        assert init in (None, 'wham'), 'Currently only None and \'wham\' are supported'
        self.init = init
        self.init_maxiter = init_maxiter
        self.init_maxerr = init_maxerr
        # set derived quantities
        # Read the shape from the validated attribute, not from the raw argument:
        # the raw argument is not guaranteed to expose ``.shape``.
        self.nthermo, self.nstates_full = self.bias_energies_full.shape
        self.timestep_traj = _TimeUnit(dt_traj)
        # set iteration variables
        self.therm_energies = None
        self.conf_energies = None
        self.log_lagrangian_mult = None

Example #9

0

Show file

File: maximum_likelihood_msm.py Project: zzmjohn/PyEMMA

    def __init__(self, lag=1, reversible=True, count_mode='sliding', sparse=False,
                 connectivity='largest', dt_traj='1 step'):
        r"""Maximum likelihood estimator for MSMs given discrete trajectory statistics

        Stores the estimation parameters on the instance after validating the
        counting and connectivity modes.

        Parameters
        ----------
        lag : int
            lag time at which transitions are counted and the transition matrix is
            estimated.

        reversible : bool, optional, default = True
            If true compute reversible MSM, else non-reversible MSM

        count_mode : str, optional, default='sliding'
            mode to obtain count matrices from discrete trajectories. Matched
            case-insensitively. Should be one of:

            * 'sliding' : A trajectory of length T will have :math:`T-\tau` counts
              at time indexes

              .. math::

                 (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

            * 'effective' : Uses an estimate of the transition counts that are
              statistically uncorrelated. Recommended when used with a
              Bayesian MSM.
            * 'sample' : A trajectory of length T will have :math:`T/\tau` counts
              at time indexes

              .. math::

                    (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., (((T/\tau)-1) \tau \rightarrow T)

        sparse : bool, optional, default = False
            If true compute count matrix, transition matrix and all derived
            quantities using sparse matrix algebra. In this case python sparse
            matrices will be returned by the corresponding functions instead of
            numpy arrays. This behavior is suggested for very large numbers of
            states (e.g. > 4000) because it is likely to be much more efficient.
        connectivity : str, optional, default = 'largest'
            Connectivity mode, matched case-insensitively. Three methods are
            intended (currently only 'largest' is implemented)

            * 'largest' : The active set is the largest reversibly connected set.
              All estimation will be done on this subset and all quantities
              (transition matrix, stationary distribution, etc) are only defined
              on this subset and are correspondingly smaller than the full set
              of states
            * 'all' : The active set is the full set of states. Estimation will be
              conducted on each reversibly connected set separately. That means
              the transition matrix will decompose into disconnected submatrices,
              the stationary vector is only defined within subsets, etc.
              Currently not implemented.
            * 'none' : The active set is the full set of states. Estimation will
              be conducted on the full set of
              states without ensuring connectivity. This only permits
              nonreversible estimation. Currently not implemented.

        dt_traj : str, optional, default='1 step'
            Description of the physical time of the input trajectories. May be used
            by analysis algorithms such as plotting tools to pretty-print the axes.
            By default '1 step', i.e. there is no physical time unit. Specify by a
            number, whitespace and unit. Permitted units are (* is an arbitrary
            string):

            |  'fs',  'femtosecond*'
            |  'ps',  'picosecond*'
            |  'ns',  'nanosecond*'
            |  'us',  'microsecond*'
            |  'ms',  'millisecond*'
            |  's',   'second*'

        Raises
        ------
        ValueError
            If count_mode or connectivity is not one of the modes listed above.
        NotImplementedError
            If connectivity is 'all' or 'none'.

        """
        # basic parameters are stored unchanged
        self.lag = lag
        self.reversible = reversible
        self.sparse = sparse

        # Counting mode, stored in lowercase. The argument is passed through
        # str() both here and in the message, so a non-string value is reported
        # as an unknown mode (ValueError) instead of failing with a TypeError
        # while the error message is being built.
        self.count_mode = str(count_mode).lower()
        if self.count_mode not in ('sliding', 'effective', 'sample'):
            raise ValueError('count mode ' + str(count_mode) + ' is unknown.')

        # Connectivity mode, stored in lowercase. str() keeps non-string values
        # on the ValueError path, matching the str(connectivity) already used
        # in that message.
        self.connectivity = str(connectivity).lower()
        if self.connectivity in ('all', 'none'):
            # recognised modes that have no implementation yet
            raise NotImplementedError('MSM estimation with connectivity=\'' + self.connectivity
                                      + '\' is currently not implemented.')
        if self.connectivity != 'largest':
            raise ValueError('connectivity mode ' + str(connectivity) + ' is unknown.')
        # 'largest' is the current default and needs no further setup

        # time step: keep the raw description and the _TimeUnit built from it
        self.dt_traj = dt_traj
        self.timestep_traj = _TimeUnit(dt_traj)

Example #10

0

Show file

    def __init__(self,
                 bias_energies_full,
                 maxiter=10000,
                 maxerr=1.0E-15,
                 save_convergence_info=0,
                 dt_traj='1 step',
                 stride=1):
        r"""Weighted Histogram Analysis Method

        Stores the estimation parameters on the instance and derives the number
        of thermodynamic and configuration states from the bias energy matrix.

        Parameters
        ----------
        bias_energies_full : numpy.ndarray(shape=(num_therm_states, num_conf_states)) object
            bias_energies_full[j, i] is the bias energy in units of kT for each discrete state i
            at thermodynamic state j.
        maxiter : int, optional, default=10000
            The maximum number of self-consistent iterations before the estimator exits unsuccessfully.
        maxerr : float, optional, default=1.0E-15
            Convergence criterion based on the maximal free energy change in a self-consistent
            iteration step.
        save_convergence_info : int, optional, default=0
            Every save_convergence_info iteration steps, store the actual increment
            and the actual loglikelihood; 0 means no storage.
        dt_traj : str, optional, default='1 step'
            Description of the physical time of the input trajectories. May be used by analysis
            algorithms such as plotting tools to pretty-print the axes. By default '1 step', i.e.
            there is no physical time unit.  Specify by a number, whitespace and unit. Permitted
            units are (* is an arbitrary string):

            |  'fs',   'femtosecond*'
            |  'ps',   'picosecond*'
            |  'ns',   'nanosecond*'
            |  'us',   'microsecond*'
            |  'ms',   'millisecond*'
            |  's',    'second*'
        stride : int, optional, default=1
            not used (only stored on the estimator here)

        Example
        -------
        >>> from pyemma.thermo import WHAM
        >>> import numpy as np
        >>> B = np.array([[0, 0],[0.5, 1.0]])
        >>> wham = WHAM(B)
        >>> ttrajs = [np.array([0,0,0,0,0,0,0,0,0,0]),np.array([1,1,1,1,1,1,1,1,1,1])]
        >>> dtrajs = [np.array([0,0,0,0,1,1,1,0,0,0]),np.array([0,1,0,1,0,1,1,0,0,1])]
        >>> wham = wham.estimate((ttrajs, dtrajs))
        >>> wham.log_likelihood() # doctest: +ELLIPSIS
        -6.6...
        >>> wham.state_counts # doctest: +SKIP
        array([[7, 3],
               [5, 5]])
        >>> wham.stationary_distribution # doctest: +ELLIPSIS +REPORT_NDIFF
        array([ 0.5...,  0.4...])
        >>> wham.meval('stationary_distribution') # doctest: +ELLIPSIS +REPORT_NDIFF
        [array([ 0.5...,  0.4...]), array([ 0.6...,  0.3...])]

        References
        ----------

        .. [1] Ferrenberg, A.M. and Swendsen, R.H. 1988.
            New Monte Carlo Technique for Studying Phase Transitions.
            Phys. Rev. Lett. 61, 2635--2638

        .. [2] Kumar, S. et al 1992.
            The Weighted Histogram Analysis Method for Free-Energy Calculations on Biomolecules. I. The Method.
            J. Comp. Chem. 13, 1011--1021

        """
        # set all parameters; the bias energies go through the project's
        # ensure_ndarray check (requested as 2-dimensional, numeric)
        self.bias_energies_full = _types.ensure_ndarray(bias_energies_full,
                                                        ndim=2,
                                                        kind='numeric')
        self.stride = stride
        self.dt_traj = dt_traj
        self.maxiter = maxiter
        self.maxerr = maxerr
        self.save_convergence_info = save_convergence_info
        # set derived quantities
        # Read the shape from the validated array rather than from the raw
        # argument, which need not be an ndarray (a nested list has no .shape).
        self.nthermo, self.nstates_full = self.bias_energies_full.shape
        self.timestep_traj = _TimeUnit(dt_traj)
        # set iteration variables; left as None by the constructor
        self.therm_energies = None
        self.conf_energies = None