Example #1
0
 def __init__(self,
              bias_energies_full,
              lag,
              count_mode='sliding',
              connectivity='largest',
              maxiter=10000,
              maxerr=1E-15,
              dt_traj='1 step',
              save_convergence_info=0,
              init=None):
     r"""Store the estimator configuration and reset the iteration variables.

     Parameters
     ----------
     bias_energies_full : 2-dimensional numeric array
         Passed through ``_types.ensure_ndarray(..., ndim=2, kind='numeric')``
         before it is stored. The two axis lengths of the stored array become
         ``nthermo`` and ``nstates_full`` (in that order).
     lag : int
         Lag time; stored unchanged.
     count_mode : str, optional, default='sliding'
         Only 'sliding' is accepted.
     connectivity : str, optional, default='largest'
         Only 'largest' is accepted.
     maxiter : int, optional, default=10000
         Stored unchanged; presumably the iteration limit of the estimation
         routine, which is not visible here.
     maxerr : float, optional, default=1E-15
         Stored unchanged; presumably the convergence tolerance.
     dt_traj : str, optional, default='1 step'
         Stored unchanged and additionally wrapped in ``_TimeUnit`` as
         ``timestep_traj``.
     save_convergence_info : int, optional, default=0
         Stored unchanged.
     init : str or None, optional, default=None
         Only None and 'wham' are accepted.

     Raises
     ------
     AssertionError
         If `count_mode`, `connectivity` or `init` holds an unsupported value.

     """
     # set all parameters
     self.bias_energies_full = _types.ensure_ndarray(bias_energies_full,
                                                     ndim=2,
                                                     kind='numeric')
     self.lag = lag
     # The checks below raise explicitly instead of using ``assert`` so that
     # they are not stripped when Python runs with -O. AssertionError is kept
     # as the exception type for backward compatibility.
     if count_mode != 'sliding':
         raise AssertionError("Currently the only implemented count_mode is 'sliding'")
     self.count_mode = count_mode
     if connectivity != 'largest':
         raise AssertionError("Currently the only implemented connectivity is 'largest'")
     self.connectivity = connectivity
     if init not in (None, 'wham'):
         raise AssertionError("Currently only None and 'wham' are supported")
     self.init = init
     self.dt_traj = dt_traj
     self.maxiter = maxiter
     self.maxerr = maxerr
     self.save_convergence_info = save_convergence_info
     # set derived quantities
     # Read the shape from the ensured array rather than from the raw argument,
     # so the derived sizes always describe what is actually stored and inputs
     # without a ``shape`` attribute do not fail here.
     self.nthermo, self.nstates_full = self.bias_energies_full.shape
     self.timestep_traj = _TimeUnit(dt_traj)
     # set iteration variables
     self.therm_energies = None
     self.conf_energies = None
     self.log_lagrangian_mult = None
Example #2
0
    def __init__(self,
                 lag,
                 count_mode='sliding',
                 connectivity='summed_count_matrix',
                 ground_state=None,
                 nstates_full=None,
                 equilibrium=None,
                 maxiter=10000,
                 maxerr=1.0E-15,
                 save_convergence_info=0,
                 dt_traj='1 step',
                 nn=None,
                 connectivity_factor=1.0,
                 direct_space=False,
                 N_dtram_accelerations=0,
                 callback=None,
                 init='mbar',
                 init_maxiter=5000,
                 init_maxerr=1.0E-8,
                 overcounting_factor=1.0):
        r"""Store the estimator configuration and reset the result attributes.

        Every argument is stored unchanged under an attribute of the same
        name. Only `count_mode` and `init` are validated here.

        Parameters
        ----------
        lag : int
            Lag time.
        count_mode : str, optional, default='sliding'
            Only 'sliding' is accepted.
        connectivity : str, optional, default='summed_count_matrix'
            Connectivity mode; not validated in this method.
        ground_state : optional, default=None
        nstates_full : optional, default=None
        equilibrium : optional, default=None
        maxiter : int, optional, default=10000
        maxerr : float, optional, default=1.0E-15
        save_convergence_info : int, optional, default=0
        dt_traj : str, optional, default='1 step'
            Also wrapped in ``_TimeUnit`` and stored as ``timestep_traj``.
        nn : optional, default=None
        connectivity_factor : float, optional, default=1.0
        direct_space : bool, optional, default=False
        N_dtram_accelerations : int, optional, default=0
        callback : optional, default=None
        init : str or None, optional, default='mbar'
            Only None and 'mbar' are accepted.
        init_maxiter : int, optional, default=5000
        init_maxerr : float, optional, default=1.0E-8
        overcounting_factor : float, optional, default=1.0

        Raises
        ------
        AssertionError
            If `count_mode` or `init` holds an unsupported value.

        """
        self.lag = lag
        # Raise explicitly instead of using ``assert`` so the check survives
        # ``python -O``; AssertionError is kept for backward compatibility.
        if count_mode != 'sliding':
            raise AssertionError("Currently the only implemented count_mode is 'sliding'")
        self.count_mode = count_mode
        self.connectivity = connectivity
        self.nn = nn
        self.connectivity_factor = connectivity_factor
        self.dt_traj = dt_traj
        self.timestep_traj = _TimeUnit(dt_traj)
        self.ground_state = ground_state
        self.nstates_full = nstates_full
        self.equilibrium = equilibrium
        self.maxiter = maxiter
        self.maxerr = maxerr
        self.direct_space = direct_space
        self.N_dtram_accelerations = N_dtram_accelerations
        self.callback = callback
        self.save_convergence_info = save_convergence_info
        if init not in (None, 'mbar'):
            raise AssertionError("Currently only None and 'mbar' are supported")
        self.init = init
        self.init_maxiter = init_maxiter
        self.init_maxerr = init_maxerr
        self.overcounting_factor = overcounting_factor
        # result attributes start out empty
        self.active_set = None
        self.biased_conf_energies = None
        self.mbar_therm_energies = None
        self.log_lagrangian_mult = None
        self.loglikelihoods = None
Example #3
0
 def __init__(self,
              bias_energies_full,
              maxiter=10000,
              maxerr=1.0E-15,
              save_convergence_info=0,
              dt_traj='1 step',
              stride=1):
     r"""Store the estimator configuration and reset the iteration variables.

     Parameters
     ----------
     bias_energies_full : 2-dimensional numeric array
         Passed through ``_types.ensure_ndarray(..., ndim=2, kind='numeric')``
         before it is stored. The two axis lengths of the stored array become
         ``nthermo`` and ``nstates_full`` (in that order).
     maxiter : int, optional, default=10000
         Stored unchanged; presumably the iteration limit of the estimation
         routine, which is not visible here.
     maxerr : float, optional, default=1.0E-15
         Stored unchanged; presumably the convergence tolerance.
     save_convergence_info : int, optional, default=0
         Stored unchanged.
     dt_traj : str, optional, default='1 step'
         Stored unchanged and additionally wrapped in ``_TimeUnit`` as
         ``timestep_traj``.
     stride : int, optional, default=1
         Stored unchanged. NOTE(review): presumably a subsampling stride for
         the input data; confirm against the estimation code.

     """
     self.bias_energies_full = _types.ensure_ndarray(bias_energies_full,
                                                     ndim=2,
                                                     kind='numeric')
     self.stride = stride
     self.dt_traj = dt_traj
     self.maxiter = maxiter
     self.maxerr = maxerr
     self.save_convergence_info = save_convergence_info
     # set derived quantities
     # Read the shape from the ensured array rather than from the raw argument,
     # so the derived sizes always describe what is actually stored and inputs
     # without a ``shape`` attribute do not fail here.
     self.nthermo, self.nstates_full = self.bias_energies_full.shape
     self.timestep_traj = _TimeUnit(dt_traj)
     # set iteration variables
     self.therm_energies = None
     self.conf_energies = None
Example #4
0
    def __init__(self, lag=1, reversible=True, statdist_constraint=None,
                 count_mode='sliding', sparse=False,
                 connectivity='largest', dt_traj='1 step', maxiter=1000000,
                 maxerr=1e-8):
        r"""Maximum likelihood estimator for MSMs given discrete trajectory statistics

        Parameters
        ----------
        lag : int
            lag time at which transitions are counted and the transition matrix is
            estimated.

        reversible : bool, optional, default = True
            If true compute reversible MSM, else non-reversible MSM

        statdist_constraint : (M,) ndarray, optional
            Stationary vector on the full set of states. Estimation will be
            made such that the resulting transition matrix has this distribution
            as an equilibrium distribution. Set probabilities to zero if these
            states should be excluded from the analysis. A normalized copy
            (divided by its sum) is stored; the array passed in is left
            untouched.

        count_mode : str, optional, default='sliding'
            mode to obtain count matrices from discrete trajectories. Should be
            one of:

            * 'sliding' : A trajectory of length T will have :math:`T-\tau` counts
              at time indexes

              .. math::

                 (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

            * 'effective' : Uses an estimate of the transition counts that are
              statistically uncorrelated. Recommended when used with a
              Bayesian MSM.
            * 'sample' : A trajectory of length T will have :math:`T/\tau` counts
              at time indexes

              .. math::

                    (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., (((T/\tau)-1) \tau \rightarrow T)

        sparse : bool, optional, default = False
            If true compute count matrix, transition matrix and all derived
            quantities using sparse matrix algebra. In this case python sparse
            matrices will be returned by the corresponding functions instead of
            numpy arrays. This behavior is suggested for very large numbers of
            states (e.g. > 4000) because it is likely to be much more efficient.
        connectivity : str, optional, default = 'largest'
            Connectivity mode. Three methods are intended (currently only 'largest'
            is implemented)

            * 'largest' : The active set is the largest reversibly connected set.
              All estimation will be done on this subset and all quantities
              (transition matrix, stationary distribution, etc) are only defined
              on this subset and are correspondingly smaller than the full set
              of states
            * 'all' : The active set is the full set of states. Estimation will be
              conducted on each reversibly connected set separately. That means
              the transition matrix will decompose into disconnected submatrices,
              the stationary vector is only defined within subsets, etc.
              Currently not implemented.
            * 'none' : The active set is the full set of states. Estimation will
              be conducted on the full set of
              states without ensuring connectivity. This only permits
              nonreversible estimation. Currently not implemented.

        dt_traj : str, optional, default='1 step'
            Description of the physical time of the input trajectories. May be used
            by analysis algorithms such as plotting tools to pretty-print the axes.
            By default '1 step', i.e. there is no physical time unit. Specify by a
            number, whitespace and unit. Permitted units are (* is an arbitrary
            string):

            |  'fs',  'femtosecond*'
            |  'ps',  'picosecond*'
            |  'ns',  'nanosecond*'
            |  'us',  'microsecond*'
            |  'ms',  'millisecond*'
            |  's',   'second*'

        maxiter : int, optional, default = 1000000
            Optional parameter with reversible = True. maximum number of iterations
            before the transition matrix estimation method exits
        maxerr : float, optional, default = 1e-8
            Optional parameter with reversible = True.
            convergence tolerance for transition matrix estimation.
            This specifies the maximum change of the Euclidean norm of relative
            stationary probabilities (:math:`x_i = \sum_k x_{ik}`). The relative
            stationary probability changes
            :math:`e_i = (x_i^{(1)} - x_i^{(2)})/(x_i^{(1)} + x_i^{(2)})` are used
            in order to track changes in small probabilities. The Euclidean norm
            of the change vector, :math:`|e_i|_2`, is compared to maxerr.

        Raises
        ------
        ValueError
            If `count_mode` or `connectivity` is not one of the documented modes.
        NotImplementedError
            If `connectivity` is 'all' or 'none'.

        """
        self.lag = lag

        # set basic parameters
        self.reversible = reversible
        statdist = _types.ensure_ndarray_or_None(statdist_constraint, ndim=None, kind='numeric')
        if statdist is not None:  # renormalize
            # Divide out of place: an in-place ``/=`` would silently modify the
            # caller's array and fails outright for integer-typed input.
            statdist = statdist / statdist.sum()
        self.statdist_constraint = statdist

        # sparse matrix computation wanted?
        self.sparse = sparse

        # store counting mode (lowercase)
        self.count_mode = str(count_mode).lower()
        if self.count_mode not in ('sliding', 'effective', 'sample'):
            # str() keeps the intended ValueError even when count_mode is not a string
            raise ValueError('count mode ' + str(count_mode) + ' is unknown.')

        # store connectivity mode (lowercase); str() mirrors the count_mode
        # handling so that non-string input ends in the ValueError below
        self.connectivity = str(connectivity).lower()
        if self.connectivity in ('all', 'none'):
            raise NotImplementedError(
                "MSM estimation with connectivity='%s' is currently not implemented." % self.connectivity)
        if self.connectivity != 'largest':  # 'largest' is the default and needs no extra setup
            raise ValueError('connectivity mode ' + str(connectivity) + ' is unknown.')

        # time step
        self.dt_traj = dt_traj
        self.timestep_traj = _TimeUnit(dt_traj)

        # convergence parameters
        self.maxiter = maxiter
        self.maxerr = maxerr
 def dt_traj(self, value):
     """Store a new trajectory time step and rebuild the derived time unit.

     NOTE(review): this looks like the body of a property setter whose
     decorator and matching getter lie outside the visible source; confirm.

     Parameters
     ----------
     value
         New time step description; stored in ``self._dt_traj``.
     """
     # time step
     self._dt_traj = value
     # The value is read back through ``self.dt_traj`` rather than taken from
     # ``value`` directly. Presumably the getter returns ``self._dt_traj``, in
     # which case the assignment above has to come first; confirm.
     self.timestep_traj = _TimeUnit(self.dt_traj)
Example #6
0
    def __init__(self,
                 lag,
                 count_mode='sliding',
                 connectivity='summed_count_matrix',
                 ground_state=None,
                 nstates_full=None,
                 equilibrium=None,
                 maxiter=10000,
                 maxerr=1.0E-15,
                 save_convergence_info=0,
                 dt_traj='1 step',
                 nn=None,
                 connectivity_factor=1.0,
                 direct_space=False,
                 N_dtram_accelerations=0,
                 callback=None,
                 init='mbar',
                 init_maxiter=5000,
                 init_maxerr=1.0E-8,
                 overcounting_factor=1.0):
        r"""Transition(-based) Reweighting Analysis Method

        Parameters
        ----------
        lag : int
            Integer lag time at which transitions are counted.
        count_mode : str, optional, default='sliding'
            mode to obtain count matrices from discrete trajectories. Should be
            one of:

            * 'sliding' : A trajectory of length T will have :math:`T-\tau` counts at time indexes

              .. math::

                 (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

            * 'sample' : A trajectory of length T will have :math:`T/\tau` counts
              at time indexes

              .. math::

                    (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., ((T/\tau-1) \tau \rightarrow T)

            Currently only 'sliding' is supported.
        connectivity : str, optional, default='summed_count_matrix'
            One of 'summed_count_matrix', 'strong_in_every_ensemble',
            'neighbors', 'post_hoc_RE' or 'BAR_variance'.
            Defines what should be considered a connected set in the joint space
            of conformations and thermodynamic ensembles.
            For details see thermotools.cset.compute_csets_TRAM.
        ground_state : int, optional, default=None
            Index of the unbiased thermodynamic state or None if there is no unbiased data available.
        nstates_full : int, optional, default=None
            Number of cluster centers, i.e., the size of the full set of states.
        equilibrium : list of booleans, optional
            For every trajectory triple (ttraj[i], dtraj[i], btraj[i]), indicates
            whether to assume global equilibrium. If true, the triple is not used
            for computing kinetic quantities (but only thermodynamic quantities).
            By default, no trajectory is assumed to be in global equilibrium.
            This is the TRAMMBAR extension.
        maxiter : int, optional, default=10000
            The maximum number of self-consistent iterations before the estimator exits unsuccessfully.
        maxerr : float, optional, default=1E-15
            Convergence criterion based on the maximal free energy change in a self-consistent
            iteration step.
        save_convergence_info : int, optional, default=0
            Every save_convergence_info iteration steps, store the actual increment
            and the actual loglikelihood; 0 means no storage.
        dt_traj : str, optional, default='1 step'
            Description of the physical time corresponding to the lag. May be used by analysis
            algorithms such as plotting tools to pretty-print the axes. By default '1 step', i.e.
            there is no physical time unit.  Specify by a number, whitespace and unit. Permitted
            units are (* is an arbitrary string):

            |  'fs',   'femtosecond*'
            |  'ps',   'picosecond*'
            |  'ns',   'nanosecond*'
            |  'us',   'microsecond*'
            |  'ms',   'millisecond*'
            |  's',    'second*'
        nn : int, optional, default=None
            Only needed if connectivity='neighbors'
            See thermotools.cset.compute_csets_TRAM.
        connectivity_factor : float, optional, default=1.0
            Only needed if connectivity='post_hoc_RE' or 'BAR_variance'. Weakens the connectivity
            requirement, see thermotools.cset.compute_csets_TRAM.
        direct_space : bool, optional, default=False
            Whether to perform the self-consistent iteration with Boltzmann factors
            (direct space) or free energies (log-space). When analyzing data from
            multi-temperature simulations, direct-space is not recommended.
        N_dtram_accelerations : int, optional, default=0
            Convergence of TRAM can be sped up by interleaving the updates
            in the self-consistent iteration with a dTRAM-like update step.
            N_dtram_accelerations says how many times the dTRAM-like update
            step should be applied in every iteration of the TRAM equations.
            Currently this is only effective if direct_space=True.
        callback : optional, default=None
            Stored unchanged. NOTE(review): presumably a callable invoked during
            the iteration; confirm against the estimation code.
        init : str, optional, default='mbar'
            Use a specific initialization for self-consistent iteration:

            | None:    use a hard-coded guess for free energies and Lagrangian multipliers
            | 'mbar':  perform a short MBAR estimate to initialize the free energies
        init_maxiter : int, optional, default=5000
            The maximum number of self-consistent iterations during the initialization.
        init_maxerr : float, optional, default=1.0E-8
            Convergence criterion for the initialization.
        overcounting_factor : double, default = 1.0
            Only needed if equilibrium contains True (TRAMMBAR).
            Sets the relative statistical weight of equilibrium and non-equilibrium
            frames. An overcounting_factor of value n means that every
            non-equilibrium frame is counted n times. Values larger than 1 increase
            the relative weight of the non-equilibrium data. Values less than 1
            increase the relative weight of the equilibrium data.

        Raises
        ------
        AssertionError
            If `count_mode` is not 'sliding' or `init` is neither None nor 'mbar'.

        References
        ----------

        .. [1] Wu, H. et al 2016
            Multiensemble Markov models of molecular thermodynamics and kinetics
            Proc. Natl. Acad. Sci. USA 113 E3221--E3230

        """
        self.lag = lag
        # Raise explicitly instead of using ``assert`` so the check survives
        # ``python -O``; AssertionError is kept for backward compatibility.
        if count_mode != 'sliding':
            raise AssertionError("Currently the only implemented count_mode is 'sliding'")
        self.count_mode = count_mode
        self.connectivity = connectivity
        self.nn = nn
        self.connectivity_factor = connectivity_factor
        self.dt_traj = dt_traj
        self.timestep_traj = _TimeUnit(dt_traj)
        self.ground_state = ground_state
        self.nstates_full = nstates_full
        self.equilibrium = equilibrium
        self.maxiter = maxiter
        self.maxerr = maxerr
        self.direct_space = direct_space
        self.N_dtram_accelerations = N_dtram_accelerations
        self.callback = callback
        self.save_convergence_info = save_convergence_info
        if init not in (None, 'mbar'):
            raise AssertionError("Currently only None and 'mbar' are supported")
        self.init = init
        self.init_maxiter = init_maxiter
        self.init_maxerr = init_maxerr
        self.overcounting_factor = overcounting_factor
        # result attributes start out empty
        self.active_set = None
        self.biased_conf_energies = None
        self.mbar_therm_energies = None
        self.log_lagrangian_mult = None
        self.loglikelihoods = None
Example #7
0
    def __init__(self,
                 lag,
                 count_mode='sliding',
                 connectivity='post_hoc_RE',
                 nstates_full=None,
                 equilibrium=None,
                 maxiter=10000,
                 maxerr=1.0E-15,
                 save_convergence_info=0,
                 dt_traj='1 step',
                 nn=None,
                 connectivity_factor=1.0,
                 direct_space=False,
                 N_dtram_accelerations=0,
                 callback=None,
                 init='mbar',
                 init_maxiter=5000,
                 init_maxerr=1.0E-8,
                 overcounting_factor=1.0):
        r"""Transition(-based) Reweighting Analysis Method

        Parameters
        ----------
        lag : int
            Integer lag time at which transitions are counted.
        count_mode : str, optional, default='sliding'
            mode to obtain count matrices from discrete trajectories. Should be
            one of:

            * 'sliding' : A trajectory of length T will have :math:`T-\tau` counts at time indexes

              .. math::

                 (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

            * 'sample' : A trajectory of length T will have :math:`T/\tau` counts
              at time indexes

              .. math::

                    (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., ((T/\tau-1) \tau \rightarrow T)

            Currently only 'sliding' is supported.
        connectivity : str, optional, default='post_hoc_RE'
            One of 'post_hoc_RE', 'BAR_variance', 'reversible_pathways' or
            'summed_count_matrix'. Defines what should be considered a connected set
            in the joint (product) space of conformations and thermodynamic ensembles.

            * 'reversible_pathways' : requires that every state in the connected set
              can be reached by following a pathway of reversible transitions. A
              reversible transition between two Markov states (within the same
              thermodynamic state k) is a pair of Markov states that belong to the
              same strongly connected component of the count matrix (from
              thermodynamic state k). A pathway of reversible transitions is a list of
              reversible transitions [(i_1, i_2), (i_2, i_3),..., (i_(N-2), i_(N-1)),
              (i_(N-1), i_N)]. The thermodynamic state where the reversible
              transitions happen, is ignored in constructing the reversible pathways.
              This is equivalent to assuming that two ensembles overlap at some Markov
              state whenever there exist frames from both ensembles in that Markov
              state.
            * 'post_hoc_RE' : similar to 'reversible_pathways' but with a more strict
              requirement for the overlap between thermodynamic states. It is required
              that every state in the connected set can be reached by following a
              pathway of reversible transitions or jumping between overlapping
              thermodynamic states while staying in the same Markov state. A reversible
              transition between two Markov states (within the same thermodynamic
              state k) is a pair of Markov states that belong to the same strongly
              connected component of the count matrix (from thermodynamic state k).
              Two thermodynamic states k and l are defined to overlap at Markov state
              n if a replica exchange simulation [2]_ restricted to state n would show
              at least one transition from k to l or one transition from l to k.
              The expected number of replica exchanges is estimated from the
              simulation data. The minimal number required of replica exchanges
              per Markov state can be increased by decreasing `connectivity_factor`.
            * 'BAR_variance' : like 'post_hoc_RE' but with a different condition to
              define the thermodynamic overlap based on the variance of the BAR
              estimator [3]_. Two thermodynamic states k and l are defined to overlap
              at Markov state n if the variance of the free energy difference Delta
              f_{kl} computed with BAR (and restricted to conformations from Markov
              state n) is less or equal than one. The minimally required variance
              can be controlled with `connectivity_factor`.
            * 'summed_count_matrix' : all thermodynamic states are assumed to overlap.
              The connected set is then computed by summing the count matrices over
              all thermodynamic states and taking its largest strongly connected set.
              Not recommended!

            For more details see :func:`thermotools.cset.compute_csets_TRAM`.
        nstates_full : int, optional, default=None
            Number of cluster centers, i.e., the size of the full set of states.
        equilibrium : list of booleans, optional
            For every trajectory triple (ttraj[i], dtraj[i], btraj[i]), indicates
            whether to assume global equilibrium. If true, the triple is not used
            for computing kinetic quantities (but only thermodynamic quantities).
            By default, no trajectory is assumed to be in global equilibrium.
            This is the TRAMMBAR extension.
        maxiter : int, optional, default=10000
            The maximum number of self-consistent iterations before the estimator exits unsuccessfully.
        maxerr : float, optional, default=1E-15
            Convergence criterion based on the maximal free energy change in a self-consistent
            iteration step.
        save_convergence_info : int, optional, default=0
            Every save_convergence_info iteration steps, store the actual increment
            and the actual log-likelihood; 0 means no storage.
        dt_traj : str, optional, default='1 step'
            Description of the physical time corresponding to the lag. May be used by analysis
            algorithms such as plotting tools to pretty-print the axes. By default '1 step', i.e.
            there is no physical time unit.  Specify by a number, whitespace and unit. Permitted
            units are (* is an arbitrary string):

            |  'fs',   'femtosecond*'
            |  'ps',   'picosecond*'
            |  'ns',   'nanosecond*'
            |  'us',   'microsecond*'
            |  'ms',   'millisecond*'
            |  's',    'second*'
        nn : optional, default=None
            Stored unchanged. NOTE(review): none of the connectivity modes
            documented above refers to this parameter; confirm whether it is
            still used.
        connectivity_factor : float, optional, default=1.0
            Only needed if connectivity='post_hoc_RE' or 'BAR_variance'. Values
            greater than 1.0 weaken the connectivity conditions. For 'post_hoc_RE'
            this multiplies the number of hypothetically observed transitions. For
            'BAR_variance' this scales the threshold for the minimal allowed variance
            of free energy differences.
        direct_space : bool, optional, default=False
            Whether to perform the self-consistent iteration with Boltzmann factors
            (direct space) or free energies (log-space). When analyzing data from
            multi-temperature simulations, direct-space is not recommended.
        N_dtram_accelerations : int, optional, default=0
            Convergence of TRAM can be sped up by interleaving the updates
            in the self-consistent iteration with a dTRAM-like update step.
            N_dtram_accelerations says how many times the dTRAM-like update
            step should be applied in every iteration of the TRAM equations.
            Currently this is only effective if direct_space=True.
        callback : optional, default=None
            Stored unchanged. NOTE(review): presumably a callable invoked during
            the iteration; confirm against the estimation code.
        init : str, optional, default='mbar'
            Use a specific initialization for self-consistent iteration:

            | None:    use a hard-coded guess for free energies and Lagrangian multipliers
            | 'mbar':  perform a short MBAR estimate to initialize the free energies
        init_maxiter : int, optional, default=5000
            The maximum number of self-consistent iterations during the initialization.
        init_maxerr : float, optional, default=1.0E-8
            Convergence criterion for the initialization.
        overcounting_factor : double, default = 1.0
            Only needed if equilibrium contains True (TRAMMBAR).
            Sets the relative statistical weight of equilibrium and non-equilibrium
            frames. An overcounting_factor of value n means that every
            non-equilibrium frame is counted n times. Values larger than 1 increase
            the relative weight of the non-equilibrium data. Values less than 1
            increase the relative weight of the equilibrium data.

        Raises
        ------
        AssertionError
            If `count_mode` is not 'sliding' or `init` is neither None nor 'mbar'.

        References
        ----------
        .. [1] Wu, H. et al 2016
            Multiensemble Markov models of molecular thermodynamics and kinetics
            Proc. Natl. Acad. Sci. USA 113 E3221--E3230
        .. [2] Hukushima et al, Exchange Monte Carlo method and application to spin
            glass simulations, J. Phys. Soc. Jpn. 65, 1604 (1996)
        .. [3] Shirts and Chodera, Statistically optimal analysis of samples
            from multiple equilibrium states, J. Chem. Phys. 129, 124105 (2008)

        """
        self.lag = lag
        # Raise explicitly instead of using ``assert`` so the check survives
        # ``python -O``; AssertionError is kept for backward compatibility.
        if count_mode != 'sliding':
            raise AssertionError("Currently the only implemented count_mode is 'sliding'")
        self.count_mode = count_mode
        self.connectivity = connectivity
        self.nn = nn
        self.connectivity_factor = connectivity_factor
        self.dt_traj = dt_traj
        self.timestep_traj = _TimeUnit(dt_traj)
        self.nstates_full = nstates_full
        self.equilibrium = equilibrium
        self.maxiter = maxiter
        self.maxerr = maxerr
        self.direct_space = direct_space
        self.N_dtram_accelerations = N_dtram_accelerations
        self.callback = callback
        self.save_convergence_info = save_convergence_info
        if init not in (None, 'mbar'):
            raise AssertionError("Currently only None and 'mbar' are supported")
        self.init = init
        self.init_maxiter = init_maxiter
        self.init_maxerr = init_maxerr
        self.overcounting_factor = overcounting_factor
        # result attributes start out empty
        self.active_set = None
        self.biased_conf_energies = None
        self.mbar_therm_energies = None
        self.log_lagrangian_mult = None
        self.loglikelihoods = None
Example #8
0
    def __init__(
        self, bias_energies_full, lag, count_mode='sliding', connectivity='largest',
        maxiter=10000, maxerr=1.0E-15, save_convergence_info=0, dt_traj='1 step',
        init=None, init_maxiter=10000, init_maxerr=1.0E-8):
        r""" Discrete Transition(-based) Reweighting Analysis Method

        Parameters
        ----------
        bias_energies_full : numpy.ndarray(shape=(num_therm_states, num_conf_states)) object
            bias_energies_full[j, i] is the bias energy in units of kT for each discrete state i
            at thermodynamic state j.
        lag : int
            Integer lag time at which transitions are counted.
        count_mode : str, optional, default='sliding'
            Mode to obtain count matrices from discrete trajectories. Should be one of:

            * 'sliding' : a trajectory of length T will have :math:`T-\tau` counts at time indexes

              .. math::

                 (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

            * 'sample' : a trajectory of length T will have :math:`T/\tau` counts at time indexes

              .. math::

                    (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., ((T/\tau-1) \tau \rightarrow T)

            Currently only 'sliding' is supported.
        connectivity : str, optional, default='largest'
            Defines what should be considered a connected set in the joint space of conformations and
            thermodynamic ensembles. Currently only 'largest' is supported.
        maxiter : int, optional, default=10000
            The maximum number of self-consistent iterations before the estimator exits unsuccessfully.
        maxerr : float, optional, default=1.0E-15
            Convergence criterion based on the maximal free energy change in a self-consistent
            iteration step.
        save_convergence_info : int, optional, default=0
            Every save_convergence_info iteration steps, store the actual increment
            and the actual loglikelihood; 0 means no storage.
        dt_traj : str, optional, default='1 step'
            Description of the physical time corresponding to the lag. May be used by analysis
            algorithms such as plotting tools to pretty-print the axes. By default '1 step', i.e.
            there is no physical time unit.  Specify by a number, whitespace and unit. Permitted
            units are (* is an arbitrary string):

            |  'fs',   'femtosecond*'
            |  'ps',   'picosecond*'
            |  'ns',   'nanosecond*'
            |  'us',   'microsecond*'
            |  'ms',   'millisecond*'
            |  's',    'second*'
        init : str, optional, default=None
            Use a specific initialization for self-consistent iteration:

            | None:    use a hard-coded guess for free energies and Lagrangian multipliers
            | 'wham':  perform a short WHAM estimate to initialize the free energies
        init_maxiter : int, optional, default=10000
            The maximum number of self-consistent iterations during the initialization.
        init_maxerr : float, optional, default=1.0E-8
            Convergence criterion for the initialization.

        Raises
        ------
        AssertionError
            If `count_mode`, `connectivity` or `init` holds an unsupported value.

        Example
        -------
        >>> from pyemma.thermo import DTRAM
        >>> import numpy as np
        >>> B = np.array([[0, 0],[0.5, 1.0]])
        >>> dtram = DTRAM(B, 1)
        >>> ttrajs = [np.array([0,0,0,0,0,0,0,0,0,0]),np.array([1,1,1,1,1,1,1,1,1,1])]
        >>> dtrajs = [np.array([0,0,0,0,1,1,1,0,0,0]),np.array([0,1,0,1,0,1,1,0,0,1])]
        >>> dtram = dtram.estimate((ttrajs, dtrajs))
        >>> dtram.log_likelihood() # doctest: +ELLIPSIS
        -9.805...
        >>> dtram.count_matrices # doctest: +SKIP
        array([[[5, 1],
                [1, 2]],

               [[1, 4],
                [3, 1]]], dtype=int32)
        >>> dtram.stationary_distribution # doctest: +ELLIPSIS
        array([ 0.38...,  0.61...])
        >>> dtram.meval('stationary_distribution') # doctest: +ELLIPSIS
        [array([ 0.38...,  0.61...]), array([ 0.50...,  0.49...])]

        References
        ----------

        .. [1] Wu, H. et al 2014
            Statistically optimal analysis of state-discretized trajectory data from multiple thermodynamic states
            J. Chem. Phys. 141, 214106

        """
        # set all parameters
        self.bias_energies_full = _types.ensure_ndarray(bias_energies_full, ndim=2, kind='numeric')
        self.lag = lag
        # The checks below raise explicitly instead of using ``assert`` so that
        # they are not stripped when Python runs with -O. AssertionError is kept
        # as the exception type for backward compatibility.
        if count_mode != 'sliding':
            raise AssertionError("Currently the only implemented count_mode is 'sliding'")
        self.count_mode = count_mode
        if connectivity != 'largest':
            raise AssertionError("Currently the only implemented connectivity is 'largest'")
        self.connectivity = connectivity
        self.dt_traj = dt_traj
        self.maxiter = maxiter
        self.maxerr = maxerr
        self.save_convergence_info = save_convergence_info
        if init not in (None, 'wham'):
            raise AssertionError("Currently only None and 'wham' are supported")
        self.init = init
        self.init_maxiter = init_maxiter
        self.init_maxerr = init_maxerr
        # set derived quantities
        # Read the shape from the ensured array rather than from the raw argument,
        # so the derived sizes always describe what is actually stored and inputs
        # without a ``shape`` attribute do not fail here.
        self.nthermo, self.nstates_full = self.bias_energies_full.shape
        self.timestep_traj = _TimeUnit(dt_traj)
        # set iteration variables
        self.therm_energies = None
        self.conf_energies = None
        self.log_lagrangian_mult = None
Example #9
0
    def __init__(self, lag=1, reversible=True, count_mode='sliding', sparse=False,
                 connectivity='largest', dt_traj='1 step'):
        r"""Maximum likelihood estimator for MSMs given discrete trajectory statistics

        Parameters
        ----------
        lag : int
            lag time at which transitions are counted and the transition matrix is
            estimated.

        reversible : bool, optional, default = True
            If true compute reversible MSM, else non-reversible MSM

        count_mode : str, optional, default='sliding'
            mode to obtain count matrices from discrete trajectories. The value
            is compared case-insensitively and stored in lowercase. Should be
            one of:

            * 'sliding' : A trajectory of length T will have :math:`T-\tau` counts
              at time indexes

              .. math::

                 (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

            * 'effective' : Uses an estimate of the transition counts that are
              statistically uncorrelated. Recommended when used with a
              Bayesian MSM.
            * 'sample' : A trajectory of length T will have :math:`T/\tau` counts
              at time indexes

              .. math::

                    (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., (((T/\tau)-1) \tau \rightarrow T)

        sparse : bool, optional, default = False
            If true compute count matrix, transition matrix and all derived
            quantities using sparse matrix algebra. In this case python sparse
            matrices will be returned by the corresponding functions instead of
            numpy arrays. This behavior is suggested for very large numbers of
            states (e.g. > 4000) because it is likely to be much more efficient.
        connectivity : str, optional, default = 'largest'
            Connectivity mode. The value is compared case-insensitively and
            stored in lowercase. Three methods are intended (currently only
            'largest' is implemented)

            * 'largest' : The active set is the largest reversibly connected set.
              All estimation will be done on this subset and all quantities
              (transition matrix, stationary distribution, etc) are only defined
              on this subset and are correspondingly smaller than the full set
              of states
            * 'all' : The active set is the full set of states. Estimation will be
              conducted on each reversibly connected set separately. That means
              the transition matrix will decompose into disconnected submatrices,
              the stationary vector is only defined within subsets, etc.
              Currently not implemented.
            * 'none' : The active set is the full set of states. Estimation will
              be conducted on the full set of
              states without ensuring connectivity. This only permits
              nonreversible estimation. Currently not implemented.

        dt_traj : str, optional, default='1 step'
            Description of the physical time of the input trajectories. May be used
            by analysis algorithms such as plotting tools to pretty-print the axes.
            By default '1 step', i.e. there is no physical time unit. Specify by a
            number, whitespace and unit. Permitted units are (* is an arbitrary
            string):

            |  'fs',  'femtosecond*'
            |  'ps',  'picosecond*'
            |  'ns',  'nanosecond*'
            |  'us',  'microsecond*'
            |  'ms',  'millisecond*'
            |  's',   'second*'

        Raises
        ------
        ValueError
            If count_mode or connectivity is not one of the modes listed above.
        NotImplementedError
            If connectivity is 'all' or 'none'.

        """
        self.lag = lag

        # set basic parameters
        self.reversible = reversible

        # sparse matrix computation wanted?
        self.sparse = sparse

        # store counting mode (lowercase)
        self.count_mode = str(count_mode).lower()
        if self.count_mode not in ('sliding', 'effective', 'sample'):
            # str() keeps the message buildable for non-string input, so the
            # caller gets the intended ValueError rather than a TypeError from
            # string concatenation.
            raise ValueError('count mode ' + str(count_mode) + ' is unknown.')

        # store connectivity mode (lowercase); coerced with str() for the same
        # reason as count_mode, so bad input ends in one of the errors below
        # instead of an AttributeError on .lower()
        self.connectivity = str(connectivity).lower()
        if self.connectivity == 'largest':
            pass  # this is the current default. no need to do anything
        elif self.connectivity == 'all':
            raise NotImplementedError('MSM estimation with connectivity=\'all\' is currently not implemented.')
        elif self.connectivity == 'none':
            raise NotImplementedError('MSM estimation with connectivity=\'none\' is currently not implemented.')
        else:
            raise ValueError('connectivity mode ' + str(connectivity) + ' is unknown.')

        # time step: keep the raw description and the object built from it
        self.dt_traj = dt_traj
        self.timestep_traj = _TimeUnit(dt_traj)
Example #10
0
    def __init__(self,
                 bias_energies_full,
                 maxiter=10000,
                 maxerr=1.0E-15,
                 save_convergence_info=0,
                 dt_traj='1 step',
                 stride=1):
        r"""Weighted Histogram Analysis Method

        Parameters
        ----------
        bias_energies_full : numpy.ndarray(shape=(num_therm_states, num_conf_states)) object
            bias_energies_full[j, i] is the bias energy in units of kT for each discrete state i
            at thermodynamic state j.
        maxiter : int, optional, default=10000
            The maximum number of self-consistent iterations before the estimator exits unsuccessfully.
        maxerr : float, optional, default=1.0E-15
            Convergence criterion based on the maximal free energy change in a self-consistent
            iteration step.
        save_convergence_info : int, optional, default=0
            Every save_convergence_info iteration steps, store the actual increment
            and the actual loglikelihood; 0 means no storage.
        dt_traj : str, optional, default='1 step'
            Description of the physical time corresponding to the lag. May be used by analysis
            algorithms such as plotting tools to pretty-print the axes. By default '1 step', i.e.
            there is no physical time unit.  Specify by a number, whitespace and unit. Permitted
            units are (* is an arbitrary string):

            |  'fs',   'femtosecond*'
            |  'ps',   'picosecond*'
            |  'ns',   'nanosecond*'
            |  'us',   'microsecond*'
            |  'ms',   'millisecond*'
            |  's',    'second*'
        stride : int, optional, default=1
            not used (only stored as ``self.stride``)

        Example
        -------
        >>> from pyemma.thermo import WHAM
        >>> import numpy as np
        >>> B = np.array([[0, 0],[0.5, 1.0]])
        >>> wham = WHAM(B)
        >>> ttrajs = [np.array([0,0,0,0,0,0,0,0,0,0]),np.array([1,1,1,1,1,1,1,1,1,1])]
        >>> dtrajs = [np.array([0,0,0,0,1,1,1,0,0,0]),np.array([0,1,0,1,0,1,1,0,0,1])]
        >>> wham = wham.estimate((ttrajs, dtrajs))
        >>> wham.log_likelihood() # doctest: +ELLIPSIS
        -6.6...
        >>> wham.state_counts # doctest: +SKIP
        array([[7, 3],
               [5, 5]])
        >>> wham.stationary_distribution # doctest: +ELLIPSIS +REPORT_NDIFF
        array([ 0.5...,  0.4...])
        >>> wham.meval('stationary_distribution') # doctest: +ELLIPSIS +REPORT_NDIFF
        [array([ 0.5...,  0.4...]), array([ 0.6...,  0.3...])]

        References
        ----------

        .. [1] Ferrenberg, A.M. and Swendsen, R.H. 1988.
            New Monte Carlo Technique for Studying Phase Transitions.
            Phys. Rev. Lett. 61, 2635--2638

        .. [2] Kumar, S. et al 1992.
            The Weighted Histogram Analysis Method for Free-Energy Calculations on Biomolecules. I. The Method.
            J. Comp. Chem. 13, 1011--1021

        """
        # set all parameters
        self.bias_energies_full = _types.ensure_ndarray(bias_energies_full,
                                                        ndim=2,
                                                        kind='numeric')
        self.stride = stride
        self.dt_traj = dt_traj
        self.maxiter = maxiter
        self.maxerr = maxerr
        self.save_convergence_info = save_convergence_info
        # set derived quantities
        # Read the shape from the value returned by ensure_ndarray rather than
        # from the raw argument, so the dimensions always describe the array
        # that is actually stored (presumably the helper may convert
        # array-likes -- verify against _types.ensure_ndarray).
        self.nthermo, self.nstates_full = self.bias_energies_full.shape
        self.timestep_traj = _TimeUnit(dt_traj)
        # set iteration variables (no estimate available yet)
        self.therm_energies = None
        self.conf_energies = None