def __init__(self, bias_energies_full, lag, count_mode='sliding', connectivity='largest',
             maxiter=10000, maxerr=1E-15, dt_traj='1 step', save_convergence_info=0, init=None):
    r"""Store the estimator parameters and reset the iteration variables.

    Parameters
    ----------
    bias_energies_full : array-like, 2-dimensional, numeric
        Bias energies. The validated array is stored and its two axis
        lengths are stored as ``nthermo`` and ``nstates_full``.
    lag : int
        Lag time; stored unchanged.
    count_mode : str, optional, default='sliding'
        Only 'sliding' is accepted.
    connectivity : str, optional, default='largest'
        Only 'largest' is accepted.
    maxiter : int, optional, default=10000
        Stored unchanged (presumably the iteration limit of the estimation).
    maxerr : float, optional, default=1E-15
        Stored unchanged (presumably the convergence tolerance).
    dt_traj : str, optional, default='1 step'
        Physical time step description; also converted with ``_TimeUnit``
        and stored as ``timestep_traj``.
    save_convergence_info : int, optional, default=0
        Stored unchanged.
    init : str or None, optional, default=None
        Initialization mode; only None and 'wham' are accepted.

    Raises
    ------
    AssertionError
        If `count_mode`, `connectivity` or `init` has an unsupported value.
    """
    # set all parameters
    self.bias_energies_full = _types.ensure_ndarray(bias_energies_full, ndim=2, kind='numeric')
    self.lag = lag
    assert count_mode == 'sliding', 'Currently the only implemented count_mode is \'sliding\''
    self.count_mode = count_mode
    assert connectivity == 'largest', 'Currently the only implemented connectivity is \'largest\''
    self.connectivity = connectivity
    assert init in (None, 'wham'), 'Currently only None and \'wham\' are supported'
    self.init = init
    self.dt_traj = dt_traj
    self.maxiter = maxiter
    self.maxerr = maxerr
    self.save_convergence_info = save_convergence_info
    # set derived quantities
    # Read the shape from the validated array, not from the raw argument:
    # the raw argument may be an array-like without a ``shape`` attribute.
    self.nthermo, self.nstates_full = self.bias_energies_full.shape
    self.timestep_traj = _TimeUnit(dt_traj)
    # set iteration variables
    self.therm_energies = None
    self.conf_energies = None
    self.log_lagrangian_mult = None
def __init__(
        self, lag, count_mode='sliding', connectivity='summed_count_matrix',
        ground_state=None, nstates_full=None, equilibrium=None,
        maxiter=10000, maxerr=1.0E-15, save_convergence_info=0,
        dt_traj='1 step', nn=None, connectivity_factor=1.0,
        direct_space=False, N_dtram_accelerations=0, callback=None,
        init='mbar', init_maxiter=5000, init_maxerr=1.0E-8,
        overcounting_factor=1.0):
    r"""Record the estimator configuration and clear all result attributes.

    Every argument is stored on the instance under its own name.  In
    addition `dt_traj` is converted with ``_TimeUnit`` and kept as
    ``timestep_traj``, and the attributes ``active_set``,
    ``biased_conf_energies``, ``mbar_therm_energies``,
    ``log_lagrangian_mult`` and ``loglikelihoods`` are reset to None.

    Raises
    ------
    AssertionError
        If `count_mode` is not 'sliding' or `init` is neither None nor
        'mbar'.
    """
    # -- transition counting and connectivity ---------------------------
    self.lag = lag
    assert count_mode == 'sliding', "Currently the only implemented count_mode is 'sliding'"
    self.count_mode = count_mode
    self.connectivity = connectivity
    self.nn = nn
    self.connectivity_factor = connectivity_factor

    # -- physical time --------------------------------------------------
    self.dt_traj = dt_traj
    self.timestep_traj = _TimeUnit(dt_traj)

    # -- data description -----------------------------------------------
    self.ground_state = ground_state
    self.nstates_full = nstates_full
    self.equilibrium = equilibrium

    # -- solver control -------------------------------------------------
    self.maxiter = maxiter
    self.maxerr = maxerr
    self.direct_space = direct_space
    self.N_dtram_accelerations = N_dtram_accelerations
    self.callback = callback
    self.save_convergence_info = save_convergence_info

    # -- initialization of the iteration --------------------------------
    supported_init = (None, 'mbar')
    assert init in supported_init, "Currently only None and 'mbar' are supported"
    self.init = init
    self.init_maxiter = init_maxiter
    self.init_maxerr = init_maxerr
    self.overcounting_factor = overcounting_factor

    # -- results, filled in later ---------------------------------------
    self.active_set = self.biased_conf_energies = self.mbar_therm_energies = None
    self.log_lagrangian_mult = self.loglikelihoods = None
def __init__(self, bias_energies_full, maxiter=10000, maxerr=1.0E-15, save_convergence_info=0,
             dt_traj='1 step', stride=1):
    r"""Store the estimator parameters and reset the iteration variables.

    Parameters
    ----------
    bias_energies_full : array-like, 2-dimensional, numeric
        Bias energies. The validated array is stored and its two axis
        lengths are stored as ``nthermo`` and ``nstates_full``.
    maxiter : int, optional, default=10000
        Stored unchanged (presumably the iteration limit of the estimation).
    maxerr : float, optional, default=1.0E-15
        Stored unchanged (presumably the convergence tolerance).
    save_convergence_info : int, optional, default=0
        Stored unchanged.
    dt_traj : str, optional, default='1 step'
        Physical time step description; also converted with ``_TimeUnit``
        and stored as ``timestep_traj``.
    stride : int, optional, default=1
        Stored unchanged.
    """
    self.bias_energies_full = _types.ensure_ndarray(bias_energies_full, ndim=2, kind='numeric')
    self.stride = stride
    self.dt_traj = dt_traj
    self.maxiter = maxiter
    self.maxerr = maxerr
    self.save_convergence_info = save_convergence_info
    # set derived quantities
    # Read the shape from the validated array, not from the raw argument:
    # the raw argument may be an array-like without a ``shape`` attribute.
    self.nthermo, self.nstates_full = self.bias_energies_full.shape
    self.timestep_traj = _TimeUnit(dt_traj)
    # set iteration variables
    self.therm_energies = None
    self.conf_energies = None
def __init__(self, lag=1, reversible=True, statdist_constraint=None,
             count_mode='sliding', sparse=False,
             connectivity='largest', dt_traj='1 step', maxiter=1000000,
             maxerr=1e-8):
    r"""Maximum likelihood estimator for MSMs given discrete trajectory statistics

    Parameters
    ----------
    lag : int
        lag time at which transitions are counted and the transition matrix
        is estimated.

    reversible : bool, optional, default = True
        If true compute reversible MSM, else non-reversible MSM

    statdist_constraint : (M,) ndarray, optional
        Stationary vector on the full set of states. Estimation will be
        made such that the resulting transition matrix has this
        distribution as an equilibrium distribution. Set probabilities to
        zero if these states should be excluded from the analysis. The
        vector is renormalized to sum to one; the array passed in is not
        modified.

    count_mode : str, optional, default='sliding'
        mode to obtain count matrices from discrete trajectories. Should be
        one of:

        * 'sliding' : A trajectory of length T will have :math:`T-\tau`
          counts at time indexes

          .. math::

             (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

        * 'effective' : Uses an estimate of the transition counts that are
          statistically uncorrelated. Recommended when used with a
          Bayesian MSM.
        * 'sample' : A trajectory of length T will have :math:`T/\tau`
          counts at time indexes

          .. math::

             (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., (((T/\tau)-1) \tau \rightarrow T)

    sparse : bool, optional, default = False
        If true compute count matrix, transition matrix and all derived
        quantities using sparse matrix algebra. In this case python sparse
        matrices will be returned by the corresponding functions instead of
        numpy arrays. This behavior is suggested for very large numbers of
        states (e.g. > 4000) because it is likely to be much more
        efficient.

    connectivity : str, optional, default = 'largest'
        Connectivity mode. Three methods are intended (currently only
        'largest' is implemented)

        * 'largest' : The active set is the largest reversibly connected
          set. All estimation will be done on this subset and all
          quantities (transition matrix, stationary distribution, etc) are
          only defined on this subset and are correspondingly smaller than
          the full set of states
        * 'all' : The active set is the full set of states. Estimation will
          be conducted on each reversibly connected set separately. That
          means the transition matrix will decompose into disconnected
          submatrices, the stationary vector is only defined within
          subsets, etc. Currently not implemented.
        * 'none' : The active set is the full set of states. Estimation
          will be conducted on the full set of states without ensuring
          connectivity. This only permits nonreversible estimation.
          Currently not implemented.

    dt_traj : str, optional, default='1 step'
        Description of the physical time of the input trajectories. May be
        used by analysis algorithms such as plotting tools to pretty-print
        the axes. By default '1 step', i.e. there is no physical time unit.
        Specify by a number, whitespace and unit. Permitted units are
        (* is an arbitrary string):

        |  'fs',  'femtosecond*'
        |  'ps',  'picosecond*'
        |  'ns',  'nanosecond*'
        |  'us',  'microsecond*'
        |  'ms',  'millisecond*'
        |  's',   'second*'

    maxiter : int, optional, default = 1000000
        Optional parameter with reversible = True. maximum number of
        iterations before the transition matrix estimation method exits

    maxerr : float, optional, default = 1e-8
        Optional parameter with reversible = True.
        convergence tolerance for transition matrix estimation.
        This specifies the maximum change of the Euclidean norm of relative
        stationary probabilities (:math:`x_i = \sum_k x_{ik}`). The
        relative stationary probability changes
        :math:`e_i = (x_i^{(1)} - x_i^{(2)})/(x_i^{(1)} + x_i^{(2)})` are
        used in order to track changes in small probabilities. The
        Euclidean norm of the change vector, :math:`|e_i|_2`, is compared
        to maxerr.

    Raises
    ------
    ValueError
        If `count_mode` or `connectivity` is not one of the known modes.
    NotImplementedError
        If `connectivity` is 'all' or 'none'.
    """
    self.lag = lag

    # set basic parameters
    self.reversible = reversible
    self.statdist_constraint = _types.ensure_ndarray_or_None(statdist_constraint, ndim=None, kind='numeric')
    if self.statdist_constraint is not None:
        # Renormalize into a new array.  An in-place ``/=`` could write into
        # the caller's array and fails for integer-typed input, because the
        # float result cannot be cast back into an integer buffer.
        self.statdist_constraint = self.statdist_constraint / self.statdist_constraint.sum()

    # sparse matrix computation wanted?
    self.sparse = sparse

    # store counting mode (lowercase)
    self.count_mode = str(count_mode).lower()
    if self.count_mode not in ('sliding', 'effective', 'sample'):
        # str() keeps this a ValueError even for non-string input
        raise ValueError('count mode ' + str(count_mode) + ' is unknown.')

    # store connectivity mode (lowercase)
    self.connectivity = connectivity.lower()
    if self.connectivity in ('all', 'none'):
        raise NotImplementedError(
            'MSM estimation with connectivity=\'%s\' is currently not implemented.' % self.connectivity)
    if self.connectivity != 'largest':
        raise ValueError('connectivity mode ' + str(connectivity) + ' is unknown.')
    # 'largest' is the current default; nothing else to do

    # time step
    self.dt_traj = dt_traj
    self.timestep_traj = _TimeUnit(dt_traj)

    # convergence parameters
    self.maxiter = maxiter
    self.maxerr = maxerr
def dt_traj(self, value):
    """Store a new trajectory time step and refresh ``timestep_traj``.

    Parameters
    ----------
    value
        New time step description.  It is kept in ``_dt_traj``; the
        ``dt_traj`` attribute is then read back and passed to
        ``_TimeUnit`` to rebuild ``timestep_traj``.
    """
    self._dt_traj = value
    # Read through the public attribute (not ``value`` directly) so that
    # whatever ``dt_traj`` returns is what gets converted.
    current_dt = self.dt_traj
    self.timestep_traj = _TimeUnit(current_dt)
def __init__(
        self, lag, count_mode='sliding', connectivity='summed_count_matrix',
        ground_state=None, nstates_full=None, equilibrium=None,
        maxiter=10000, maxerr=1.0E-15, save_convergence_info=0,
        dt_traj='1 step', nn=None, connectivity_factor=1.0,
        direct_space=False, N_dtram_accelerations=0, callback=None,
        init='mbar', init_maxiter=5000, init_maxerr=1.0E-8,
        overcounting_factor=1.0):
    r"""Transition(-based) Reweighting Analysis Method

    Parameters
    ----------
    lag : int
        Integer lag time at which transitions are counted.
    count_mode : str, optional, default='sliding'
        How count matrices are obtained from the discrete trajectories:

        * 'sliding' : a trajectory of length T contributes
          :math:`T-\tau` counts at time indexes

          .. math::

             (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

        * 'sample' : a trajectory of length T contributes
          :math:`T/\tau` counts at time indexes

          .. math::

             (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., ((T/\tau-1) \tau \rightarrow T)

        Currently only 'sliding' is supported.
    connectivity : str, optional, default='summed_count_matrix'
        One of 'summed_count_matrix', 'strong_in_every_ensemble',
        'neighbors', 'post_hoc_RE' or 'BAR_variance'. Selects the
        definition of a connected set in the joint space of conformations
        and thermodynamic ensembles. For details see
        thermotools.cset.compute_csets_TRAM.
    ground_state : int, optional, default=None
        Index of the unbiased thermodynamic state, or None when no
        unbiased data is available.
    nstates_full : int, optional, default=None
        Number of cluster centers, i.e., the size of the full set of
        states.
    equilibrium : list of booleans, optional
        One flag per trajectory triple (ttraj[i], dtraj[i], btraj[i])
        telling whether global equilibrium is assumed for it. A triple
        flagged True contributes only to thermodynamic quantities, not to
        kinetic ones. By default no trajectory is assumed to be in global
        equilibrium. This is the TRAMMBAR extension.
    maxiter : int, optional, default=10000
        Maximum number of self-consistent iterations before the estimator
        exits unsuccessfully.
    maxerr : float, optional, default=1E-15
        Convergence criterion based on the maximal free energy change in
        one self-consistent iteration step.
    save_convergence_info : int, optional, default=0
        Store the actual increment and the actual loglikelihood every
        `save_convergence_info` iteration steps; 0 disables storage.
    dt_traj : str, optional, default='1 step'
        Description of the physical time corresponding to the lag. May be
        used by analysis algorithms such as plotting tools to pretty-print
        the axes. The default '1 step' means there is no physical time
        unit. Give a number, whitespace and a unit. Permitted units are
        (* is an arbitrary string):

        |  'fs',  'femtosecond*'
        |  'ps',  'picosecond*'
        |  'ns',  'nanosecond*'
        |  'us',  'microsecond*'
        |  'ms',  'millisecond*'
        |  's',   'second*'
    nn : int, optional, default=None
        Only needed if connectivity='neighbors'. See
        thermotools.cset.compute_csets_TRAM.
    connectivity_factor : float, optional, default=1.0
        Only needed if connectivity='post_hoc_RE' or 'BAR_variance'.
        Weakens the connectivity requirement, see
        thermotools.cset.compute_csets_TRAM.
    direct_space : bool, optional, default=False
        Whether the self-consistent iteration works with Boltzmann factors
        (direct space) or with free energies (log-space). Direct space is
        not recommended for data from multi-temperature simulations.
    N_dtram_accelerations : int, optional, default=0
        TRAM convergence can be sped up by interleaving the updates of the
        self-consistent iteration with a dTRAM-like update step. This is
        the number of dTRAM-like updates applied in every iteration of the
        TRAM equations. Currently only effective if direct_space=True.
    callback : optional, default=None
        Stored unchanged as ``self.callback``; its use is not visible in
        this constructor.
    init : str, optional, default='mbar'
        Initialization used for the self-consistent iteration:

        | None:    use a hard-coded guess for free energies and Lagrangian multipliers
        | 'mbar':  perform a short MBAR estimate to initialize the free energies
    init_maxiter : int, optional, default=5000
        Maximum number of self-consistent iterations during the
        initialization.
    init_maxerr : float, optional, default=1.0E-8
        Convergence criterion for the initialization.
    overcounting_factor : double, default = 1.0
        Only needed if `equilibrium` contains True (TRAMMBAR). Sets the
        relative statistical weight of equilibrium and non-equilibrium
        frames: a value of n counts every non-equilibrium frame n times.
        Values above 1 raise the relative weight of the non-equilibrium
        data, values below 1 raise that of the equilibrium data.

    Raises
    ------
    AssertionError
        If `count_mode` is not 'sliding' or `init` is neither None nor
        'mbar'.

    References
    ----------
    .. [1] Wu, H. et al 2016
        Multiensemble Markov models of molecular thermodynamics and kinetics
        Proc. Natl. Acad. Sci. USA 113 E3221--E3230
    """
    # -- transition counting and connectivity ---------------------------
    self.lag = lag
    assert count_mode == 'sliding', "Currently the only implemented count_mode is 'sliding'"
    self.count_mode = count_mode
    self.connectivity = connectivity
    self.nn = nn
    self.connectivity_factor = connectivity_factor

    # -- physical time --------------------------------------------------
    self.dt_traj = dt_traj
    self.timestep_traj = _TimeUnit(dt_traj)

    # -- data description -----------------------------------------------
    self.ground_state = ground_state
    self.nstates_full = nstates_full
    self.equilibrium = equilibrium

    # -- solver control -------------------------------------------------
    self.maxiter = maxiter
    self.maxerr = maxerr
    self.direct_space = direct_space
    self.N_dtram_accelerations = N_dtram_accelerations
    self.callback = callback
    self.save_convergence_info = save_convergence_info

    # -- initialization of the iteration --------------------------------
    supported_init = (None, 'mbar')
    assert init in supported_init, "Currently only None and 'mbar' are supported"
    self.init = init
    self.init_maxiter = init_maxiter
    self.init_maxerr = init_maxerr
    self.overcounting_factor = overcounting_factor

    # -- results, filled in later ---------------------------------------
    self.active_set = self.biased_conf_energies = self.mbar_therm_energies = None
    self.log_lagrangian_mult = self.loglikelihoods = None
def __init__(
        self, lag, count_mode='sliding', connectivity='post_hoc_RE',
        nstates_full=None, equilibrium=None,
        maxiter=10000, maxerr=1.0E-15, save_convergence_info=0,
        dt_traj='1 step', nn=None, connectivity_factor=1.0,
        direct_space=False, N_dtram_accelerations=0, callback=None,
        init='mbar', init_maxiter=5000, init_maxerr=1.0E-8,
        overcounting_factor=1.0):
    r"""Transition(-based) Reweighting Analysis Method

    Parameters
    ----------
    lag : int
        Integer lag time at which transitions are counted.
    count_mode : str, optional, default='sliding'
        How count matrices are obtained from the discrete trajectories:

        * 'sliding' : a trajectory of length T contributes
          :math:`T-\tau` counts at time indexes

          .. math::

             (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

        * 'sample' : a trajectory of length T contributes
          :math:`T/\tau` counts at time indexes

          .. math::

             (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., ((T/\tau-1) \tau \rightarrow T)

        Currently only 'sliding' is supported.
    connectivity : str, optional, default='post_hoc_RE'
        One of 'post_hoc_RE', 'BAR_variance', 'reversible_pathways' or
        'summed_count_matrix'. Selects the definition of a connected set
        in the joint (product) space of conformations and thermodynamic
        ensembles.

        * 'reversible_pathways' : every state in the connected set must be
          reachable by following a pathway of reversible transitions. A
          reversible transition between two Markov states (within the same
          thermodynamic state k) is a pair of Markov states that belong to
          the same strongly connected component of the count matrix (from
          thermodynamic state k). A pathway of reversible transitions is a
          list of reversible transitions
          [(i_1, i_2), (i_2, i_3),..., (i_(N-2), i_(N-1)), (i_(N-1), i_N)].
          The thermodynamic state in which a reversible transition happens
          is ignored when the pathways are constructed. This is equivalent
          to assuming that two ensembles overlap at a Markov state whenever
          frames from both ensembles exist in that Markov state.
        * 'post_hoc_RE' : like 'reversible_pathways' but with a stricter
          requirement for the overlap between thermodynamic states. Every
          state in the connected set must be reachable by following a
          pathway of reversible transitions or by jumping between
          overlapping thermodynamic states while staying in the same
          Markov state. Reversible transitions are defined as for
          'reversible_pathways'. Two thermodynamic states k and l overlap
          at Markov state n if a replica exchange simulation [2]_
          restricted to state n would show at least one transition from k
          to l or one transition from l to k. The expected number of
          replica exchanges is estimated from the simulation data. The
          minimal number of replica exchanges required per Markov state
          can be increased by decreasing `connectivity_factor`.
        * 'BAR_variance' : like 'post_hoc_RE' but the thermodynamic
          overlap is defined through the variance of the BAR estimator
          [3]_. Two thermodynamic states k and l overlap at Markov state n
          if the variance of the free energy difference Delta f_{kl}
          computed with BAR (restricted to conformations from Markov state
          n) is less than or equal to one. The minimally required variance
          can be controlled with `connectivity_factor`.
        * 'summed_count_matrix' : all thermodynamic states are assumed to
          overlap. The connected set is computed by summing the count
          matrices over all thermodynamic states and taking its largest
          strongly connected set. Not recommended!

        For more details see :func:`thermotools.cset.compute_csets_TRAM`.
    nstates_full : int, optional, default=None
        Number of cluster centers, i.e., the size of the full set of
        states.
    equilibrium : list of booleans, optional
        One flag per trajectory triple (ttraj[i], dtraj[i], btraj[i])
        telling whether global equilibrium is assumed for it. A triple
        flagged True contributes only to thermodynamic quantities, not to
        kinetic ones. By default no trajectory is assumed to be in global
        equilibrium. This is the TRAMMBAR extension.
    maxiter : int, optional, default=10000
        Maximum number of self-consistent iterations before the estimator
        exits unsuccessfully.
    maxerr : float, optional, default=1E-15
        Convergence criterion based on the maximal free energy change in
        one self-consistent iteration step.
    save_convergence_info : int, optional, default=0
        Store the actual increment and the actual log-likelihood every
        `save_convergence_info` iteration steps; 0 disables storage.
    dt_traj : str, optional, default='1 step'
        Description of the physical time corresponding to the lag. May be
        used by analysis algorithms such as plotting tools to pretty-print
        the axes. The default '1 step' means there is no physical time
        unit. Give a number, whitespace and a unit. Permitted units are
        (* is an arbitrary string):

        |  'fs',  'femtosecond*'
        |  'ps',  'picosecond*'
        |  'ns',  'nanosecond*'
        |  'us',  'microsecond*'
        |  'ms',  'millisecond*'
        |  's',   'second*'
    nn : optional, default=None
        Stored unchanged as ``self.nn``; its use is not visible in this
        constructor.
    connectivity_factor : float, optional, default=1.0
        Only needed if connectivity='post_hoc_RE' or 'BAR_variance'.
        Values greater than 1.0 weaken the connectivity conditions. For
        'post_hoc_RE' this multiplies the number of hypothetically
        observed transitions. For 'BAR_variance' this scales the threshold
        for the minimal allowed variance of free energy differences.
    direct_space : bool, optional, default=False
        Whether the self-consistent iteration works with Boltzmann factors
        (direct space) or with free energies (log-space). Direct space is
        not recommended for data from multi-temperature simulations.
    N_dtram_accelerations : int, optional, default=0
        TRAM convergence can be sped up by interleaving the updates of the
        self-consistent iteration with a dTRAM-like update step. This is
        the number of dTRAM-like updates applied in every iteration of the
        TRAM equations. Currently only effective if direct_space=True.
    callback : optional, default=None
        Stored unchanged as ``self.callback``; its use is not visible in
        this constructor.
    init : str, optional, default='mbar'
        Initialization used for the self-consistent iteration:

        | None:    use a hard-coded guess for free energies and Lagrangian multipliers
        | 'mbar':  perform a short MBAR estimate to initialize the free energies
    init_maxiter : int, optional, default=5000
        Maximum number of self-consistent iterations during the
        initialization.
    init_maxerr : float, optional, default=1.0E-8
        Convergence criterion for the initialization.
    overcounting_factor : double, default = 1.0
        Only needed if `equilibrium` contains True (TRAMMBAR). Sets the
        relative statistical weight of equilibrium and non-equilibrium
        frames: a value of n counts every non-equilibrium frame n times.
        Values above 1 raise the relative weight of the non-equilibrium
        data, values below 1 raise that of the equilibrium data.

    Raises
    ------
    AssertionError
        If `count_mode` is not 'sliding' or `init` is neither None nor
        'mbar'.

    References
    ----------
    .. [1] Wu, H. et al 2016
        Multiensemble Markov models of molecular thermodynamics and kinetics
        Proc. Natl. Acad. Sci. USA 113 E3221--E3230
    .. [2] Hukushima et al, Exchange Monte Carlo method and application to
        spin glass simulations, J. Phys. Soc. Jpn. 65, 1604 (1996)
    .. [3] Shirts and Chodera, Statistically optimal analysis of samples
        from multiple equilibrium states, J. Chem. Phys. 129, 124105 (2008)
    """
    # -- transition counting and connectivity ---------------------------
    self.lag = lag
    assert count_mode == 'sliding', "Currently the only implemented count_mode is 'sliding'"
    self.count_mode = count_mode
    self.connectivity = connectivity
    self.nn = nn
    self.connectivity_factor = connectivity_factor

    # -- physical time --------------------------------------------------
    self.dt_traj = dt_traj
    self.timestep_traj = _TimeUnit(dt_traj)

    # -- data description -----------------------------------------------
    self.nstates_full = nstates_full
    self.equilibrium = equilibrium

    # -- solver control -------------------------------------------------
    self.maxiter = maxiter
    self.maxerr = maxerr
    self.direct_space = direct_space
    self.N_dtram_accelerations = N_dtram_accelerations
    self.callback = callback
    self.save_convergence_info = save_convergence_info

    # -- initialization of the iteration --------------------------------
    supported_init = (None, 'mbar')
    assert init in supported_init, "Currently only None and 'mbar' are supported"
    self.init = init
    self.init_maxiter = init_maxiter
    self.init_maxerr = init_maxerr
    self.overcounting_factor = overcounting_factor

    # -- results, filled in later ---------------------------------------
    self.active_set = self.biased_conf_energies = self.mbar_therm_energies = None
    self.log_lagrangian_mult = self.loglikelihoods = None
def __init__(
        self, bias_energies_full, lag, count_mode='sliding', connectivity='largest',
        maxiter=10000, maxerr=1.0E-15, save_convergence_info=0, dt_traj='1 step',
        init=None, init_maxiter=10000, init_maxerr=1.0E-8):
    r""" Discrete Transition(-based) Reweighting Analysis Method

    Parameters
    ----------
    bias_energies_full : numpy.ndarray(shape=(num_therm_states, num_conf_states)) object
        bias_energies_full[j, i] is the bias energy in units of kT for each discrete state i
        at thermodynamic state j.
    lag : int
        Integer lag time at which transitions are counted.
    count_mode : str, optional, default='sliding'
        Mode to obtain count matrices from discrete trajectories. Should be one of:

        * 'sliding' : a trajectory of length T will have :math:`T-\tau` counts at time indexes

          .. math::

             (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

        * 'sample' : a trajectory of length T will have :math:`T/\tau` counts at time indexes

          .. math::

             (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., ((T/\tau-1) \tau \rightarrow T)

        Currently only 'sliding' is supported.
    connectivity : str, optional, default='largest'
        Defines what should be considered a connected set in the joint space of conformations
        and thermodynamic ensembles. Currently only 'largest' is supported.
    maxiter : int, optional, default=10000
        The maximum number of self-consistent iterations before the estimator exits
        unsuccessfully.
    maxerr : float, optional, default=1.0E-15
        Convergence criterion based on the maximal free energy change in a self-consistent
        iteration step.
    save_convergence_info : int, optional, default=0
        Every save_convergence_info iteration steps, store the actual increment
        and the actual loglikelihood; 0 means no storage.
    dt_traj : str, optional, default='1 step'
        Description of the physical time corresponding to the lag. May be used by analysis
        algorithms such as plotting tools to pretty-print the axes. By default '1 step', i.e.
        there is no physical time unit. Specify by a number, whitespace and unit. Permitted
        units are (* is an arbitrary string):

        |  'fs',  'femtosecond*'
        |  'ps',  'picosecond*'
        |  'ns',  'nanosecond*'
        |  'us',  'microsecond*'
        |  'ms',  'millisecond*'
        |  's',   'second*'
    init : str, optional, default=None
        Use a specific initialization for self-consistent iteration:

        | None:    use a hard-coded guess for free energies and Lagrangian multipliers
        | 'wham':  perform a short WHAM estimate to initialize the free energies
    init_maxiter : int, optional, default=10000
        The maximum number of self-consistent iterations during the initialization.
    init_maxerr : float, optional, default=1.0E-8
        Convergence criterion for the initialization.

    Raises
    ------
    AssertionError
        If `count_mode`, `connectivity` or `init` has an unsupported value.

    Example
    -------
    >>> from pyemma.thermo import DTRAM
    >>> import numpy as np
    >>> B = np.array([[0, 0],[0.5, 1.0]])
    >>> dtram = DTRAM(B, 1)
    >>> ttrajs = [np.array([0,0,0,0,0,0,0,0,0,0]),np.array([1,1,1,1,1,1,1,1,1,1])]
    >>> dtrajs = [np.array([0,0,0,0,1,1,1,0,0,0]),np.array([0,1,0,1,0,1,1,0,0,1])]
    >>> dtram = dtram.estimate((ttrajs, dtrajs))
    >>> dtram.log_likelihood() # doctest: +ELLIPSIS
    -9.805...
    >>> dtram.count_matrices # doctest: +SKIP
    array([[[5, 1],
            [1, 2]],
           [[1, 4],
            [3, 1]]], dtype=int32)
    >>> dtram.stationary_distribution # doctest: +ELLIPSIS
    array([ 0.38..., 0.61...])
    >>> dtram.meval('stationary_distribution') # doctest: +ELLIPSIS
    [array([ 0.38..., 0.61...]), array([ 0.50..., 0.49...])]

    References
    ----------
    .. [1] Wu, H. et al 2014
        Statistically optimal analysis of state-discretized trajectory data from multiple
        thermodynamic states
        J. Chem. Phys. 141, 214106
    """
    # set all parameters
    self.bias_energies_full = _types.ensure_ndarray(bias_energies_full, ndim=2, kind='numeric')
    self.lag = lag
    assert count_mode == 'sliding', 'Currently the only implemented count_mode is \'sliding\''
    self.count_mode = count_mode
    assert connectivity == 'largest', 'Currently the only implemented connectivity is \'largest\''
    self.connectivity = connectivity
    self.dt_traj = dt_traj
    self.maxiter = maxiter
    self.maxerr = maxerr
    self.save_convergence_info = save_convergence_info
    assert init in (None, 'wham'), 'Currently only None and \'wham\' are supported'
    self.init = init
    self.init_maxiter = init_maxiter
    self.init_maxerr = init_maxerr
    # set derived quantities
    # Read the shape from the validated array, not from the raw argument:
    # the raw argument may be an array-like without a ``shape`` attribute.
    self.nthermo, self.nstates_full = self.bias_energies_full.shape
    self.timestep_traj = _TimeUnit(dt_traj)
    # set iteration variables
    self.therm_energies = None
    self.conf_energies = None
    self.log_lagrangian_mult = None
def __init__(self, lag=1, reversible=True, count_mode='sliding', sparse=False,
             connectivity='largest', dt_traj='1 step'):
    r"""Maximum likelihood estimator for MSMs given discrete trajectory statistics

    Parameters
    ----------
    lag : int
        lag time at which transitions are counted and the transition matrix
        is estimated.

    reversible : bool, optional, default = True
        If true compute reversible MSM, else non-reversible MSM

    count_mode : str, optional, default='sliding'
        mode to obtain count matrices from discrete trajectories. Should be
        one of:

        * 'sliding' : A trajectory of length T will have :math:`T-\tau`
          counts at time indexes

          .. math::

             (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

        * 'effective' : Uses an estimate of the transition counts that are
          statistically uncorrelated. Recommended when used with a
          Bayesian MSM.
        * 'sample' : A trajectory of length T will have :math:`T/\tau`
          counts at time indexes

          .. math::

             (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., (((T/\tau)-1) \tau \rightarrow T)

    sparse : bool, optional, default = False
        If true compute count matrix, transition matrix and all derived
        quantities using sparse matrix algebra. In this case python sparse
        matrices will be returned by the corresponding functions instead of
        numpy arrays. This behavior is suggested for very large numbers of
        states (e.g. > 4000) because it is likely to be much more
        efficient.

    connectivity : str, optional, default = 'largest'
        Connectivity mode. Three methods are intended (currently only
        'largest' is implemented)

        * 'largest' : The active set is the largest reversibly connected
          set. All estimation will be done on this subset and all
          quantities (transition matrix, stationary distribution, etc) are
          only defined on this subset and are correspondingly smaller than
          the full set of states
        * 'all' : The active set is the full set of states. Estimation will
          be conducted on each reversibly connected set separately. That
          means the transition matrix will decompose into disconnected
          submatrices, the stationary vector is only defined within
          subsets, etc. Currently not implemented.
        * 'none' : The active set is the full set of states. Estimation
          will be conducted on the full set of states without ensuring
          connectivity. This only permits nonreversible estimation.
          Currently not implemented.

    dt_traj : str, optional, default='1 step'
        Description of the physical time of the input trajectories. May be
        used by analysis algorithms such as plotting tools to pretty-print
        the axes. By default '1 step', i.e. there is no physical time unit.
        Specify by a number, whitespace and unit. Permitted units are
        (* is an arbitrary string):

        |  'fs',  'femtosecond*'
        |  'ps',  'picosecond*'
        |  'ns',  'nanosecond*'
        |  'us',  'microsecond*'
        |  'ms',  'millisecond*'
        |  's',   'second*'

    Raises
    ------
    ValueError
        If `count_mode` or `connectivity` is not one of the known modes.
    NotImplementedError
        If `connectivity` is 'all' or 'none'.
    """
    self.lag = lag

    # set basic parameters
    self.reversible = reversible

    # sparse matrix computation wanted?
    self.sparse = sparse

    # store counting mode (lowercase)
    self.count_mode = str(count_mode).lower()
    if self.count_mode not in ('sliding', 'effective', 'sample'):
        # str() keeps this a ValueError even for non-string input
        raise ValueError('count mode ' + str(count_mode) + ' is unknown.')

    # store connectivity mode (lowercase)
    self.connectivity = connectivity.lower()
    if self.connectivity in ('all', 'none'):
        raise NotImplementedError(
            'MSM estimation with connectivity=\'%s\' is currently not implemented.' % self.connectivity)
    if self.connectivity != 'largest':
        raise ValueError('connectivity mode ' + str(connectivity) + ' is unknown.')
    # 'largest' is the current default; nothing else to do

    # time step
    self.dt_traj = dt_traj
    self.timestep_traj = _TimeUnit(dt_traj)
def __init__(self, bias_energies_full, maxiter=10000, maxerr=1.0E-15, save_convergence_info=0,
             dt_traj='1 step', stride=1):
    r"""Weighted Histogram Analysis Method

    Parameters
    ----------
    bias_energies_full : numpy.ndarray(shape=(num_therm_states, num_conf_states)) object
        bias_energies_full[j, i] is the bias energy in units of kT for each discrete state i
        at thermodynamic state j.
    maxiter : int, optional, default=10000
        The maximum number of self-consistent iterations before the estimator exits
        unsuccessfully.
    maxerr : float, optional, default=1.0E-15
        Convergence criterion based on the maximal free energy change in a self-consistent
        iteration step.
    save_convergence_info : int, optional, default=0
        Every save_convergence_info iteration steps, store the actual increment
        and the actual loglikelihood; 0 means no storage.
    dt_traj : str, optional, default='1 step'
        Description of the physical time of the input trajectories. May be used by analysis
        algorithms such as plotting tools to pretty-print the axes. By default '1 step', i.e.
        there is no physical time unit. Specify by a number, whitespace and unit. Permitted
        units are (* is an arbitrary string):

        |  'fs',  'femtosecond*'
        |  'ps',  'picosecond*'
        |  'ns',  'nanosecond*'
        |  'us',  'microsecond*'
        |  'ms',  'millisecond*'
        |  's',   'second*'
    stride : int, optional, default=1
        not used (the value is only stored as ``self.stride``)

    Example
    -------
    >>> from pyemma.thermo import WHAM
    >>> import numpy as np
    >>> B = np.array([[0, 0],[0.5, 1.0]])
    >>> wham = WHAM(B)
    >>> ttrajs = [np.array([0,0,0,0,0,0,0,0,0,0]),np.array([1,1,1,1,1,1,1,1,1,1])]
    >>> dtrajs = [np.array([0,0,0,0,1,1,1,0,0,0]),np.array([0,1,0,1,0,1,1,0,0,1])]
    >>> wham = wham.estimate((ttrajs, dtrajs))
    >>> wham.log_likelihood() # doctest: +ELLIPSIS
    -6.6...
    >>> wham.state_counts # doctest: +SKIP
    array([[7, 3],
           [5, 5]])
    >>> wham.stationary_distribution # doctest: +ELLIPSIS +REPORT_NDIFF
    array([ 0.5..., 0.4...])
    >>> wham.meval('stationary_distribution') # doctest: +ELLIPSIS +REPORT_NDIFF
    [array([ 0.5..., 0.4...]), array([ 0.6..., 0.3...])]

    References
    ----------
    .. [1] Ferrenberg, A.M. and Swendsen, R.H. 1988.
        New Monte Carlo Technique for Studying Phase Transitions.
        Phys. Rev. Lett. 61, 2635--2638

    .. [2] Kumar, S. et al 1992.
        The Weighted Histogram Analysis Method for Free-Energy Calculations on Biomolecules.
        I. The Method.
        J. Comp. Chem. 13, 1011--1021
    """
    self.bias_energies_full = _types.ensure_ndarray(bias_energies_full, ndim=2, kind='numeric')
    self.stride = stride
    self.dt_traj = dt_traj
    self.maxiter = maxiter
    self.maxerr = maxerr
    self.save_convergence_info = save_convergence_info
    # set derived quantities
    # Read the shape from the validated array, not from the raw argument:
    # the raw argument may be an array-like without a ``shape`` attribute.
    self.nthermo, self.nstates_full = self.bias_energies_full.shape
    self.timestep_traj = _TimeUnit(dt_traj)
    # set iteration variables
    self.therm_energies = None
    self.conf_energies = None