def test_largest_connected_set(self):
    """Compare the largest connected set with the stored reference sets.

    The directed case calls ``largest_connected_set`` with its default
    arguments, the undirected case passes ``directed=False``. The result
    is sorted before it is compared element-wise with the reference.
    """
    cases = (
        ({}, self.lcc_directed),                      # directed
        ({'directed': False}, self.lcc_undirected),   # undirected
    )
    for call_kwargs, expected in cases:
        found = largest_connected_set(self.C, **call_kwargs)
        self.assertTrue(np.all(expected == np.sort(found)))
def _prepare_input_revpi(self, C, pi):
    """Select the states usable for estimation with a given stationary vector.

    Parameters
    ----------
    C : count matrix
        Its first dimension is taken as the number of states visited by
        the trajectories.
    pi : stationary vector array
        Has to be defined on all visited states, i.e. it must have at
        least ``C.shape[0]`` entries.

    Returns
    -------
    Indices of the largest (undirected) connected set among the visited
    states with positive stationary probability.

    Raises
    ------
    ValueError
        If ``pi`` is shorter than the number of visited states, or if the
        count matrix restricted to the positive-probability states sums
        to zero.
    """
    n_visited = C.shape[0]
    n_given = pi.shape[0]

    # pi must cover every state visited by the trajectories
    if n_visited > n_given:
        raise ValueError(
            "There are visited states for which no stationary probability is given"
        )

    # Visited states that carry positive stationary probability
    positive = _np.where(pi[0:n_visited] > 0.0)[0]

    # Count matrix reduced to the positive-probability states
    C_positive = msmest.connected_cmatrix(C, lcc=positive)
    if C_positive.sum() == 0.0:
        raise ValueError(
            "The set of states with positive stationary probabilities is not visited "
            "by the trajectories. A MSM reversible with respect to the given "
            "stationary vector can not be estimated"
        )

    # Largest connected set of the reduced matrix, undirected connectivity,
    # mapped back to the original state indices
    connected = msmest.largest_connected_set(C_positive, directed=False)
    return positive[connected]
def setUp(self):
    """Simulate a meta-stable birth-death chain and estimate a reference MSM.

    The state of the global numpy RNG is stored in ``self.state`` before the
    RNG is reseeded with a fixed value to enforce 'deterministic' behavior.
    """
    self.state = np.random.mtrand.get_state()
    np.random.mtrand.seed(42)

    # Meta-stable birth-death chain on 7 states
    b = 2
    rare = 10**(-b)
    likely = 1.0 - 10**(-b)
    q = np.array([0.0, 0.5, likely, 0.5, rare, 0.5, 0.5])
    p = np.array([0.5, 0.5, rare, 0.5, likely, 0.5, 0.0])
    chain = BirthDeathChain(q, p)
    T = chain.transition_matrix()

    self.tau = 1
    self.k = 3
    self.dtraj = generate_traj(T, 10000, start=0)

    # Estimate MSM on the largest connected set
    self.C_MSM = count_matrix(self.dtraj, self.tau, sliding=True)
    self.lcc_MSM = largest_connected_set(self.C_MSM)
    self.Ccc_MSM = largest_connected_submatrix(self.C_MSM, lcc=self.lcc_MSM)
    self.P_MSM = transition_matrix(self.Ccc_MSM, reversible=True)
    self.mu_MSM = stationary_distribution(self.P_MSM)
    self.ts = timescales(self.P_MSM, k=self.k, tau=self.tau)
def setUpClass(cls) -> None:
    """Simulate a meta-stable birth-death chain and estimate a reference MSM.

    The state of the global numpy RNG is stored in ``cls.state`` before the
    RNG is reseeded with a fixed value to enforce 'deterministic' behavior.
    The reference transition matrix is estimated with the default ``maxerr``
    declared by ``MaximumLikelihoodMSM``, read from its signature.
    """
    import inspect

    cls.state = np.random.mtrand.get_state()
    np.random.mtrand.seed(42)

    # Meta-stable birth-death chain
    b = 2
    q = np.zeros(7)
    p = np.zeros(7)
    q[1:] = 0.5
    p[0:-1] = 0.5
    q[2] = 1.0 - 10 ** (-b)
    q[4] = 10 ** (-b)
    p[2] = 10 ** (-b)
    p[4] = 1.0 - 10 ** (-b)

    bdc = BirthDeathChain(q, p)
    P = bdc.transition_matrix()
    cls.dtraj = generate_traj(P, 10000, start=0)
    cls.tau = 1

    # Look up the estimator's default maxerr. ``argspec.defaults`` lines up
    # with the *trailing* entries of ``argspec.args``, so the offset is the
    # number of leading arguments without a default rather than a fixed 1.
    argspec = inspect.getfullargspec(MaximumLikelihoodMSM)
    n_without_default = len(argspec.args) - len(argspec.defaults)
    default_maxerr = argspec.defaults[argspec.args.index('maxerr') - n_without_default]

    # Estimate MSM
    cls.C_MSM = msmest.count_matrix(cls.dtraj, cls.tau, sliding=True)
    cls.lcc_MSM = msmest.largest_connected_set(cls.C_MSM)
    cls.Ccc_MSM = msmest.largest_connected_submatrix(cls.C_MSM, lcc=cls.lcc_MSM)
    cls.P_MSM = msmest.transition_matrix(cls.Ccc_MSM, reversible=True, maxerr=default_maxerr)
    cls.mu_MSM = msmana.stationary_distribution(cls.P_MSM)
    cls.k = 3
    cls.ts = msmana.timescales(cls.P_MSM, k=cls.k, tau=cls.tau)
def equilibrium_transition_matrix(Xi, omega, sigma, reversible=True, return_lcc=True):
    """
    Compute equilibrium transition matrix from OOM components:

    Parameters
    ----------
    Xi : ndarray(M, N, M)
        matrix of set-observable operators
    omega: ndarray(M,)
        information state vector of OOM
    sigma : ndarray(M,)
        evaluator of OOM
    reversible : bool, optional, default=True
        symmetrize corrected count matrix in order to obtain
        a reversible transition matrix.
    return_lcc: bool, optional, default=True
        return indices of largest connected set.

    Returns
    -------
    Tt_Eq : ndarray(N, N)
        equilibrium transition matrix, restricted to its largest connected set
    lcc : ndarray(M,)
        the largest connected set of the transition matrix
        (only returned if return_lcc is True).

    """
    import msmtools.estimation as me

    # Equilibrium correlation matrix with negative entries set to zero
    counts = np.einsum('j,jkl,lmn,n->km', omega, Xi, Xi, sigma)
    counts[counts < 0.0] = 0.0

    row_sums = np.sum(counts, axis=1)
    if reversible:
        # Symmetrize and normalize by row sums plus column sums
        numerator = counts + counts.T
        norm = row_sums + np.sum(counts, axis=0)
    else:
        numerator = counts
        norm = row_sums
    # Avoid zero row-sums. States with zero row-sums will be eliminated by
    # the active set update below.
    norm[norm == 0.0] = 1.0
    T_full = numerator / norm[:, None]

    # Active set update: keep only the largest connected set
    lcc = me.largest_connected_set(T_full)
    Tt_Eq = me.largest_connected_submatrix(T_full, lcc=lcc)

    if not return_lcc:
        return Tt_Eq
    return Tt_Eq, lcc
def max_likelihood_estimate(self):
    r"""Return the maximum likelihood estimate.

    Returns
    -------
    MarkovianMilestoningModel
        The model that maximizes the likelihood of the data.

    See Also
    --------
    :func:`msmtools.estimation.transition_matrix` :
        Low-level function used to estimate the transition kernel.

    Notes
    -----
    The transition kernel is estimated from the observed transition
    count matrix :math:`N` by maximizing the likelihood

    .. math:: \mathbb{P}(N|K)\propto\prod_{a,b}K_{ab}^{N_{ab}}.

    In the nonreversible case, this gives the estimate
    :math:`\hat{K}_{ab}=N_{ab}/N_a`, where :math:`N_a=\sum_{b}N_{ab}`
    is the total number of transitions starting from milestone
    :math:`a`. In the reversible case, the maximization is subject to
    the constraint of detailed balance. For details see Section III
    of Trendelkamp-Schroer et al. [1]_

    The mean lifetime of milestone :math:`a` is estimated by
    :math:`\hat{\tau}_a=T_a/N_a`, where :math:`T_a` is the total time
    spent in milestone state :math:`a`.

    """
    # Work only on the largest connected set of states.
    connected = estimation.largest_connected_set(
        self.count_matrix, directed=bool(self.reversible))
    states = self.states[connected]
    counts = self.count_matrix[connected, :][:, connected]

    # Mean lifetimes: total time per state over total outgoing counts.
    lifetimes = self.total_times[connected] / counts.sum(axis=1)
    _check_time_discretization(lifetimes, states)

    # Estimate the transition kernel; the reversible estimate additionally
    # yields the stationary vector that is passed on as stationary flux.
    model_kwargs = {'states': states, 'estimator': self}
    if self.reversible:
        kernel, flux = estimation.transition_matrix(
            counts, reversible=True, return_statdist=True)
        model_kwargs['stationary_flux'] = flux
    else:
        kernel = estimation.transition_matrix(counts, reversible=False)

    np.fill_diagonal(kernel, 0)
    return MarkovianMilestoningModel(kernel, lifetimes, **model_kwargs)
def trim_Cmat(Cmat, lcc, ID):
    """Drop sparsely sampled states from a count matrix and reconnect it.

    A state is removed when the sum of its row is below
    ``ID.trimfrac * total_counts / lcc.size``, with the threshold computed
    once from the input matrix. States are examined in order, and each row
    sum is taken from the matrix as it stands after earlier removals.
    Afterwards the matrix is restricted to its largest connected set
    (directed connectivity).

    Parameters
    ----------
    Cmat : count matrix indexed consistently with ``lcc``
    lcc : array of state labels, one per row/column of ``Cmat``
    ID : object providing the attribute ``trimfrac``

    Returns
    -------
    (Cmat_cc, lcc) : the trimmed, connected count matrix and the labels of
        the states that remain.
    """
    threshold = ID.trimfrac * np.sum(Cmat, dtype=float) / float(lcc.size)

    # ``cursor`` is the position of the state under inspection in the
    # current (possibly already shrunken) matrix. It only advances when the
    # state is kept, because a removal shifts the next state into its place.
    cursor = 0
    for _ in range(lcc.size):
        if np.sum(Cmat[cursor], dtype=float) < threshold:
            # trim from matrix and bins
            Cmat = np.delete(Cmat, cursor, axis=0)
            Cmat = np.delete(Cmat, cursor, axis=1)
            lcc = np.delete(lcc, cursor)
        else:
            cursor += 1

    # re-ensure that the trimmed matrix is connected
    connected = largest_connected_set(Cmat, directed=True)
    Cmat_cc = largest_connected_submatrix(Cmat, directed=True, lcc=connected)
    return Cmat_cc, lcc[connected]
def setUp(self):
    """Build reference data for two meta-stable sets of a birth-death chain.

    Stores the estimated model (``self.MSM``), the number of lag multiples
    (``self.K``), the two state sets (``self.A``, ``self.B``) and the expected
    results: set probabilities propagated with the MSM (``self.p_MSM``), set
    probabilities from MSMs re-estimated at lag ``k * tau`` (``self.p_MD``)
    and the associated error estimate (``self.eps_MD``).
    """
    # Store state of the rng, then reseed it to enforce 'deterministic'
    # behavior
    self.state = np.random.mtrand.get_state()
    np.random.mtrand.seed(42)

    # Meta-stable birth-death chain
    b = 2
    rare = 10 ** (-b)
    likely = 1.0 - 10 ** (-b)
    q = np.array([0.0, 0.5, likely, 0.5, rare, 0.5, 0.5])
    p = np.array([0.5, 0.5, rare, 0.5, likely, 0.5, 0.0])
    chain = BirthDeathChain(q, p)
    T = chain.transition_matrix()
    dtraj = generate_traj(T, 10000, start=0)
    tau = 1

    # Estimate MSM. The first three attributes are read as in the original
    # setup although they are not used further below.
    MSM = estimate_markov_model(dtraj, tau)
    C_MSM = MSM.count_matrix_full
    lcc_MSM = MSM.largest_connected_set
    Ccc_MSM = MSM.count_matrix_active
    P_MSM = MSM.transition_matrix
    mu_MSM = MSM.stationary_distribution

    # Meta-stable sets and the stationary distribution restricted to each
    # set (one row per set)
    A = [0, 1, 2]
    B = [4, 5, 6]
    sets = (A, B)
    w_MSM = np.zeros((2, mu_MSM.shape[0]))
    for row, members in enumerate(sets):
        w_MSM[row, members] = mu_MSM[members] / mu_MSM[members].sum()

    K = 10
    p_MSM = np.zeros((K, 2))
    p_MD = np.zeros((K, 2))
    eps_MD = np.zeros((K, 2))

    # Propagate the restricted distributions with the MSM, step by step
    propagated = 1.0 * w_MSM
    for k in range(1, K):
        propagated = np.dot(propagated, P_MSM)
        p_MSM[k, 0] = propagated[0, A].sum()
        p_MSM[k, 1] = propagated[1, B].sum()

    # Assume that sets are equal, A(tau)=A(k tau) for all k
    w_MD = 1.0 * w_MSM
    p_MSM[0, :] = 1.0
    p_MD[0, :] = 1.0
    eps_MD[0, :] = 0.0

    for k in range(1, K):
        # Build MSM at lagtime k*tau
        lag = k * tau
        C_MD = count_matrix(dtraj, lag, sliding=True) / lag
        lcc_MD = largest_connected_set(C_MD)
        Ccc_MD = largest_connected_submatrix(C_MD, lcc=lcc_MD)
        c_MD = Ccc_MD.sum(axis=1)
        P_MD = transition_matrix(Ccc_MD).toarray()
        w_MD_k = np.dot(w_MD, P_MD)

        # Same evaluation for set A (column 0) and set B (column 1)
        for col, members in enumerate(sets):
            prob_MD = w_MD_k[col, members].sum()
            c = c_MD[members].sum()
            p_MD[k, col] = prob_MD
            eps_MD[k, col] = np.sqrt(k * (prob_MD - prob_MD ** 2) / c)

    # Input
    self.MSM = MSM
    self.K = K
    self.A = A
    self.B = B

    # Expected results
    self.p_MSM = p_MSM
    self.p_MD = p_MD
    self.eps_MD = eps_MD
def init_discrete_hmm(observations, nstates, lag=1, reversible=True, stationary=True, regularize=True,
                      method='connect-spectral', separate=None):
    """Use a heuristic scheme to generate an initial model.

    Parameters
    ----------
    observations : list of ndarray((T_i))
        list of arrays of length T_i with observation data
    nstates : int
        The number of states.
    lag : int
        Lag time at which the observations should be counted.
    reversible : bool
        Estimate reversible HMM transition matrix.
    stationary : bool
        p0 is the stationary distribution of P. Currently only
        stationary=True is implemented; stationary=False raises
        NotImplementedError.
    regularize : bool
        Regularize HMM probabilities to avoid 0's.
    method : str
        * 'lcs-spectral' : Does spectral clustering on the largest connected set
            of observed states.
        * 'connect-spectral' : Uses a weak regularization to connect the weakly
            connected sets and then initializes HMM using spectral clustering on
            the nonempty set.
        * 'spectral' : Uses spectral clustering on the nonempty subsets. Separated
            observed states will end up in separate hidden states. This option is
            only recommended for small observation spaces. Use connect-spectral for
            large observation spaces.
    separate : None or iterable of int
        Force the given set of observed states to stay in a separate hidden state.
        The remaining nstates-1 states will be assigned by a metastable decomposition.

    Returns
    -------
    The model created by ``discrete_hmm(p0, P, B)``, with its ``_lag``
    attribute set to ``lag``.

    Raises
    ------
    NotImplementedError
        If ``stationary`` is False or ``method`` is not one of the options above.

    Examples
    --------

    Generate initial model for a discrete output model.

    >>> import bhmm
    >>> [model, observations, states] = bhmm.testsystems.generate_synthetic_observations(output='discrete')
    >>> initial_model = init_discrete_hmm(observations, model.nstates)

    """
    import msmtools.estimation as msmest
    from bhmm.init.discrete import init_discrete_hmm_spectral

    C = msmest.count_matrix(observations, lag).toarray()

    # regularization: None when regularizing, explicit zeros otherwise
    eps_A = eps_B = None if regularize else 0

    if not stationary:
        raise NotImplementedError('Discrete-HMM initialization with stationary=False is not yet implemented.')

    # Pick the active set (and adapt C where needed) according to the method
    if method == 'lcs-spectral':
        active_set = msmest.largest_connected_set(C)
    elif method == 'connect-spectral':
        # make sure we're strongly connected
        C += msmest.prior_neighbor(C, 0.001)
        active_set = _np.where(C.sum(axis=0) + C.sum(axis=1) > 0)[0]
        # give every nonempty state a minimal diagonal count
        C[active_set, active_set] = _np.maximum(C[active_set, active_set], 0.001)
    elif method == 'spectral':
        active_set = None
    else:
        raise NotImplementedError('Unknown discrete-HMM initialization method ' + str(method))

    p0, P, B = init_discrete_hmm_spectral(C, nstates, reversible=reversible, stationary=stationary,
                                          active_set=active_set, separate=separate,
                                          eps_A=eps_A, eps_B=eps_B)

    hmm0 = discrete_hmm(p0, P, B)
    hmm0._lag = lag
    return hmm0
def init_discrete_hmm(observations, nstates, lag=1, reversible=True, stationary=True, regularize=True,
                      method='connect-spectral', separate=None):
    """Use a heuristic scheme to generate an initial model.

    Parameters
    ----------
    observations : list of ndarray((T_i))
        list of arrays of length T_i with observation data
    nstates : int
        The number of states.
    lag : int
        Lag time at which the observations should be counted.
    reversible : bool
        Estimate reversible HMM transition matrix.
    stationary : bool
        p0 is the stationary distribution of P. Currently only
        stationary=True is implemented; stationary=False raises
        NotImplementedError.
    regularize : bool
        Regularize HMM probabilities to avoid 0's.
    method : str
        * 'lcs-spectral' : Does spectral clustering on the largest connected set
            of observed states.
        * 'connect-spectral' : Uses a weak regularization to connect the weakly
            connected sets and then initializes HMM using spectral clustering on
            the nonempty set.
        * 'spectral' : Uses spectral clustering on the nonempty subsets. Separated
            observed states will end up in separate hidden states. This option is
            only recommended for small observation spaces. Use connect-spectral for
            large observation spaces.
    separate : None or iterable of int
        Force the given set of observed states to stay in a separate hidden state.
        The remaining nstates-1 states will be assigned by a metastable decomposition.

    Returns
    -------
    The model created by ``discrete_hmm(p0, P, B)``, with its ``_lag``
    attribute set to ``lag``.

    Raises
    ------
    NotImplementedError
        If ``stationary`` is False or ``method`` is not one of the options above.

    Examples
    --------

    Generate initial model for a discrete output model.

    >>> import bhmm
    >>> [model, observations, states] = bhmm.testsystems.generate_synthetic_observations(output='discrete')
    >>> initial_model = init_discrete_hmm(observations, model.nstates)

    """
    import msmtools.estimation as msmest
    from bhmm.init.discrete import init_discrete_hmm_spectral

    C = msmest.count_matrix(observations, lag).toarray()

    # Keyword arguments shared by every spectral initialization call below.
    # With regularization the eps values are left as None, otherwise they
    # are set to zero.
    eps = None if regularize else 0
    shared = dict(reversible=reversible, stationary=stationary,
                  separate=separate, eps_A=eps, eps_B=eps)

    if not stationary:
        raise NotImplementedError(
            'Discrete-HMM initialization with stationary=False is not yet implemented.'
        )

    if method == 'spectral':
        p0, P, B = init_discrete_hmm_spectral(C, nstates, active_set=None, **shared)
    elif method == 'lcs-spectral':
        lcs = msmest.largest_connected_set(C)
        p0, P, B = init_discrete_hmm_spectral(C, nstates, active_set=lcs, **shared)
    elif method == 'connect-spectral':
        # make sure we're strongly connected
        C += msmest.prior_neighbor(C, 0.001)
        total_in_out = C.sum(axis=0) + C.sum(axis=1)
        nonempty = _np.where(total_in_out > 0)[0]
        diagonal = C[nonempty, nonempty]
        C[nonempty, nonempty] = _np.maximum(diagonal, 0.001)
        p0, P, B = init_discrete_hmm_spectral(C, nstates, active_set=nonempty, **shared)
    else:
        raise NotImplementedError(
            'Unknown discrete-HMM initialization method ' + str(method))

    hmm0 = discrete_hmm(p0, P, B)
    hmm0._lag = lag
    return hmm0
def _estimate(self, dtrajs):
    """Estimate the MSM from discrete trajectories.

    Parameters
    ----------
    dtrajs : list containing ndarrays(dtype=int) or ndarray(n, dtype=int) or :class:`pyemma.msm.util.dtraj_states.DiscreteTrajectoryStats`
        discrete trajectories, stored as integer ndarrays (arbitrary size)
        or a single ndarray for only one trajectory.

    Returns
    -------
    self
        This estimator. The estimated model parameters are set on it via
        ``set_model_params``.

    Raises
    ------
    ValueError
        If ``connectivity`` is 'none', ``reversible`` is set and the active
        count matrix is not connected.
    NotImplementedError
        If ``connectivity`` is neither 'largest' nor 'none'.

    """
    # ensure right format
    # NOTE(review): the isinstance check below runs on the output of
    # ensure_dtraj_list - confirm that a stats object passes through unchanged.
    dtrajs = ensure_dtraj_list(dtrajs)
    # harvest discrete statistics
    if isinstance(dtrajs, _DiscreteTrajectoryStats):
        dtrajstats = dtrajs
    else:
        # compute and store discrete trajectory statistics
        dtrajstats = _DiscreteTrajectoryStats(dtrajs)
        # check if this MSM seems too large to be dense
        if dtrajstats.nstates > 4000 and not self.sparse:
            # Logger.warn is a deprecated alias (removed in Python 3.13)
            self.logger.warning(
                'Building a dense MSM with ' + str(dtrajstats.nstates) + ' states. This can be '
                'inefficient or unfeasible in terms of both runtime and memory consumption. '
                'Consider using sparse=True.')

    # count lagged
    dtrajstats.count_lagged(self.lag, count_mode=self.count_mode)

    # full count matrix and number of states
    self._C_full = dtrajstats.count_matrix()
    self._nstates_full = self._C_full.shape[0]

    # set active set. This is at the same time a mapping from active to full
    if self.connectivity == 'largest':
        if self.statdist_constraint is None:
            # statdist not given - full connectivity on all states
            self.active_set = dtrajstats.largest_connected_set
        else:
            # statdist given - simple connectivity on all nonzero probability states
            nz = _np.nonzero(self.statdist_constraint)[0]
            Cnz = dtrajstats.count_matrix(subset=nz)
            self.active_set = nz[msmest.largest_connected_set(
                Cnz, directed=False)]
    else:
        # every other connectivity mode: all visited states are active
        self.active_set = dtrajstats.visited_set

    # FIXME: setting is_estimated before so that we can start using the parameters just set, but this is not clean!
    # is estimated
    self._is_estimated = True

    # active count matrix and number of states
    self._C_active = dtrajstats.count_matrix(subset=self.active_set)
    self._nstates = self._C_active.shape[0]

    # back-mapping from full to lcs: -1 marks states outside the active set
    self._full2active = -1 * _np.ones((dtrajstats.nstates), dtype=int)
    self._full2active[self.active_set] = _np.arange(len(self.active_set), dtype=int)

    # restrict stationary distribution to active set
    if self.statdist_constraint is None:
        statdist_active = None
    else:
        # fancy indexing yields a copy, so the in-place renormalization
        # leaves self.statdist_constraint untouched
        statdist_active = self.statdist_constraint[self.active_set]
        statdist_active /= statdist_active.sum()  # renormalize

    # Estimate transition matrix
    if self.connectivity not in ('largest', 'none'):
        # report the offending value (the message used to contain the
        # literal text 'self.connectivity')
        raise NotImplementedError(
            'MSM estimation with connectivity=\'{}\' is currently not implemented.'.format(
                self.connectivity)
        )
    if self.connectivity == 'none':
        # reversible mode only possible if active set is connected
        # - in this case all visited states are connected and thus
        # this mode is identical to 'largest'
        if self.reversible and not msmest.is_connected(self._C_active):
            raise ValueError(
                'Reversible MSM estimation is not possible with connectivity mode \'none\', '
                + 'because the set of all visited states is not reversibly connected'
            )
    P = msmest.transition_matrix(self._C_active, reversible=self.reversible,
                                 mu=statdist_active, maxiter=self.maxiter,
                                 maxerr=self.maxerr)

    # continue sparse or dense?
    if not self.sparse:
        # converting count matrices to arrays. As a result the
        # transition matrix and all subsequent properties will be
        # computed using dense arrays and dense matrix algebra.
        self._C_full = self._C_full.toarray()
        self._C_active = self._C_active.toarray()
        P = P.toarray()

    # Done. We set our own model parameters, so this estimator is
    # equal to the estimated model.
    self._dtrajs_full = dtrajs
    self._connected_sets = msmest.connected_sets(self._C_full)
    self.set_model_params(P=P, pi=statdist_active, reversible=self.reversible,
                          dt_model=self.timestep_traj.get_scaled(self.lag))

    return self
def posterior_sample(self, size=100):
    r"""Generate a sample from the posterior distribution.

    Parameters
    ----------
    size : int, optional
        The sample size, i.e., the number of models to generate.

    Returns
    -------
    Collection[MarkovianMilestoningModel]
        The sampled models.

    See Also
    --------
    :func:`msmtools.estimation.tmatrix_sampler` :
        Low-level function used to sample transition kernels.

    Notes
    -----
    Transition kernels are sampled from the posterior distribution

    .. math:: \mathbb{P}(K|N) \propto \mathbb{P}(K)
                                      \prod_{a,b} K_{ab}^{N_{ab}},

    where the prior :math:`\mathbb{P}(K)` depends on whether detailed
    balance is assumed. For details see Section IV of
    Trendelkamp-Schroer et al. [1]_ Sampling is initiated from the
    maximum likelihood estimate of :math:`K`.

    The mean lifetime of milestone :math:`a` is sampled from an
    inverse Gamma distribution with shape :math:`N_a` and scale
    :math:`T_a`.

    """
    # Work only on the largest connected set of states.
    connected = estimation.largest_connected_set(
        self.count_matrix, directed=bool(self.reversible))
    states = self.states[connected]
    counts = self.count_matrix[connected, :][:, connected]
    times = self.total_times[connected]
    counts_per_state = counts.sum(axis=1)
    _check_time_discretization(times / counts_per_state, states)

    # Sample jump rates (inverse mean lifetimes), one column per state:
    # Gamma distributed with shape N_a and scale 1/T_a.
    rng = np.random.default_rng()
    rates = np.zeros((size, len(states)))
    for col, (n_a, t_a) in enumerate(zip(counts_per_state, times)):
        rates[:, col] = rng.gamma(n_a, scale=1/t_a, size=size)

    # The transition matrix sampler is started from the maximum
    # likelihood estimate of the kernel.
    start = estimation.transition_matrix(counts, reversible=self.reversible)
    sampler = estimation.tmatrix_sampler(
        counts, reversible=self.reversible, T0=start)

    # Nonreversible case: kernels only.
    if not self.reversible:
        kernels = sampler.sample(nsamples=size)
        for kernel in kernels:
            np.fill_diagonal(kernel, 0)
        return [MarkovianMilestoningModel(kernel, 1/rate, states=states,
                                          estimator=self)
                for kernel, rate in zip(kernels, rates)]

    # Reversible case: kernels together with their stationary vectors.
    kernels, fluxes = sampler.sample(nsamples=size, return_statdist=True)
    for kernel in kernels:
        np.fill_diagonal(kernel, 0)
    return [MarkovianMilestoningModel(kernel, 1/rate, stationary_flux=flux,
                                      states=states, estimator=self)
            for kernel, rate, flux in zip(kernels, rates, fluxes)]