def update(self, Tij, Pi=None):
    r"""Update the transition matrix and recompute all derived quantities.

    Stores a copy of ``Tij``, sets the initial distribution ``self._Pi`` and
    tries to compute a spectral decomposition of the transition matrix.

    Parameters
    ----------
    Tij : array_like
        Transition matrix. Must pass ``msmtools.analysis.is_transition_matrix``
        and its first dimension must equal ``self._nstates``.
    Pi : array_like, optional, default=None
        Initial distribution. It is copied and normalized to sum to one.
        If None, the stationary distribution of ``Tij`` is used. If
        ``self._stationary`` is set, a given ``Pi`` must be close to the
        stationary distribution of ``Tij``.

    Raises
    ------
    AssertionError
        If one of the consistency checks on ``Tij`` or ``Pi`` fails.

    """
    # EMMA imports
    from msmtools import analysis as msmana
    from scipy.linalg import LinAlgError

    # save a copy of the transition matrix
    self._Tij = np.array(Tij)
    assert msmana.is_transition_matrix(self._Tij), \
        'Given transition matrix is not a stochastic matrix'
    assert self._Tij.shape[0] == self._nstates, \
        'Given transition matrix has unexpected number of states '

    # initial / stationary distribution
    if Pi is not None:
        # Convert (and copy) BEFORE comparing: a plain list cannot be
        # compared element-wise against 0.
        Pi = np.array(Pi)
        assert np.all(Pi >= 0), 'Given initial distribution contains negative elements.'
        Pi = Pi / np.sum(Pi)  # ensure normalization

    if self._stationary:
        pT = msmana.stationary_distribution(self._Tij)
        if Pi is None:
            # stationary and no distribution fixed: take it from the transition matrix
            self._Pi = pT
        else:
            # stationary with a fixed distribution: it must match the transition matrix
            assert np.allclose(Pi, pT), 'Stationary HMM requested, but given distribution is not the ' \
                                        'stationary distribution of the given transition matrix.'
            self._Pi = Pi
    else:
        if Pi is None:
            # no initial distribution given, so use stationary distribution anyway
            self._Pi = msmana.stationary_distribution(self._Tij)
        else:
            self._Pi = Pi

    # reversible: check the stored array copy, consistent with the checks above
    if self._reversible:
        assert msmana.is_reversible(self._Tij), \
            'Reversible HMM requested, but given transition matrix is not reversible.'

    # try to do eigendecomposition by default, because it's very cheap for
    # hidden transition matrices
    try:
        if self._reversible:
            self._R, self._D, self._L = msmana.rdl_decomposition(self._Tij, norm='reversible')
            # everything must be real-valued
            self._R = self._R.real
            self._D = self._D.real
            self._L = self._L.real
        else:
            self._R, self._D, self._L = msmana.rdl_decomposition(self._Tij, norm='standard')
        self._eigenvalues = np.diag(self._D)
        self._spectral_decomp_available = True
    except LinAlgError:
        # NOTE(review): `.warn` is a deprecated alias of `.warning` on stdlib
        # loggers; kept because the type returned by logger() is not visible here.
        logger().warn('Eigendecomposition failed for transition matrix\n' + str(self._Tij) +
                      '\nspectral properties will not be available')
        self._spectral_decomp_available = False
def update(self, Tij, Pi=None):
    r"""Update the transition matrix and recompute all derived quantities.

    Stores a copy of ``Tij``, sets the initial distribution ``self._Pi`` and
    tries to compute a spectral decomposition of the transition matrix.

    Parameters
    ----------
    Tij : array_like
        Transition matrix. Must pass ``msmtools.analysis.is_transition_matrix``
        and its first dimension must equal ``self._nstates``.
    Pi : array_like, optional, default=None
        Initial distribution. It is copied and normalized to sum to one.
        If None, the stationary distribution of ``Tij`` is used. If
        ``self._stationary`` is set, a given ``Pi`` must be close to the
        stationary distribution of ``Tij``.

    Raises
    ------
    AssertionError
        If one of the consistency checks on ``Tij`` or ``Pi`` fails.

    """
    # EMMA imports
    from msmtools import analysis as msmana
    from scipy.linalg import LinAlgError

    # save a copy of the transition matrix
    self._Tij = np.array(Tij)
    assert msmana.is_transition_matrix(self._Tij), \
        'Given transition matrix is not a stochastic matrix'
    assert self._Tij.shape[0] == self._nstates, \
        'Given transition matrix has unexpected number of states '

    # initial / stationary distribution
    if Pi is not None:
        # Convert (and copy) BEFORE comparing: a plain list cannot be
        # compared element-wise against 0.
        Pi = np.array(Pi)
        assert np.all(Pi >= 0), 'Given initial distribution contains negative elements.'
        Pi = Pi / np.sum(Pi)  # ensure normalization

    if self._stationary:
        pT = msmana.stationary_distribution(self._Tij)
        if Pi is None:
            # stationary and no distribution fixed: take it from the transition matrix
            self._Pi = pT
        else:
            # stationary with a fixed distribution: it must match the transition matrix
            assert np.allclose(Pi, pT), 'Stationary HMM requested, but given distribution is not the ' \
                                        'stationary distribution of the given transition matrix.'
            self._Pi = Pi
    else:
        if Pi is None:
            # no initial distribution given, so use stationary distribution anyway
            self._Pi = msmana.stationary_distribution(self._Tij)
        else:
            self._Pi = Pi

    # reversible: check the stored array copy, consistent with the checks above
    if self._reversible:
        assert msmana.is_reversible(self._Tij), \
            'Reversible HMM requested, but given transition matrix is not reversible.'

    # try to do eigendecomposition by default, because it's very cheap for
    # hidden transition matrices
    try:
        if self._reversible:
            self._R, self._D, self._L = msmana.rdl_decomposition(self._Tij, norm='reversible')
            # everything must be real-valued
            self._R = self._R.real
            self._D = self._D.real
            self._L = self._L.real
        else:
            self._R, self._D, self._L = msmana.rdl_decomposition(self._Tij, norm='standard')
        self._eigenvalues = np.diag(self._D)
        self._spectral_decomp_available = True
    except LinAlgError:
        # NOTE(review): `.warn` is a deprecated alias of `.warning` on stdlib
        # loggers; kept because the type returned by logger() is not visible here.
        logger().warn('Eigendecomposition failed for transition matrix\n' + str(self._Tij) +
                      '\nspectral properties will not be available')
        self._spectral_decomp_available = False
def setUp(self):
    """Sample a metastable birth-death chain and estimate a reference MSM from it."""
    # Remember the global RNG state, then reseed so the sampled trajectory
    # is 'deterministic'.
    self.state = np.random.mtrand.get_state()
    np.random.mtrand.seed(42)

    # Meta-stable birth-death chain on 7 states; `eps` is the small rate
    # used at states 2 and 4.
    barrier = 2
    eps = 10 ** (-barrier)
    backward = np.array([0.0, 0.5, 1.0 - eps, 0.5, eps, 0.5, 0.5])
    forward = np.array([0.5, 0.5, eps, 0.5, 1.0 - eps, 0.5, 0.0])
    chain = BirthDeathChain(backward, forward)
    self.dtraj = generate_traj(chain.transition_matrix(), 10000, start=0)
    self.tau = 1

    # Estimate MSM on the largest connected set of the count matrix.
    self.C_MSM = count_matrix(self.dtraj, self.tau, sliding=True)
    self.lcc_MSM = largest_connected_set(self.C_MSM)
    self.Ccc_MSM = largest_connected_submatrix(self.C_MSM, lcc=self.lcc_MSM)
    self.P_MSM = transition_matrix(self.Ccc_MSM, reversible=True)
    self.mu_MSM = stationary_distribution(self.P_MSM)
    self.k = 3
    self.ts = timescales(self.P_MSM, k=self.k, tau=self.tau)
def sample(self, N=1, return_statdist=False):
    """Advance the sampler via ``self.update(N=N)`` and return the current ``self.P``.

    Parameters
    ----------
    N : int, optional, default=1
        Forwarded to ``self.update``.
    return_statdist : bool, optional, default=False
        If true, additionally return ``stationary_distribution(self.P)``.

    Returns
    -------
    ``self.P``, or the tuple ``(self.P, pi)`` when ``return_statdist`` is true.
    """
    self.update(N=N)
    if not return_statdist:
        return self.P
    return self.P, stationary_distribution(self.P)
def coarsegrain(P, n):
    r"""Coarse-grain transition matrix P to n sets using PCCA.

    With the PCCA membership matrix :math:`M`, the coarse-grained matrix is
    computed as

    .. math::
        \tilde{P} = (M^T M)^{-1} M^T P M

    and then weighted with the coarse-grained stationary distribution
    :math:`M^T \pi` and row-normalized to eliminate numerical errors.
    See [2]_ for the derivation of this form from the coarse-graining method
    first derived in [1]_.

    NOTE(review): the previous docstring stated
    :math:`M^T P M (M^T M)^{-1}`; the formula above is the order the code
    actually uses. Confirm against [2]_.

    Parameters
    ----------
    P : ndarray (n_states, n_states)
        Transition matrix.
    n : int
        Number of sets to coarse-grain to (passed to ``pcca``).

    Returns
    -------
    P_coarse : ndarray
        Row-normalized coarse-grained transition matrix.

    References
    ----------
    [1] S. Kube and M. Weber
        A coarse graining method for the identification of transition rates between
        molecular conformations.
        J. Chem. Phys. 126, 024103 (2007)
    [2] F. Noe, H. Wu, J.-H. Prinz and N. Plattner:
        Projected and hidden Markov models for calculating kinetics and metastable states of
        complex molecules
        J. Chem. Phys. 139, 184114 (2013)
    """
    from msmtools.analysis import stationary_distribution

    M = pcca(P, n)
    # coarse-grained transition matrix
    W = np.linalg.inv(np.dot(M.T, M))
    A = np.dot(np.dot(M.T, P), M)
    P_coarse = np.dot(W, A)

    # weight rows by the coarse-grained stationary distribution and
    # renormalize so that every row sums to one
    pi_coarse = np.dot(M.T, stationary_distribution(P))
    X = np.dot(np.diag(pi_coarse), P_coarse)
    P_coarse = X / X.sum(axis=1)[:, None]

    return P_coarse
def test_trajectories(self):
    """Check count, start-state statistics and stopping behaviour of generate_trajs."""
    P = np.array([[0.9, 0.1],
                  [0.1, 0.9]])
    n_steps = 10

    # number of returned trajectories
    n_trajs = 10
    assert len(msmgen.generate_trajs(P, n_trajs, n_steps, start=0)) == n_trajs

    # empirical distribution of the first state of 1000 one-step trajectories
    # versus the stationary distribution of P
    one_step = msmgen.generate_trajs(P, 1000, 1)
    first_states = np.concatenate(one_step).astype(int)
    pi_ref = msmana.stationary_distribution(P)
    pi_est = msmest.count_states(first_states) / 1000.0
    assert np.max(np.abs(pi_ref - pi_est)) < 0.025

    # stopping state == starting state: trajectory has a single frame
    n_trajs = 10
    for path in msmgen.generate_trajs(P, n_trajs, n_steps, start=0, stop=0):
        assert path.size == 1

    # a trajectory either runs the full length or ends at the stopping state,
    # and never visits the stopping state earlier
    n_trajs = 100
    target = 1
    for path in msmgen.generate_trajs(P, n_trajs, n_steps, start=0, stop=target):
        assert path.size == n_steps or path[-1] == target
        assert target not in path[:-1]
def setUpClass(cls) -> None:
    """Sample a metastable birth-death chain and estimate a reference MSM from it.

    The reference transition matrix is estimated with the same ``maxerr``
    that ``MaximumLikelihoodMSM`` uses by default.
    """
    import inspect

    # Remember the global RNG state, then reseed to enforce 'deterministic' behavior.
    cls.state = np.random.mtrand.get_state()
    np.random.mtrand.seed(42)

    # Meta-stable birth-death chain
    b = 2
    q = np.zeros(7)
    p = np.zeros(7)
    q[1:] = 0.5
    p[0:-1] = 0.5
    q[2] = 1.0 - 10 ** (-b)
    q[4] = 10 ** (-b)
    p[2] = 10 ** (-b)
    p[4] = 1.0 - 10 ** (-b)

    bdc = BirthDeathChain(q, p)
    P = bdc.transition_matrix()
    cls.dtraj = generate_traj(P, 10000, start=0)
    cls.tau = 1

    # Estimate MSM.
    # Look the default up by parameter name; positional arithmetic on
    # argspec.defaults silently picks a wrong value as soon as a parameter
    # without a default precedes `maxerr`.
    default_maxerr = inspect.signature(MaximumLikelihoodMSM).parameters['maxerr'].default
    cls.C_MSM = msmest.count_matrix(cls.dtraj, cls.tau, sliding=True)
    cls.lcc_MSM = msmest.largest_connected_set(cls.C_MSM)
    cls.Ccc_MSM = msmest.largest_connected_submatrix(cls.C_MSM, lcc=cls.lcc_MSM)
    cls.P_MSM = msmest.transition_matrix(cls.Ccc_MSM, reversible=True, maxerr=default_maxerr)
    cls.mu_MSM = msmana.stationary_distribution(cls.P_MSM)
    cls.k = 3
    cls.ts = msmana.timescales(cls.P_MSM, k=cls.k, tau=cls.tau)
def force_spectroscopy_model():
    """
    Build the fixed three-state Gaussian HMM used as a stand-in for
    single-molecule force spectroscopy experiments.

    Returns
    -------
    model : HMM
        The synthetic HMM model.

    Examples
    --------

    >>> model = force_spectroscopy_model()

    """
    import msmtools.analysis as msmana
    from bhmm import HMM

    n_hidden = 3

    # Reversible transition matrix between the hidden states.
    transition = np.array([
        [0.98, 0.01540412, 0.00459588],
        [0.06331175, 0.9, 0.03668825],
        [0.00339873, 0.00660127, 0.99],
    ])

    # Gaussian emissions, one (mean, sigma) pair per hidden state.
    emissions = GaussianOutputModel(n_hidden, means=[3.0, 4.7, 5.6], sigmas=[1.0, 0.3, 0.2])

    # The initial distribution is the stationary distribution of the chain.
    initial = msmana.stationary_distribution(transition)

    return HMM(initial, transition, emissions)
def transition_matrix_reversible_pisym(C):
    r"""Reversible transition matrix estimate by symmetrizing equilibrium fluxes.

    .. math::
        p_{ij} = c_{ij} / c_i, \quad c_i = \sum_j c_{ij}

        \pi_j = \sum_i \pi_i p_{ij}

        x_{ij} = \pi_i p_{ij} + \pi_j p_{ji}

        p^{rev}_{ij} = x_{ij} / x_i, \quad x_i = \sum_j x_{ij}

    In words: take the nonreversible estimate, weight it with its own
    stationary distribution to get an equilibrium correlation matrix,
    symmetrize that matrix and row-normalize the result.

    Parameters
    ----------
    C: ndarray, shape (n,n)
        count matrix

    Returns
    -------
    T: Estimated transition matrix

    """
    from msmtools.analysis import stationary_distribution

    # nonreversible estimate and its stationary distribution
    T_nonrev = transition_matrix_non_reversible(C)
    stat = stationary_distribution(T_nonrev)
    # equilibrium correlation matrix, then symmetrized
    corr = stat[:, None] * T_nonrev
    corr_sym = corr.T + corr
    # row-normalize
    row_sums = corr_sym.sum(axis=1)
    return corr_sym / row_sums[:, None]
def pcca(P, m):
    """PCCA+ spectral clustering method with optimized memberships [1]_

    Clusters the first m eigenvectors of a transition matrix in order to cluster the states.
    This function does not assume that the transition matrix is fully connected. Disconnected sets
    will automatically define the first metastable states, with perfect membership assignments.

    Parameters
    ----------
    P : ndarray (n,n)
        Transition matrix. Sparse input is converted to a dense array (with a warning).
    m : int
        Number of clusters to group to; must satisfy ``0 < m <= n``.

    Returns
    -------
    PCCAModel
        Built from the coarse-grained transition matrix, the coarse-grained
        stationary distribution, the memberships and the output matrix.

    References
    ----------
    [1] S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+:
        application to Markov state models and data classification.
        Adv Data Anal Classif 7, 147-179 (2013).

    """
    assert 0 < m <= P.shape[0]
    from scipy.sparse import issparse
    if issparse(P):
        warnings.warn('PCCA is only implemented for dense matrices, '
                      'converting sparse transition matrix to dense ndarray.', stacklevel=2)
        P = P.toarray()

    # TODO: can be improved. pcca computes stationary distribution internally,
    #       we don't need to compute it twice.
    from msmtools.analysis.dense.pcca import pcca as _algorithm_impl
    memberships = _algorithm_impl(P, m)

    # TODO: in msmtools we recomputed this from P, we actually want to use pi
    #       from the msm obj, but this caused #1208
    from msmtools.analysis import stationary_distribution
    stat = stationary_distribution(P)
    stat_coarse = np.dot(memberships.T, stat)

    # HMM output matrix, renormalized to be row-stochastic
    output = mdot(np.diag(1.0 / stat_coarse), memberships.T, np.diag(stat))
    output /= output.sum(axis=1)[:, None]

    # coarse-grained transition matrix
    gram_inv = np.linalg.inv(np.dot(memberships.T, memberships))
    projected = np.dot(np.dot(memberships.T, P), memberships)
    coarse = np.dot(gram_inv, projected)

    # weight with the coarse stationary distribution and renormalize rows
    # to eliminate numerical errors
    weighted = np.dot(np.diag(stat_coarse), coarse)
    coarse = weighted / weighted.sum(axis=1)[:, None]

    return PCCAModel(coarse, stat_coarse, memberships, output)
def initial_guess_gaussian_from_data(dtrajs, n_hidden_states, reversible):
    r""" Makes an initial guess :class:`HMM <HiddenMarkovStateModel>` with Gaussian output model.

    To this end, a Gaussian mixture model is estimated using `scikit-learn <https://scikit-learn.org/>`_.
    The mixture components define the output model; fractional transition
    counts between components are accumulated along every trajectory and
    turned into a transition matrix estimate.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Trajectories which are used for making the initial guess.
    n_hidden_states : int
        Number of hidden states.
    reversible : bool
        Whether the hidden transition matrix is estimated so that it is reversible.

    Returns
    -------
    hmm_init : HiddenMarkovStateModel
        An initial guess for the HMM. Its initial distribution is the
        stationary distribution of the estimated transition matrix.

    See Also
    --------
    GaussianOutputModel : The type of output model this heuristic uses.
    initial_guess_discrete_from_data : Initial guess with :class:`Discrete output model <sktime.markov.hmm.DiscreteOutputModel>`.
    initial_guess_discrete_from_msm : Initial guess from an already existing :class:`MSM <sktime.markov.msm.MarkovStateModel>`
                                      with discrete output model.
    """
    from sklearn.mixture import GaussianMixture
    import msmtools.estimation as msmest
    import msmtools.analysis as msmana

    dtrajs = ensure_dtraj_list(dtrajs)
    collected_observations = np.concatenate(dtrajs)
    gmm = GaussianMixture(n_components=n_hidden_states)
    gmm.fit(collected_observations[:, None])
    # NOTE(review): with scikit-learn's default covariance_type='full',
    # covariances_ has shape (n_components, 1, 1), so `[:, 0]` yields shape
    # (n_components, 1) -- confirm GaussianOutputModel accepts that.
    output_model = GaussianOutputModel(n_hidden_states, means=gmm.means_[:, 0],
                                       sigmas=np.sqrt(gmm.covariances_[:, 0]))

    # Compute fractional state memberships.
    Nij = np.zeros((n_hidden_states, n_hidden_states))
    for o_t in dtrajs:
        # length of trajectory
        T = o_t.shape[0]
        # output probability, normalized per time step
        pobs = output_model.to_state_probability_trajectory(o_t)
        pobs /= pobs.sum(axis=1)[:, None]
        # Accumulate fractional transition counts from this trajectory.
        # sum_t outer(pobs[t], pobs[t + 1]) written as one matrix product
        # instead of a Python-level loop over all time steps.
        if T > 1:
            Nij += np.dot(pobs[:T - 1].T, pobs[1:T])

    # Compute transition matrix maximum likelihood estimate.
    Tij = msmest.transition_matrix(Nij, reversible=reversible)
    pi = msmana.stationary_distribution(Tij)
    return HiddenMarkovStateModel(transition_model=Tij, output_model=output_model, initial_distribution=pi)
def run(self, maxiter=100000, on_error='raise'):
    """Estimate a rate matrix from a reversible transition matrix estimate.

    A reversible transition matrix ``self.T`` is estimated from ``self.C``
    (with fixed stationary distribution ``self.pi`` if that is set, otherwise
    ``self.pi`` is filled in from the estimate), and
    ``self.K = (T - Id) / dt`` is stored and returned.

    ``maxiter`` and ``on_error`` are accepted but not used by this method.
    """
    from msmtools.estimation import transition_matrix
    from msmtools.analysis import stationary_distribution

    if self.pi is not None:
        self.T = transition_matrix(self.C, reversible=True, mu=self.pi)
    else:
        self.T = transition_matrix(self.C, reversible=True)
        self.pi = stationary_distribution(self.T)

    identity = np.eye(self.N)
    self.K = (self.T - identity) / self.dt
    return self.K
def test_stats(self):
    """First states of one-step trajectories match the stationary distribution of P."""
    n_trajs = 5000
    one_step = msmgen.generate_trajs(self.P, n_trajs, 1, random_state=self.random_state)
    first_states = np.concatenate(one_step).astype(int)
    pi_ref = msmana.stationary_distribution(self.P)
    pi_est = msmest.count_states(first_states) / float(n_trajs)
    np.testing.assert_allclose(pi_est, pi_ref, atol=0.025)
def init_model_gaussian1d(observations, n_states, lag, reversible=True):
    """Generate an initial model with 1D-Gaussian output densities

    A Gaussian mixture model is fitted to all observations to obtain the
    emission distributions; fractional transition counts between the mixture
    components are accumulated along every trajectory and turned into a
    transition matrix estimate.

    Parameters
    ----------
    observations : list of ndarray((T_i), dtype=float)
        list of arrays of length T_i with observation data
    n_states : int
        The number of states.
    lag : int
        Lag time, passed on to the ``HMM`` constructor.
    reversible : bool, optional, default=True
        Passed on to ``msmtools.estimation.transition_matrix``.

    Returns
    -------
    model : HMM
        Initial model; its initial distribution is the stationary
        distribution of the estimated transition matrix.

    Examples
    --------

    Generate initial model for a gaussian output model.

    >>> from sktime.markovprocess.bhmm import testsystems
    >>> model, observations, states = testsystems.generate_synthetic_observations(output='gaussian')
    >>> initial_model = init_model_gaussian1d(observations, model.n_states, lag=1)

    """
    from sklearn.mixture import GaussianMixture
    import msmtools.estimation as msmest
    import msmtools.analysis as msmana

    # Concatenate all observations.
    collected_observations = np.concatenate(observations)

    # Fit a Gaussian mixture model to obtain emission distributions and state stationary probabilities.
    gmm = GaussianMixture(n_components=n_states)
    gmm.fit(collected_observations[:, None])
    # NOTE(review): with scikit-learn's default covariance_type='full',
    # covariances_ has shape (n_components, 1, 1), so `[:, 0]` yields shape
    # (n_components, 1) -- confirm GaussianOutputModel accepts that.
    output_model = GaussianOutputModel(n_states, means=gmm.means_[:, 0], sigmas=np.sqrt(gmm.covariances_[:, 0]))

    # Compute fractional state memberships.
    Nij = np.zeros((n_states, n_states))
    for o_t in observations:
        # length of trajectory
        T = o_t.shape[0]
        # output probability, normalized per time step
        pobs = output_model.p_obs(o_t)
        pobs /= pobs.sum(axis=1)[:, None]
        # Accumulate fractional transition counts from this trajectory.
        # sum_t outer(pobs[t], pobs[t + 1]) written as one matrix product
        # instead of a Python-level loop over all time steps.
        if T > 1:
            Nij += np.dot(pobs[:T - 1].T, pobs[1:T])

    # Compute transition matrix maximum likelihood estimate.
    Tij = msmest.transition_matrix(Nij, reversible=reversible)
    pi = msmana.stationary_distribution(Tij)

    # Update model.
    model = HMM(pi, Tij, output_model, lag=lag)
    return model
def stationary_distribution(self):
    r""" Stationary distribution of the hidden transition matrix.

    Raises
    ------
    AssertionError
        if the hidden transition matrix is not (weakly) connected, in which
        case there is no unique stationary distribution.

    """
    connected = _tmatrix_disconnected.is_connected(self._Tij, strong=False)
    assert connected, \
        'No unique stationary distribution because transition matrix is not connected'
    from msmtools.analysis import stationary_distribution as _statdist
    return _statdist(self._Tij)
def stationary_distribution(self):
    r""" Stationary distribution of the hidden transition matrix.

    Raises
    ------
    RuntimeError
        if the transition matrix is not connected (checked with
        ``directed=False``), in which case there is no unique stationary
        distribution.

    """
    from msmtools.analysis import is_connected, stationary_distribution
    connected = is_connected(self.transition_matrix, directed=False)
    if connected:
        return stationary_distribution(self._Tij)
    raise RuntimeError('No unique stationary distribution because transition matrix is not connected')
def set_model_params(self, P, m):
    """Run PCCA on ``P`` and store memberships and coarse-grained quantities.

    Sets ``self.P``, ``self.m``, the memberships ``self._M``, the stationary
    distribution ``self._pi``, its coarse-grained counterpart
    ``self._pi_coarse``, the row-stochastic output matrix ``self._B`` and the
    coarse-grained transition matrix ``self._P_coarse``.

    Parameters
    ----------
    P : ndarray (n,n)
        Transition matrix. Sparse input is converted to a dense array (with a warning).
    m : int
        Number of clusters to group to.
    """
    # remember input
    from scipy.sparse import issparse
    if issparse(P):
        warnings.warn('PCCA is only implemented for dense matrices, '
                      'converting sparse transition matrix to dense ndarray.')
        P = P.toarray()
    self.P = P
    self.m = m

    # pcca coarse-graining
    # --------------------
    # PCCA memberships
    # TODO: can be improved. pcca computes stationary distribution internally,
    #       we don't need to compute it twice.
    from msmtools.analysis.dense.pcca import pcca
    self._M = pcca(P, m)

    # stationary distribution
    # TODO: in msmtools we recomputed this from P, we actually want to use pi
    #       from the msm obj, but this caused #1208
    from msmtools.analysis import stationary_distribution
    self._pi = stationary_distribution(P)

    # coarse-grained stationary distribution
    self._pi_coarse = np.dot(self._M.T, self._pi)

    # HMM output matrix
    self._B = np.dot(np.dot(np.diag(1.0 / self._pi_coarse), self._M.T), np.diag(self._pi))
    # renormalize B to make it row-stochastic (once is enough; a second
    # pass would only divide by row sums that are already 1)
    self._B /= self._B.sum(axis=1)[:, None]

    # coarse-grained transition matrix
    W = np.linalg.inv(np.dot(self._M.T, self._M))
    A = np.dot(np.dot(self._M.T, P), self._M)
    self._P_coarse = np.dot(W, A)

    # weight with the coarse stationary distribution and renormalize rows
    # to eliminate numerical errors
    X = np.dot(np.diag(self._pi_coarse), self._P_coarse)
    self._P_coarse = X / X.sum(axis=1)[:, None]
def trajectory(self, N, start=None, stop=None):
    """
    Generates a trajectory realization of length N

    Parameters
    ----------
    N : int
        trajectory length
    start : int, optional, default = None
        starting state. If not given, will sample from the stationary distribution of P
        (the sampler is built on first use and cached in ``self.mudist``)
    stop : int or int-array-like, optional, default = None
        stopping set. If given, the trajectory will be stopped before N steps
        once a state of the stop set is reached

    Returns
    -------
    ndarray of int
        The visited states; shorter than N if a stopping state was hit.

    """
    # check input
    stop = types.ensure_int_vector_or_None(stop, require_order=False)

    if start is None:
        if self.mudist is None:
            # lazily build the sampler for the stationary distribution of P
            import msmtools.analysis as msmana
            stat = msmana.stationary_distribution(self.P)
            self.mudist = scipy.stats.rv_discrete(values=(list(range(self.n)), stat))
        # sample starting point from mu
        start = self.mudist.rvs()

    # boolean mask over all states marking the stopping set
    is_stop_state = np.zeros(self.n, dtype=bool)
    if stop is not None:
        for state in np.array(stop):
            is_stop_state[state] = True

    path = np.zeros(N, dtype=int)
    path[0] = start
    # already at stopping state?
    if is_stop_state[path[0]]:
        return path[:1]

    # else run until end or stopping state
    for step in range(1, N):
        previous = path[step - 1]
        path[step] = self.rgs[previous].rvs()
        if is_stop_state[path[step]]:
            return path[:step + 1]

    return path
def is_reversible(P):
    """ Returns if P is reversible on its weakly connected sets """
    import msmtools.analysis as msmana

    def _block_is_reversible(block):
        # a block that is not even a transition matrix cannot be reversible
        if not msmana.is_transition_matrix(block):
            return False
        # detailed balance: the stationary flux matrix must be symmetric
        flux = msmana.stationary_distribution(block)[:, None] * block
        return np.allclose(flux, flux.T)

    # treat each weakly connected set separately; stop at the first failure
    for members in connected_sets(P, strong=False):
        if not _block_is_reversible(P[members, :][:, members]):
            return False
    return True
def run(self, maxiter=100000, on_error='raise'):
    """Estimate a rate matrix from the matrix logarithm of the squared transition matrix.

    A reversible transition matrix ``self.T`` is estimated from ``self.C``
    (with fixed stationary distribution ``self.pi`` if that is set, otherwise
    ``self.pi`` is filled in from the estimate). ``self.K`` is
    ``logm(T T) / (2 dt)`` with negative entries clipped to zero and the
    diagonal then set to ``-sum1(K)`` of the off-diagonal part.

    ``maxiter`` and ``on_error`` are accepted but not used by this method.
    """
    from msmtools.estimation import transition_matrix
    from msmtools.analysis import stationary_distribution

    if self.pi is not None:
        self.T = transition_matrix(self.C, reversible=True, mu=self.pi)
    else:
        self.T = transition_matrix(self.C, reversible=True)
        self.pi = stationary_distribution(self.T)

    squared = np.dot(self.T, self.T)
    generator = np.array(sp.linalg.logm(squared) / (2.0 * self.dt))
    # clip negative rates
    self.K = np.maximum(generator, 0)
    # zero the diagonal first so that it does not enter the sums below
    np.fill_diagonal(self.K, 0)
    np.fill_diagonal(self.K, -sum1(self.K))
    return self.K
def enforce_reversible_on_closed(P):
    """ Enforces transition matrix P to be reversible on its closed sets.

    Returns a copy of P in which every closed-set block is replaced by its
    symmetrized, row-normalized counterpart; P itself is not modified.
    """
    from msmtools.analysis import stationary_distribution

    result = P.copy()
    # treat each closed set separately
    for members in closed_sets(P):
        block_idx = np.ix_(members, members)
        block = P[block_idx]
        # stationary flux matrix of the block, then symmetrized
        flux = stationary_distribution(block)[:, None] * P[block_idx]
        flux = 0.5 * (flux + flux.T)
        # normalize rows
        result[block_idx] = flux / flux.sum(axis=1)[:, None]
    return result
def stationary_distribution(self):
    """The stationary distribution of the genotype-phenotype-map.

    Returned as the node-attribute dict ``"stationary_distribution"``. If
    that attribute is not set on any node yet, it is computed from
    ``self.transition_matrix`` and stored on the nodes first.
    """
    attr = "stationary_distribution"
    cached = nx.get_node_attributes(self, name=attr)
    if cached:
        return cached

    probabilities = mana.stationary_distribution(self.transition_matrix)
    # node index -> stationary probability
    nx.set_node_attributes(self, name=attr, values=dict(enumerate(probabilities)))
    return nx.get_node_attributes(self, name=attr)
def enforce_reversible_on_closed(P):
    """ Enforces transition matrix P to be reversible on its closed sets.

    Returns a copy of P in which every closed-set block is replaced by its
    symmetrized, row-normalized counterpart; P itself is not modified.
    """
    import msmtools.analysis as msmana
    Prev = P.copy()
    # treat each closed set separately
    sets = closed_sets(P)
    for s in sets:
        indices = np.ix_(s, s)
        # compute stationary probability
        pi_s = msmana.stationary_distribution(P[indices])
        # symmetrize
        X_s = pi_s[:, None] * P[indices]
        X_s = 0.5 * (X_s + X_s.T)
        # normalize
        Prev[indices] = X_s / X_s.sum(axis=1)[:, None]
    return Prev
def _transition_matrix_samples(self, msm, given_pi):
    """Check shape, stochasticity and reversibility of all sampled transition matrices.

    ``given_pi`` is only used to label the failure message.
    """
    import msmtools.analysis as msmana

    sampled = [sample.transition_matrix for sample in msm.samples]

    # shape
    expected_shape = (self.nsamples, self.n_states, self.n_states)
    assert np.array_equal(np.shape(sampled), expected_shape)

    # consistency
    for matrix in sampled:
        assert msmana.is_transition_matrix(matrix)
        try:
            assert msmana.is_reversible(matrix)
        except AssertionError:
            # re-do calculation msmtools just performed to get details
            stat = msmana.stationary_distribution(matrix)
            flux = stat[:, np.newaxis] * matrix
            np.testing.assert_allclose(flux, flux.T, atol=1e-12,
                                       err_msg="P not reversible, given_pi={}".format(given_pi))
def stationary_distribution(rate_matrix):
    """Compute the stationary distribution of a rate matrix.

    The stationary distribution of the jump matrix is divided by the
    negated diagonal of the rate matrix and renormalized.

    Parameters
    ----------
    rate_matrix : (M, M) array_like
        A transition rate matrix, with row sums equal to zero.

    Returns
    -------
    (M,) ndarray
        The stationary distribution of `rate_matrix`.

    """
    jump_stat = msmana.stationary_distribution(jump_matrix(rate_matrix))
    weights = -jump_stat / np.diagonal(rate_matrix)
    return weights / weights.sum()
def test_two_state_model(self):
    """An HMM built from explicit parameters must report those same parameters."""
    from msmtools.analysis import stationary_distribution

    # Two hidden states with unit-width Gaussians centred at -1 and +1.
    n_hidden = 2
    expected_means = [-1, +1]
    expected_sigmas = [1, 1]
    emissions = GaussianOutputModel(n_hidden, means=expected_means, sigmas=expected_sigmas)

    transition = testsystems.generate_transition_matrix(reversible=True)
    initial = stationary_distribution(transition)

    model = bhmm.HMM(initial, transition, emissions)

    # Test model is correct.
    assert_array_almost_equal(model.transition_matrix, transition)
    assert_array_almost_equal(model.stationary_distribution, initial)
    assert np.allclose(model.output_model.means, np.array(expected_means))
    assert np.allclose(model.output_model.sigmas, np.array(expected_sigmas))
def test_two_state_model(self):
    """Test the creation of a simple two-state HMM model with analytical parameters.
    """
    from bhmm import HMM
    from bhmm import GaussianOutputModel
    import msmtools.analysis as msmana

    # Create a simple two-state model.
    nstates = 2
    Tij = testsystems.generate_transition_matrix(reversible=True)
    # stationary distribution
    Pi = msmana.stationary_distribution(Tij)
    means = [-1, +1]
    sigmas = [1, 1]
    output_model = GaussianOutputModel(nstates, means=means, sigmas=sigmas)
    # Use the locally imported class; the test must not depend on a
    # module-level `bhmm` name being in scope.
    model = HMM(Pi, Tij, output_model)

    # Test model is correct.
    assert_array_almost_equal(model.transition_matrix, Tij)
    assert_array_almost_equal(model.stationary_distribution, Pi)
    assert np.allclose(model.output_model.means, np.array(means))
    assert np.allclose(model.output_model.sigmas, np.array(sigmas))
def transition_matrix_reversible_pisym(C, return_statdist=False, **kwargs):
    r"""Reversible transition matrix estimate by symmetrizing equilibrium fluxes (sparse).

    .. math::
        p_{ij} = c_{ij} / c_i, \quad c_i = \sum_j c_{ij}

        \pi_j = \sum_i \pi_i p_{ij}

        x_{ij} = \pi_i p_{ij} + \pi_j p_{ji}

        p^{rev}_{ij} = x_{ij} / x_i, \quad x_i = \sum_j x_{ij}

    In words: take the nonreversible estimate, weight it with its own
    stationary distribution to get an equilibrium correlation matrix,
    symmetrize that matrix and row-normalize the result.

    Parameters
    ----------
    C: ndarray, shape (n,n)
        count matrix
    return_statdist : bool, optional, default=False
        If true, also return the stationary distribution computed from the
        nonreversible estimate.
    **kwargs
        Accepted but not used.

    Returns
    -------
    T: Estimated transition matrix
    pi: stationary distribution, only if ``return_statdist`` is true

    """
    from msmtools.analysis import stationary_distribution

    # nonreversible estimate and its stationary distribution
    T_nonrev = transition_matrix_non_reversible(C)
    pi = stationary_distribution(T_nonrev)

    # equilibrium correlation matrix, then symmetrized
    corr = scipy.sparse.diags(pi).dot(T_nonrev)
    corr_sym = corr.T + corr

    # row-normalize
    row_sums = np.array(corr_sym.sum(axis=1)).squeeze()
    T_rev = scipy.sparse.diags(1.0 / row_sums).dot(corr_sym)

    return (T_rev, pi) if return_statdist else T_rev
def test_pcca_coarsegrain(self):
    """Compare coarsegrain() and PCCA against an explicit restriction/interpolation reference."""
    from msmtools.analysis import stationary_distribution

    # fine-grained transition matrix
    P = np.array([[0.9, 0.1, 0.0, 0.0, 0.0],
                  [0.1, 0.89, 0.01, 0.0, 0.0],
                  [0.0, 0.1, 0.8, 0.1, 0.0],
                  [0.0, 0.0, 0.01, 0.79, 0.2],
                  [0.0, 0.0, 0.0, 0.2, 0.8]])
    stat = stationary_distribution(P)
    stat_diag = np.diag(stat)
    m = 3

    # Susanna+Marcus' expression ------------
    memberships = pcca(P, m)
    stat_coarse = np.dot(memberships.T, stat)
    stat_coarse_inv = np.diag(1.0 / stat_coarse)
    # restriction and interpolation operators
    restriction = memberships.T
    interpolation = np.dot(np.dot(stat_diag, memberships), stat_coarse_inv)
    # result
    left = np.linalg.inv(np.dot(restriction, interpolation)).T
    right = np.dot(np.dot(interpolation.T, P), restriction.T)
    Pc_ref = np.dot(left, right)
    # ---------------------------------------

    from .pcca import coarsegrain
    Pc = coarsegrain(P, 3)
    # test against Marcus+Susanna's expression
    assert np.max(np.abs(Pc - Pc_ref)) < 1e-10
    # test mass conservation
    assert np.allclose(Pc.sum(axis=1), np.ones(m))

    from .pcca import PCCA
    model = PCCA(P, m)
    coarse_T = model.coarse_grained_transition_matrix
    # test against Marcus+Susanna's expression
    assert np.max(np.abs(coarse_T - Pc_ref)) < 1e-10
    # test against the present coarse-grained stationary dist
    assert np.max(np.abs(model.coarse_grained_stationary_probability - stat_coarse)) < 1e-10
    # test mass conservation
    assert np.allclose(model.coarse_grained_transition_matrix.sum(axis=1), np.ones(m))
def force_spectroscopy_model():
    """
    Build the fixed three-state Gaussian HMM used as a stand-in for
    single-molecule force spectroscopy experiments.

    Returns
    -------
    model : HMM
        The synthetic HMM model.

    Examples
    --------

    >>> model = force_spectroscopy_model()

    """
    import msmtools.analysis as msmana
    from bhmm import HMM

    n_hidden = 3

    # Reversible transition matrix between the hidden states.
    transition = np.array([
        [0.98, 0.01540412, 0.00459588],
        [0.06331175, 0.9, 0.03668825],
        [0.00339873, 0.00660127, 0.99],
    ])

    # Gaussian emissions, one (mean, sigma) pair per hidden state.
    emissions = GaussianOutputModel(n_hidden, means=[3.0, 4.7, 5.6], sigmas=[1.0, 0.3, 0.2])

    # The initial distribution is the stationary distribution of the chain.
    initial = msmana.stationary_distribution(transition)

    return HMM(initial, transition, emissions)
def plot_markov_model(
        P, pos=None, state_sizes=None, state_scale=1.0, state_colors='#ff5500', state_labels='auto',
        minflux=1e-6, arrow_scale=1.0, arrow_curvature=1.0, arrow_labels='weights',
        arrow_label_format='%2.e', max_width=12, max_height=12, figpadding=0.2, show_frame=False,
        ax=None, **textkwargs):
    r"""Network representation of MSM transition matrix

    This visualization is not optimized for large matrices. It is meant to be
    used for the visualization of small models with up to 10-20 states, e.g.
    obtained by a HMM coarse-graining. If used with large network, the automatic
    node positioning will be very slow and may still look ugly.

    Parameters
    ----------
    P : ndarray(n,n) or MSM object with attribute 'transition matrix'
        Transition matrix or MSM object. A copy is made; the input is not modified.
    pos : ndarray(n,2), optional, default=None
        User-defined positions to draw the states on. If not given, will try
        to place them automatically.
    state_sizes : ndarray(n), optional, default=None
        User-defined areas of the discs drawn for each state. If not given,
        the stationary probability of P will be used.
    state_scale : float, optional, default=1.0
        Passed on to ``NetworkPlot.plot_network``.
    state_colors : string, ndarray(n), or list, optional, default='#ff5500' (orange)
        string :
            a Hex code for a single color used for all states
        array :
            n values in [0,1] which will result in a grayscale plot
        list :
            of len = nstates, with a color for each state. The list can mix strings, RGB values and
            hex codes, e.g. :py:obj:`state_colors` = ['g', 'red', [.23, .34, .35], '#ff5500'] is
            possible.
    state_labels : list of strings, optional, default is 'auto'
        A list with a label for each state, to be displayed at the center
        of each node/state. If left to 'auto', the labels are automatically set to the state
        indices.
    minflux : float, optional, default=1e-6
        The minimal flux (p_i * p_ij) for a transition to be drawn
    arrow_scale : float, optional, default=1.0
        Relative arrow scale. Set to a value different from 1 to increase
        or decrease the arrow width.
    arrow_curvature : float, optional, default=1.0
        Relative arrow curvature. Set to a value different from 1 to make
        arrows more or less curved.
    arrow_labels : 'weights', None or a ndarray(n,n) with label strings. Optional, default='weights'
        Strings to be placed upon arrows. If None, no labels will be used.
        If 'weights', the elements of P will be used. If a matrix of strings is
        given by the user these will be used.
    arrow_label_format : str, optional, default='%2.e'
        The numeric format to print the arrow labels
    max_width = 12
        The maximum figure width
    max_height = 12
        The maximum figure height
    figpadding = 0.2
        The relative figure size used for the padding
    show_frame: boolean (default=False)
        Draw a frame around the network.
    ax : matplotlib Axes object, optional, default=None
        The axes to plot to. When set to None a new Axes (and Figure) object will be used.
    textkwargs : optional argument for the text of the state and arrow labels.
        See http://matplotlib.org/api/text_api.html#matplotlib.text.Text for more info. The
        parameter 'size' refers to the size of the state and arrow labels and overwrites the
        matplotlib default. The parameter 'arrow_label_size' is only used for the arrow labels;
        please note that 'arrow_label_size' is not part of matplotlib.text.Text's set of parameters
        and will raise an exception when passed to matplotlib.text.Text directly.

    Returns
    -------
    fig, pos : matplotlib.Figure, ndarray(n,2)
    a Figure object containing the plot and the positions of states.
    Can be used later to plot a different network representation (e.g. the flux)

    Examples
    --------
    >>> import numpy as np
    >>> P = np.array([[0.8,  0.15, 0.05,  0.0,  0.0],
    ...              [0.1,  0.75, 0.05, 0.05, 0.05],
    ...              [0.05,  0.1,  0.8,  0.0,  0.05],
    ...              [0.0,  0.2, 0.0,  0.8,  0.0],
    ...              [0.0,  0.02, 0.02, 0.0,  0.96]])
    >>> plot_markov_model(P) # doctest:+ELLIPSIS
    (<matplotlib.figure.Figure..., array...)

    """
    from msmtools import analysis as msmana
    if isinstance(P, _np.ndarray):
        P = P.copy()
    else:
        # MSM object? then get transition matrix first
        P = P.transition_matrix.copy()

    # The stationary distribution is needed for the default state sizes and
    # for the flux filter; compute it at most once, and before any entry of
    # P is zeroed below.
    mu = None
    if state_sizes is None:
        mu = msmana.stationary_distribution(P)
        state_sizes = mu
    if minflux > 0:
        if mu is None:
            mu = msmana.stationary_distribution(P)
        # drop transitions whose flux p_i * p_ij is below the threshold
        F = _np.dot(_np.diag(mu), P)
        rows, cols = _np.where(F < minflux)
        P[rows, cols] = 0.0

    plot = NetworkPlot(P, pos=pos, ax=ax)
    fig = plot.plot_network(
        state_sizes=state_sizes, state_scale=state_scale, state_colors=state_colors,
        state_labels=state_labels, arrow_scale=arrow_scale, arrow_curvature=arrow_curvature,
        arrow_labels=arrow_labels, arrow_label_format=arrow_label_format, max_width=max_width,
        max_height=max_height, figpadding=figpadding, xticks=False, yticks=False,
        show_frame=show_frame, **textkwargs)
    return fig, plot.pos
def dalton_model(nstates=3, omin=-5, omax=5, sigma_min=0.5, sigma_max=2.0,
                 lifetime_max=100, lifetime_min=10, reversible=True, output='gaussian'):
    """ Construct a test multistate model with regular spaced emission means (linearly interpolated between omin and omax)
    and variable emission widths (linearly interpolated between sigma_min and sigma_max).

    Parameters
    ----------
    nstates : int, optional, default = 3
        number of hidden states
    omin : float, optional, default = -5
        mean position of the first state.
    omax : float, optional, default = 5
        mean position of the last state.
    sigma_min : float, optional, default = 0.5
        The width of the observed gaussian distribution for the first state
    sigma_max : float, optional, default = 2.0
        The width of the observed gaussian distribution for the last state
    lifetime_max : float, optional, default = 100
        maximum lifetime of any state
    lifetime_min : float, optional, default = 10
        minimum lifetime of any state
    reversible : bool, optional, default=True
        If True, the row-stochastic transition matrix will be reversible.
    output : str, optional, default='gaussian'
        Output model to use, one of ['gaussian', 'discrete']

    Returns
    -------
    model : HMM
        The synthetic HMM model.

    Raises
    ------
    ValueError
        If `output` is neither 'gaussian' nor 'discrete'.

    Examples
    --------

    Generate default model.

    >>> model = dalton_model()

    Generate model with specified number of states.

    >>> model = dalton_model(nstates=5)

    Generate non-reversible model.

    >>> model = dalton_model(reversible=False)

    Generate a discrete output model.

    >>> model = dalton_model(output='discrete')

    """

    # parameters
    means = np.linspace(omin, omax, num=nstates)
    sigmas = np.linspace(sigma_min, sigma_max, num=nstates)

    # Define state emission probabilities.
    if output == 'gaussian':
        output_model = GaussianOutputModel(nstates, means=means, sigmas=sigmas)
    elif output == 'discrete':
        # Construct matrix of output probabilities.
        # B[i, j] is the probability that state i produces symbol j, where nsymbols = nstates.
        # The kernel must use the *squared* distance between the means: with the signed
        # difference the exponent is not dimensionless and grows without bound for j > i,
        # so every state would emit the last symbol with the highest probability.
        delta = means[:, None] - means[None, :]
        B = np.exp(-0.5 * delta ** 2 / np.outer(sigmas, sigmas))
        # normalize every row to a probability distribution over symbols
        B /= B.sum(axis=1)[:, None]
        output_model = DiscreteOutputModel(B)
    else:
        # ValueError is a subclass of Exception, so existing handlers keep working.
        raise ValueError("output = '%s' unknown, must be one of ['gaussian', 'discrete']" % output)

    Tij = generate_transition_matrix(nstates, lifetime_max=lifetime_max, lifetime_min=lifetime_min,
                                     reversible=reversible)

    # stationary distribution
    import msmtools.analysis as msmana
    Pi = msmana.stationary_distribution(Tij)

    # Construct HMM with these parameters.
    from bhmm import HMM
    model = HMM(Pi, Tij, output_model)

    return model
def init_model_gaussian1d(observations, nstates, reversible=True):
    """Generate an initial model with 1D-Gaussian output densities

    A Gaussian mixture model is fitted to the pooled observations to obtain the
    output model. Fractional transition counts are then accumulated from the
    normalized output probabilities of consecutive observations and turned into
    a transition matrix estimate.

    Parameters
    ----------
    observations : list of ndarray((T_i), dtype=float)
        list of arrays of length T_i with observation data
    nstates : int
        The number of states.
    reversible : bool, optional, default=True
        Forwarded to ``msmtools.estimation.transition_matrix`` when estimating
        the transition matrix from the fractional counts.

    Returns
    -------
    model : HMM
        Model built from the stationary distribution of the estimated transition
        matrix, the transition matrix itself and the fitted Gaussian output model.

    Examples
    --------

    Generate initial model for a gaussian output model.

    >>> from bhmm import testsystems
    >>> [model, observations, states] = testsystems.generate_synthetic_observations(output='gaussian')
    >>> initial_model = init_model_gaussian1d(observations, model.nstates)

    """
    # Concatenate all observations in one pass. The leading empty array keeps the
    # dtype promotion against config.dtype that repeated np.append calls produced,
    # without re-copying the growing buffer for every trajectory.
    collected_observations = np.concatenate(
        [np.array([], dtype=config.dtype)] + [np.ravel(o_t) for o_t in observations])

    # Fit a Gaussian mixture model to obtain emission distributions and state stationary probabilities.
    from bhmm._external.sklearn import mixture
    gmm = mixture.GMM(n_components=nstates)
    gmm.fit(collected_observations[:, None])
    from bhmm import GaussianOutputModel
    output_model = GaussianOutputModel(nstates, means=gmm.means_[:, 0], sigmas=np.sqrt(gmm.covars_[:, 0]))

    logger().info("Gaussian output model:\n"+str(output_model))

    # Extract stationary distributions.
    Pi = np.zeros([nstates], np.float64)
    Pi[:] = gmm.weights_[:]

    logger().info("GMM weights: %s" % str(gmm.weights_))

    # Compute fractional state memberships.
    Nij = np.zeros([nstates, nstates], np.float64)
    for o_t in observations:
        # output probability, one row per time step
        pobs = output_model.p_obs(o_t)
        # normalize each time step to a distribution over states
        pobs /= pobs.sum(axis=1)[:, None]
        # Accumulate fractional transition counts from this trajectory:
        # sum_t outer(pobs[t], pobs[t+1]) written as a single matrix product.
        Nij += np.dot(pobs[:-1].T, pobs[1:])

    logger().info("Nij\n"+str(Nij))

    # Compute transition matrix maximum likelihood estimate.
    import msmtools.estimation as msmest
    import msmtools.analysis as msmana
    Tij = msmest.transition_matrix(Nij, reversible=reversible)
    pi = msmana.stationary_distribution(Tij)

    # Update model.
    model = HMM(pi, Tij, output_model)

    return model
def test_statdist(self):
    """Check that the stationary distribution reported by ``self.bdc`` agrees
    with the one ``stationary_distribution`` computes from its transition matrix."""
    transition_matrix = self.bdc.transition_matrix()
    reference = self.bdc.stationary_distribution()
    computed = stationary_distribution(transition_matrix)
    assert_allclose(reference, computed)
def tpt(T, A, B, mu=None, qminus=None, qplus=None, rate_matrix=False):
    r""" Computes the A->B reactive flux using transition path theory (TPT)

    Parameters
    ----------
    T : (M, M) ndarray or scipy.sparse matrix
        Transition matrix (default) or Rate matrix (if rate_matrix=True)
    A : array_like
        List of integer state labels for set A
    B : array_like
        List of integer state labels for set B
    mu : (M,) ndarray (optional)
        Stationary vector
    qminus : (M,) ndarray (optional)
        Backward committor for A->B reaction
    qplus : (M,) ndarray (optional)
        Forward committor for A-> B reaction
    rate_matrix = False : boolean
        By default (False), T is a transition matrix.
        If set to True, T is a rate matrix.

    Returns
    -------
    tpt: msmtools.flux.ReactiveFlux object
        A python object containing the reactive A->B flux network
        and several additional quantities, such as stationary probability,
        committors and set definitions.

    Raises
    ------
    ValueError
        If A or B is empty, if A or B contains more states or a larger state
        label than T provides, or if T is not a transition matrix.
    NotImplementedError
        If `rate_matrix` is set (rate matrices are not supported yet).

    Notes
    -----
    The central object used in transition path theory is
    the forward and backward committor function.

    TPT (originally introduced in [1]) for continuous systems has a
    discrete version outlined in [2]. Here, we use the transition
    matrix formulation described in [3].

    See also
    --------
    msmtools.analysis.committor, ReactiveFlux

    References
    ----------
    .. [1] W. E and E. Vanden-Eijnden.
        Towards a theory of transition paths.
        J. Stat. Phys. 123: 503-523 (2006)
    .. [2] P. Metzner, C. Schuette and E. Vanden-Eijnden.
        Transition Path Theory for Markov Jump Processes.
        Multiscale Model Simul 7: 1192-1219 (2009)
    .. [3] F. Noe, Ch. Schuette, E. Vanden-Eijnden, L. Reich and
        T. Weikl: Constructing the Full Ensemble of Folding Pathways
        from Short Off-Equilibrium Simulations.
        Proc. Natl. Acad. Sci. USA, 106, 19011-19016 (2009)

    """
    # Cheap argument checks first, so invalid input fails before any heavier work.
    if len(A) == 0 or len(B) == 0:
        raise ValueError('set A or B is empty')
    n = T.shape[0]
    # State labels are zero-based: the largest admissible label is n - 1, so a
    # label equal to n is already out of range.
    if len(A) > n or len(B) > n or max(A) >= n or max(B) >= n:
        raise ValueError(
            'set A or B defines more states, than given transition matrix.')
    # Test truthiness rather than identity with True/False, so that values such
    # as 1 or numpy.True_ cannot skip both this branch and the matrix validation.
    if rate_matrix:
        raise NotImplementedError(
            'TPT with rate matrix is not yet implemented - But it is very simple, so feel free to do it.'
        )

    import msmtools.analysis as msmana

    if not msmana.is_transition_matrix(T):
        raise ValueError('given matrix T is not a transition matrix')

    # we can compute the following properties from either dense or sparse T
    # stationary dist
    if mu is None:
        mu = msmana.stationary_distribution(T)
    # forward committor
    if qplus is None:
        qplus = msmana.committor(T, A, B, forward=True)
    # backward committor
    if qminus is None:
        if msmana.is_reversible(T, mu=mu):
            # for a reversible T the backward committor is taken as 1 - qplus
            qminus = 1.0 - qplus
        else:
            qminus = msmana.committor(T, A, B, forward=False, mu=mu)
    # gross flux
    grossflux = flux_matrix(T, mu, qminus, qplus, netflux=False)
    # net flux
    netflux = to_netflux(grossflux)

    # construct flux object
    from .reactive_flux import ReactiveFlux

    F = ReactiveFlux(A, B, netflux, mu=mu, qminus=qminus, qplus=qplus, gross_flux=grossflux)
    # done
    return F
def _pcca_connected(P, n, return_rot=False):
    """
    PCCA+ spectral clustering method with optimized memberships [1]_

    Clusters the first n_cluster eigenvectors of a transition matrix in order to cluster the states.
    This function assumes that the transition matrix is fully connected.

    Parameters
    ----------
    P : ndarray (n,n)
        Transition matrix.
    n : int
        Number of clusters to group to.
    return_rot : bool, optional, default=False
        If True, the optimized rotation matrix is returned together with the memberships.

    Returns
    -------
    chi by default, or (chi,rot) if return_rot = True
    chi : ndarray (n x m)
        A matrix containing the probability or membership of each state to be assigned to each cluster.
        The rows sum to 1.
    rot_mat : ndarray (m x m)
        A rotation matrix that rotates the dominant eigenvectors to yield the PCCA memberships, i.e.:
        chi = np.dot(evec, rot_matrix)
        (chi is additionally clipped to [0, 1] and row-normalized afterwards.)

    Raises
    ------
    ValueError
        If P is disconnected or does not fulfill detailed balance.

    References
    ----------
    [1] S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+:
        application to Markov state models and data classification.
        Adv Data Anal Classif 7, 147-179 (2013).

    """
    # test connectivity
    from msmtools.estimation import connected_sets

    labels = connected_sets(P)
    n_components = len(labels)
    if n_components > 1:
        raise ValueError("Transition matrix is disconnected. Cannot use pcca_connected.")

    from msmtools.analysis import stationary_distribution

    pi = stationary_distribution(P)

    from msmtools.analysis import is_reversible

    if not is_reversible(P, mu=pi):
        raise ValueError(
            "Transition matrix does not fulfill detailed balance. "
            "Make sure to call pcca with a reversible transition matrix estimate"
        )
    # TODO: Susanna mentioned that she has a potential fix for nonreversible matrices by replacing each complex conjugate
    # pair by the real and imaginary components of one of the two vectors. We could use this but would then need to
    # orthonormalize all eigenvectors e.g. using Gram-Schmidt orthonormalization. Currently there is no theoretical
    # foundation for this, so I'll skip it for now.

    # right eigenvectors, ordered
    from msmtools.analysis import eigenvectors

    evecs = eigenvectors(P, n)

    # orthonormalize (each eigenvector gets unit norm in the pi-weighted scalar product)
    for i in range(n):
        evecs[:, i] /= math.sqrt(np.dot(evecs[:, i] * pi, evecs[:, i]))
    # make first eigenvector positive
    evecs[:, 0] = np.abs(evecs[:, 0])

    # Is there a significant complex component?
    # (np.all replaces np.alltrue, which was removed in NumPy 2.0.)
    if not np.all(np.isreal(evecs)):
        warnings.warn(
            "The given transition matrix has complex eigenvectors, so it doesn't exactly fulfill detailed balance "
            + "forcing eigenvectors to be real and continuing. Be aware that this is not theoretically solid."
        )
    # np.real is a no-op on real input, so it is safe to apply unconditionally.
    evecs = np.real(evecs)

    # create initial solution using PCCA+. This could have negative memberships;
    # only the rotation matrix of the initial solution is needed here.
    _, rot_matrix = _pcca_connected_isa(evecs, n)

    # optimize the rotation matrix with PCCA++.
    rot_matrix = _opt_soft(evecs, rot_matrix, n)

    # These memberships should be nonnegative
    memberships = np.dot(evecs[:, :], rot_matrix)

    # We might still have numerical errors. Force memberships to be in [0,1]
    memberships = np.clip(memberships, 0.0, 1.0)
    # renormalize every row to sum to one
    memberships /= memberships.sum(axis=1)[:, None]

    # honor the documented return_rot switch (it was previously ignored)
    if return_rot:
        return memberships, rot_matrix
    return memberships