def __initX(C):
    """Build a symmetric starting guess for a reversible correlation matrix.

    The guess is assembled from ``T = tmatrix(C)`` and ``mu = statdist(T)``
    (both taken from msmtools) as ``diag(mu) . T``, which is then averaged
    with its own transpose so that the returned matrix is symmetric.

    Parameters
    ----------
    C : matrix passed unchanged to ``msmtools.estimation.tmatrix``

    Returns
    -------
    The symmetrized product ``0.5 * (X + X.T)`` with ``X = diag(mu) . T``.
    """
    from msmtools.estimation import tmatrix
    from msmtools.analysis import statdist

    transition = tmatrix(C)
    weights = statdist(transition)
    # weight every row of the transition matrix by its stationary probability
    weighted = np.dot(np.diag(weights), transition)
    return 0.5 * (weighted + weighted.T)
def stationary_distribution(C, P):
    """Assemble a stationary distribution from the connected sets of ``C``.

    Every connected set reported by ``msmtools.estimation.connected_sets(C)``
    is treated on its own: the stationary vector of the matching sub-block of
    ``P`` is scaled by the share of all counts found in the rows of that set.
    The combined vector is normalized to sum to one before it is returned.

    Parameters
    ----------
    C : count matrix, indexed here as ``C[s, :]``
    P : transition matrix, indexed here as ``P[s, :][:, s]``

    Returns
    -------
    ndarray with one entry per row of ``C``.
    """
    import msmtools.estimation as msmest
    import msmtools.analysis as msmana

    n_states = np.shape(C)[0]
    total_counts = np.sum(C)
    combined = np.zeros(n_states)

    for members in msmest.connected_sets(C):
        # fraction of all counts that sit in the rows of this set
        share = np.sum(C[members, :]) / total_counts
        combined[members] = share * msmana.statdist(P[members, :][:, members])

    # enforce normalization of the assembled vector
    return combined / np.sum(combined)
def setUp(self):
    """Create the net-flux network and the expected pathway decomposition.

    A 5-state transition matrix is converted to CSR format; its stationary
    vector and both committors (``forward=False`` and ``forward=True``)
    between ``A = [0]`` and ``B = [4]`` are fed to ``flux_matrix`` with
    ``netflux=True``. The result is stored as ``self.F`` together with the
    reference paths and their capacities.
    """
    dense_T = np.array([
        [0.8, 0.15, 0.05, 0.0, 0.0],
        [0.1, 0.75, 0.05, 0.05, 0.05],
        [0.05, 0.1, 0.8, 0.0, 0.05],
        [0.0, 0.2, 0.0, 0.8, 0.0],
        [0.0, 0.02, 0.02, 0.0, 0.96],
    ])
    sparse_T = csr_matrix(dense_T)

    source = [0]
    sink = [4]

    pi = statdist(sparse_T)
    q_backward = committor(sparse_T, source, sink, forward=False, mu=pi)
    q_forward = committor(sparse_T, source, sink, forward=True, mu=pi)

    self.A = source
    self.B = sink
    self.F = flux_matrix(sparse_T, pi, q_backward, q_forward, netflux=True)

    # reference decomposition: state sequences and the capacity of each one
    self.paths = [
        np.array(states)
        for states in ([0, 1, 4], [0, 2, 4], [0, 1, 2, 4])
    ]
    self.capacities = [
        0.0072033898305084252,
        0.0030871670702178975,
        0.00051452784503631509,
    ]
def setUp(self):
    """Build the two TPT fixtures (5-state and 16-state) and their references.

    For each system a model object is created with ``markov_model``, its
    ``mu`` and ``estimated`` attributes are set by hand, and the model is
    passed to ``tpt`` together with the source and target sets. The
    resulting objects are stored as ``self.tpt1`` and ``self.tpt2``; all
    ``self.ref_*`` / ``self.ref2_*`` attributes hold hard-coded reference
    values.
    """
    # ------------------------------------------------------------------
    # 5-state toy system
    # ------------------------------------------------------------------
    self.P = np.array([
        [0.8, 0.15, 0.05, 0.0, 0.0],
        [0.1, 0.75, 0.05, 0.05, 0.05],
        [0.05, 0.1, 0.8, 0.0, 0.05],
        [0.0, 0.2, 0.0, 0.8, 0.0],
        [0.0, 0.02, 0.02, 0.0, 0.96],
    ])
    self.A = [0]
    self.B = [4]
    self.I = [1, 2, 3]

    # REFERENCE SOLUTION FOR PATH DECOMP
    self.ref_committor = np.array([0., 0.35714286, 0.42857143, 0.35714286, 1.])
    self.ref_backwardcommittor = np.array([1., 0.65384615, 0.53125, 0.65384615, 0.])
    self.ref_grossflux = np.array([
        [0., 0.00771792, 0.00308717, 0., 0.],
        [0., 0., 0.00308717, 0.00257264, 0.00720339],
        [0., 0.00257264, 0., 0., 0.00360169],
        [0., 0.00257264, 0., 0., 0.],
        [0., 0., 0., 0., 0.],
    ])
    self.ref_netflux = np.array([
        [0.00000000e+00, 7.71791768e-03, 3.08716707e-03, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 5.14527845e-04, 0.00000000e+00, 7.20338983e-03],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 3.60169492e-03],
        [0.00000000e+00, 4.33680869e-19, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
    ])
    self.ref_totalflux = 0.0108050847458
    self.ref_kAB = 0.0272727272727
    self.ref_mfptAB = 36.6666666667
    self.ref_paths = [[0, 1, 4], [0, 2, 4], [0, 1, 2, 4]]
    self.ref_pathfluxes = np.array([0.00720338983051, 0.00308716707022, 0.000514527845036])
    self.ref_paths_99percent = [[0, 1, 4], [0, 2, 4]]
    self.ref_pathfluxes_99percent = np.array([0.00720338983051, 0.00308716707022])
    self.ref_majorflux_99percent = np.array([
        [0., 0.00720339, 0.00308717, 0., 0.],
        [0., 0., 0., 0., 0.00720339],
        [0., 0., 0., 0., 0.00308717],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
    ])

    # mu and estimated are assigned by hand instead of running an estimation
    # NOTE(review): presumably this is what lets tpt() accept the model as
    # already estimated -- confirm against markov_model / tpt.
    msm1 = markov_model(self.P)
    msm1.mu = msmana.statdist(self.P)
    msm1.estimated = True
    self.tpt1 = tpt(msm1, self.A, self.B)

    # ------------------------------------------------------------------
    # 16-state toy system
    # ------------------------------------------------------------------
    P2_nonrev = np.array([
        [0.5, 0.2, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.2, 0.5, 0.1, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.1, 0.5, 0.2, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.1, 0.5, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.3, 0.0, 0.0, 0.0, 0.5, 0.1, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.1, 0.0, 0.0, 0.2, 0.5, 0.1, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.1, 0.0, 0.0, 0.1, 0.5, 0.2, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.3, 0.5, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.5, 0.1, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.2, 0.5, 0.1, 0.0, 0.0, 0.1, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.1, 0.5, 0.1, 0.0, 0.0, 0.2, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.2, 0.5, 0.0, 0.0, 0.0, 0.2],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.5, 0.2, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.3, 0.5, 0.1, 0.0],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.1, 0.5, 0.2],
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.2, 0.5],
    ])
    pstat2_nonrev = msmana.statdist(P2_nonrev)

    # make reversible: symmetrize diag(pi) . P and row-normalize the result
    C = np.dot(np.diag(pstat2_nonrev), P2_nonrev)
    Csym = C + C.T
    self.P2 = Csym / np.sum(Csym, axis=1)[:, np.newaxis]
    # computed once and reused for the model below
    pstat2 = msmana.statdist(self.P2)

    self.A2 = [0, 4]
    self.B2 = [11, 15]
    self.coarsesets2 = [
        [2, 3, 6, 7],
        [10, 11, 14, 15],
        [0, 1, 4, 5],
        [8, 9, 12, 13],
    ]

    # REFERENCE SOLUTION CG
    self.ref2_tpt_sets = [
        {0, 4},
        {2, 3, 6, 7},
        {10, 14},
        {1, 5},
        {8, 9, 12, 13},
        {11, 15},
    ]
    self.ref2_cgA = [0]
    self.ref2_cgI = [1, 2, 3, 4]
    self.ref2_cgB = [5]
    self.ref2_cgpstat = np.array([0.15995388, 0.18360442, 0.12990937, 0.11002342, 0.31928127, 0.09722765])
    self.ref2_cgcommittor = np.array([0., 0.56060272, 0.73052426, 0.19770537, 0.36514272, 1.])
    self.ref2_cgbackwardcommittor = np.array([1., 0.43939728, 0.26947574, 0.80229463, 0.63485728, 0.])
    self.ref2_cggrossflux = np.array([
        [0., 0., 0., 0.00427986, 0.00282259, 0.],
        [0., 0, 0.00234578, 0.00104307, 0., 0.00201899],
        [0., 0.00113892, 0, 0., 0.00142583, 0.00508346],
        [0., 0.00426892, 0., 0, 0.00190226, 0.],
        [0., 0., 0.00530243, 0.00084825, 0, 0.],
        [0., 0., 0., 0., 0., 0.],
    ])
    self.ref2_cgnetflux = np.array([
        [0., 0., 0., 0.00427986, 0.00282259, 0.],
        [0., 0., 0.00120686, 0., 0., 0.00201899],
        [0., 0., 0., 0., 0., 0.00508346],
        [0., 0.00322585, 0., 0., 0.00105401, 0.],
        [0., 0., 0.0038766, 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
    ])

    # Same hand-assembled model as for the 5-state system; no dummy
    # trajectory or lag time is needed for it.
    msm2 = markov_model(self.P2)
    msm2.mu = pstat2
    msm2.estimated = True
    self.tpt2 = tpt(msm2, self.A2, self.B2)
def test_mle_trev_given_pi(self):
    """Compare every estimator variant against the reference implementation.

    The count matrix and the stationary vector are loaded from ``testpath``.
    Each variant (direct dense/sparse implementation and the API call with
    ``method`` in dense/sparse/auto, for dense and CSR input) must

    * agree with the result of ``impl_dense_Frank``,
    * pass ``is_transition_matrix``, and
    * have ``pi`` as its stationary distribution.
    """
    C = np.loadtxt(testpath + 'C_1_lag.dat')
    pi = np.loadtxt(testpath + 'pi.dat')

    # direct implementations first, then the reference
    candidates = [
        impl_dense(C, pi),
        impl_sparse(scipy.sparse.csr_matrix(C), pi).toarray(),
    ]
    reference = impl_dense_Frank(C, pi)

    # API with an explicit algorithm: dense input, then sparse input
    for algo in ('dense', 'sparse'):
        candidates.append(apicall(C, reversible=True, mu=pi, method=algo))
    for algo in ('dense', 'sparse'):
        candidates.append(
            apicall(scipy.sparse.csr_matrix(C), reversible=True, mu=pi,
                    method=algo).toarray())

    # API with automatic algorithm selection
    candidates.append(apicall(C, reversible=True, mu=pi, method='auto'))
    candidates.append(
        apicall(scipy.sparse.csr_matrix(C), reversible=True, mu=pi,
                method='auto').toarray())

    for estimate in candidates:
        assert_allclose(estimate, reference)

    everything = [reference] + candidates
    for matrix in everything:
        assert is_transition_matrix(matrix)
    for matrix in everything:
        assert_allclose(statdist(matrix), pi)
def pcca(P, m, stationary_distribution=None):
    """PCCA+ spectral clustering method with optimized memberships.

    Implementation according to [1]_.
    Clusters the first m eigenvectors of a transition matrix in order to
    cluster the states.

    This function does not assume that the transition matrix is fully
    connected. Disconnected sets will automatically define the first
    metastable states, with perfect membership assignments.

    Parameters
    ----------
    P : ndarray (n,n)
        Transition matrix. A scipy sparse matrix is accepted as well; it is
        converted to a dense ndarray and a warning is issued.
    m : int
        Number of clusters to group to.
    stationary_distribution : ndarray(n,), optional, default=None
        Stationary distribution over the full state space, can be given if
        already computed.

    Returns
    -------
    PCCAModel
        Constructed as ``PCCAModel(P_coarse, pi_coarse, M, B)`` from the
        coarse-grained transition matrix, the coarse-grained stationary
        distribution, the membership matrix and the row-stochastic output
        matrix.

    Raises
    ------
    ValueError
        If ``m`` does not satisfy ``0 < m <= n``.

    References
    ----------
    .. [1] S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+:
        application to Markov state models and data classification.
        Adv Data Anal Classif 7, 147-179 (2013).

    """
    # A single positive check replaces the former if/assert pair: it is not
    # stripped under ``python -O`` and it also rejects NaN, which would pass
    # ``m <= 0 or m > n``.
    if not 0 < m <= P.shape[0]:
        raise ValueError(
            "Number of metastable sets must be larger than 0 and can be at most as large as the number "
            "of states.")

    from scipy.sparse import issparse
    if issparse(P):
        warnings.warn(
            'PCCA is only implemented for dense matrices, '
            'converting sparse transition matrix to dense ndarray.',
            stacklevel=2)
        P = P.toarray()

    # stationary distribution
    if stationary_distribution is None:
        from msmtools.analysis import stationary_distribution as statdist
        pi = statdist(P)
    else:
        pi = stationary_distribution

    # memberships
    # TODO: can be improved. pcca computes stationary distribution internally,
    # we don't need to compute it twice.
    from msmtools.analysis.dense.pcca import pcca as _algorithm_impl
    M = _algorithm_impl(P, m)

    # coarse-grained stationary distribution
    pi_coarse = np.dot(M.T, pi)

    # HMM output matrix
    B = mdot(np.diag(1.0 / pi_coarse), M.T, np.diag(pi))
    # renormalize B to make it row-stochastic
    B /= B.sum(axis=1)[:, None]

    # coarse-grained transition matrix: (M^T M)^-1 (M^T P M)
    W = np.linalg.inv(np.dot(M.T, M))
    A = np.dot(np.dot(M.T, P), M)
    P_coarse = np.dot(W, A)

    # Weight the rows by pi_coarse and divide each row by its sum so that the
    # rows of P_coarse add up to one, eliminating numerical errors.
    # Note: no symmetrization is performed here.
    X = np.dot(np.diag(pi_coarse), P_coarse)
    P_coarse = X / X.sum(axis=1)[:, None]

    return PCCAModel(P_coarse, pi_coarse, M, B)