def test_connected_count_matrix(self):
    """largest_connected_submatrix: dense input, directed and undirected."""
    # Directed connectivity (default behavior).
    submatrix = largest_connected_submatrix(self.C)
    assert_allclose(submatrix, self.C_cc_directed)
    # Undirected connectivity.
    submatrix = largest_connected_submatrix(self.C, directed=False)
    assert_allclose(submatrix, self.C_cc_undirected)
def test_connected_count_matrix(self):
    """largest_connected_submatrix: sparse input, directed/undirected, with
    and without a user-specified connected set (lcc)."""
    # Directed connectivity (default behavior).
    submatrix = largest_connected_submatrix(self.C)
    assert_allclose(submatrix.toarray(), self.C_cc_directed)
    # Directed, restricted to a user-specified connected set.
    submatrix = largest_connected_submatrix(self.C, lcc=np.array([0, 1]))
    assert_allclose(submatrix.toarray(), self.C_cc_directed[0:2, 0:2])
    # Undirected connectivity.
    submatrix = largest_connected_submatrix(self.C, directed=False)
    assert_allclose(submatrix.toarray(), self.C_cc_undirected)
    # Undirected, restricted to a user-specified connected set.
    submatrix = largest_connected_submatrix(self.C, lcc=np.array([0, 1]), directed=False)
    assert_allclose(submatrix.toarray(), self.C_cc_undirected[0:2, 0:2])
def setUp(self):
    """Build a reference MSM from a simulated meta-stable birth-death chain."""
    # Remember the global rng state so it can be restored later,
    # then reseed for deterministic trajectories.
    self.state = np.random.mtrand.get_state()
    np.random.mtrand.seed(42)
    # Meta-stable birth-death chain; eps = 10**(-b) with b = 2.
    eps = 10 ** (-2)
    q = np.zeros(7)
    p = np.zeros(7)
    q[1:] = 0.5
    p[:-1] = 0.5
    q[2] = 1.0 - eps
    q[4] = eps
    p[2] = eps
    p[4] = 1.0 - eps
    chain = BirthDeathChain(q, p)
    self.dtraj = chain.msm.simulate(10000, start=0)
    self.tau = 1
    # Estimate the MSM: counts -> largest connected set -> reversible
    # transition matrix -> stationary distribution and timescales.
    self.C_MSM = count_matrix(self.dtraj, self.tau, sliding=True)
    self.lcc_MSM = largest_connected_set(self.C_MSM)
    self.Ccc_MSM = largest_connected_submatrix(self.C_MSM, lcc=self.lcc_MSM)
    self.mle_rev_max_err = 1E-8
    self.P_MSM = transition_matrix(self.Ccc_MSM, reversible=True,
                                   maxerr=self.mle_rev_max_err)
    self.mu_MSM = stationary_distribution(self.P_MSM)
    self.k = 3
    self.ts = timescales(self.P_MSM, k=self.k, tau=self.tau)
def _prepare_input_revpi(self, C, pi): """Max. state index visited by trajectories""" nC = C.shape[0] # Max. state index of the stationary vector array npi = pi.shape[0] # pi has to be defined on all states visited by the trajectories if nC > npi: raise ValueError( 'There are visited states for which no stationary probability is given' ) # Reduce pi to the visited set pi_visited = pi[0:nC] # Find visited states with positive stationary probabilities""" pos = _np.where(pi_visited > 0.0)[0] # Reduce C to positive probability states""" C_pos = largest_connected_submatrix(C, lcc=pos) if C_pos.sum() == 0.0: errstr = """The set of states with positive stationary probabilities is not visited by the trajectories. A MSM reversible with respect to the given stationary vector can not be estimated""" raise ValueError(errstr) # Compute largest connected set of C_pos, undirected connectivity""" lcc = largest_connected_set(C_pos, directed=False) return pos[lcc]
def equilibrium_transition_matrix(Xi, omega, sigma, reversible=True, return_lcc=True):
    """Compute the equilibrium transition matrix from OOM components.

    Parameters
    ----------
    Xi : ndarray(M, N, M)
        matrix of set-observable operators
    omega : ndarray(M,)
        information state vector of the OOM
    sigma : ndarray(M,)
        evaluator of the OOM
    reversible : bool, optional, default=True
        symmetrize the corrected count matrix in order to obtain a
        reversible transition matrix.
    return_lcc : bool, optional, default=True
        also return the indices of the largest connected set.

    Returns
    -------
    Tt_Eq : ndarray(N, N)
        equilibrium transition matrix
    lcc : ndarray(M,)
        largest connected set of the transition matrix (only if
        ``return_lcc`` is True).
    """
    import deeptime.markov.tools.estimation as me

    # Equilibrium count matrix contracted from the OOM components.
    count_eq = np.einsum('j,jkl,lmn,n->km', omega, Xi, Xi, sigma)
    # Clip negative entries (numerical artifacts).
    count_eq[count_eq < 0.0] = 0.0
    row_sums = np.sum(count_eq, axis=1)
    if reversible:
        # Symmetrize before normalizing. Rows with zero symmetrized sum get
        # a dummy normalizer of 1; they are removed by the active-set update.
        col_sums = np.sum(count_eq, axis=0)
        sym_sums = row_sums + col_sums
        sym_sums[np.where(sym_sums == 0.0)[0]] = 1.0
        T_eq = (count_eq + count_eq.T) / sym_sums[:, None]
    else:
        # Same zero-row guard for the non-reversible normalization.
        row_sums[np.where(row_sums == 0.0)[0]] = 1.0
        T_eq = count_eq / row_sums[:, None]
    # Active-set update: restrict to the largest connected set.
    lcc = me.largest_connected_set(T_eq)
    T_eq = me.largest_connected_submatrix(T_eq, lcc=lcc)
    if return_lcc:
        return T_eq, lcc
    return T_eq
def test_birth_death_chain(fixed_seed, sparse):
    """Estimate an MSM on a meta-stable birth-death chain and compare it
    against reference quantities computed via the low-level msm tools."""
    # Meta-stable birth-death chain; eps = 10**(-b) with b = 2.
    eps = 10 ** (-2)
    q = np.zeros(7)
    p = np.zeros(7)
    q[1:] = 0.5
    p[:-1] = 0.5
    q[2] = 1.0 - eps
    q[4] = eps
    p[2] = eps
    p[4] = 1.0 - eps
    chain = deeptime.data.birth_death_chain(q, p)
    traj = chain.msm.simulate(10000, start=0)
    lag = 1
    # Reference pipeline: counts -> lcc -> reversible T -> statdist/timescales.
    ref_counts = msmest.count_matrix(traj, lag, sliding=True)
    ref_lcc = msmest.largest_connected_set(ref_counts)
    ref_counts_connected = msmest.largest_connected_submatrix(ref_counts, lcc=ref_lcc)
    ref_transition = msmest.transition_matrix(ref_counts_connected, reversible=True, maxerr=1e-8)
    ref_statdist = msmana.stationary_distribution(ref_transition)
    n_ts = 3
    ref_timescales = msmana.timescales(ref_transition, k=n_ts, tau=lag)
    # High-level estimation under test.
    model = estimate_markov_model(traj, lag, sparse=sparse)
    assert_equal(lag, model.count_model.lagtime)
    assert_array_equal(ref_lcc, model.count_model.connected_sets()[0])
    assert_(scipy.sparse.issparse(model.count_model.count_matrix) == sparse)
    assert_(scipy.sparse.issparse(model.transition_matrix) == sparse)
    # Densify for comparison when running in sparse mode.
    if sparse:
        counts = model.count_model.count_matrix.toarray()
        transition = model.transition_matrix.toarray()
    else:
        counts = model.count_model.count_matrix
        transition = model.transition_matrix
    assert_array_almost_equal(ref_counts_connected.toarray(), counts)
    assert_array_almost_equal(ref_counts.toarray(), counts)
    assert_array_almost_equal(ref_transition.toarray(), transition)
    assert_array_almost_equal(ref_statdist, model.stationary_distribution)
    assert_array_almost_equal(ref_timescales[1:], model.timescales(n_ts - 1))
def oom_components(Ct, C2t, rank_ind=None, lcc=None, tol_one=1e-2):
    """Compute OOM components and eigenvalues from count matrices.

    Parameters
    ----------
    Ct : ndarray(N, N)
        count matrix from data
    C2t : sparse csc-matrix (N*N, N)
        two-step count matrix from data for all states, columns enumerate
        intermediate steps.
    rank_ind : ndarray(N, dtype=bool), optional, default=None
        indicates which singular values are accepted. By default, all
        non-zero singular values are accepted.
    lcc : ndarray(N,), optional, default=None
        largest connected set of the count-matrix. Two-step count matrix
        will be reduced to this set.
    tol_one : float, optional, default=1e-2
        keep eigenvalues of absolute value less or equal 1+tol_one.

    Returns
    -------
    Xi : ndarray(M, N, M)
        matrix of set-observable operators
    omega : ndarray(M,)
        information state vector of the OOM
    sigma : ndarray(M,)
        evaluator of the OOM
    eigenvalues : ndarray(M,)
        eigenvalues from the OOM
    """
    import deeptime.markov.tools.estimation as me
    # Decompose count matrix by SVD:
    if lcc is not None:
        Ct_svd = me.largest_connected_submatrix(Ct, lcc=lcc)
        N1 = Ct.shape[0]  # full state count, needed to reshape C2t columns
    else:
        Ct_svd = Ct
    V, s, W = scl.svd(Ct_svd, full_matrices=False)
    # Make rank decision:
    if rank_ind is None:
        # BUG FIX: the default mask was previously assigned to an unused
        # local 'ind', leaving rank_ind as None so that the slicing below
        # did not select singular components at all.
        rank_ind = (s >= np.finfo(float).eps)
    V = V[:, rank_ind]
    s = s[rank_ind]
    W = W[rank_ind, :].T
    # Compute transformations:
    F1 = np.dot(V, np.diag(s ** -0.5))
    F2 = np.dot(W, np.diag(s ** -0.5))
    # Apply the transformations to C2t:
    N = Ct_svd.shape[0]
    M = F1.shape[1]
    Xi = np.zeros((M, N, M))
    for n in range(N):
        if lcc is not None:
            # Column n of the restricted problem lives at full-space index
            # lcc[n]; reshape to (N1, N1) and restrict back to the lcc.
            C2t_n = C2t[:, lcc[n]]
            C2t_n = _reshape_sparse(C2t_n, (N1, N1))
            C2t_n = me.largest_connected_submatrix(C2t_n, lcc=lcc)
        else:
            C2t_n = C2t[:, n]
            C2t_n = _reshape_sparse(C2t_n, (N, N))
        Xi[:, n, :] = np.dot(F1.T, C2t_n.dot(F2))
    # Compute sigma:
    c = np.sum(Ct_svd, axis=1)
    sigma = np.dot(F1.T, c)
    # Compute eigenvalues:
    Xi_S = np.sum(Xi, axis=1)
    eigenvalues, right_eigenvectors = scl.eig(Xi_S.T)
    # Restrict eigenvalues to a reasonable range:
    ind = np.where(np.logical_and(np.abs(eigenvalues) <= (1 + tol_one),
                                  np.real(eigenvalues) >= 0.0))[0]
    eigenvalues = eigenvalues[ind]
    right_eigenvectors = right_eigenvectors[:, ind]
    # Sort and extract omega (normalized so that <omega, sigma> = 1):
    eigenvalues, right_eigenvectors = sort_eigs(eigenvalues, right_eigenvectors)
    omega = np.real(right_eigenvectors[:, 0])
    omega = omega / np.dot(omega, sigma)
    return Xi, omega, sigma, eigenvalues
def __init__(self, complete: bool = True):
    """Set up OOM-MSM reference data and estimated models.

    Parameters
    ----------
    complete : bool, optional, default=True
        use all 1000 stored trajectories; otherwise a fixed subset is
        excluded so that the count matrix is not fully connected.
    """
    self.complete = complete
    data = np.load(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                'resources', 'TestData_OOM_MSM.npz'))
    if complete:
        self.dtrajs = [data['arr_%d' % k] for k in range(1000)]
    else:
        # Fixed exclusion list producing a not-fully-connected data set.
        excluded = [
            21, 25, 30, 40, 66, 72, 74, 91, 116, 158, 171, 175, 201, 239, 246,
            280, 300, 301, 310, 318, 322, 323, 339, 352, 365, 368, 407, 412,
            444, 475, 486, 494, 510, 529, 560, 617, 623, 637, 676, 689, 728,
            731, 778, 780, 811, 828, 838, 845, 851, 859, 868, 874, 895, 933,
            935, 938, 958, 961, 968, 974, 984, 990, 999
        ]
        self.dtrajs = [data['arr_%d' % k] for k in np.setdiff1d(np.arange(1000), excluded)]
    # Number of states:
    self.N = 5
    # Lag time:
    self.tau = 5
    self.dtrajs_lag = [traj[:-self.tau] for traj in self.dtrajs]
    # Rank:
    if complete:
        self.rank = 3
    else:
        self.rank = 2
    # Build models:
    self.msmrev = OOMReweightedMSM(lagtime=self.tau, rank_mode='bootstrap_trajs').fit(self.dtrajs)
    self.msmrev_sparse = OOMReweightedMSM(lagtime=self.tau, sparse=True, rank_mode='bootstrap_trajs') \
        .fit(self.dtrajs)
    self.msm = OOMReweightedMSM(lagtime=self.tau, reversible=False,
                                rank_mode='bootstrap_trajs').fit(self.dtrajs)
    self.msm_sparse = OOMReweightedMSM(lagtime=self.tau, reversible=False, sparse=True,
                                       rank_mode='bootstrap_trajs').fit(self.dtrajs)
    self.estimators = [self.msmrev, self.msm, self.msmrev_sparse, self.msm_sparse]
    self.msms = [est.fetch_model() for est in self.estimators]
    # Reference count matrices at lag time tau and 2*tau:
    if complete:
        self.C2t = data['C2t']
    else:
        self.C2t = data['C2t_s']
    self.Ct = np.sum(self.C2t, axis=1)
    if complete:
        self.Ct_active = self.Ct
        self.C2t_active = self.C2t
        # BUG FIX: this attribute was misspelled 'active_faction' in the
        # complete branch, inconsistent with the else-branch name below.
        self.active_fraction = 1.
    else:
        lcc = msmest.largest_connected_set(self.Ct)
        self.Ct_active = msmest.largest_connected_submatrix(self.Ct, lcc=lcc)
        self.C2t_active = self.C2t[:4, :4, :4]
        self.active_fraction = np.sum(self.Ct_active) / np.sum(self.Ct)
    # Compute OOM-components:
    self.Xi, self.omega, self.sigma, self.l = oom_transformations(
        self.Ct_active, self.C2t_active, self.rank)
    # Compute corrected transition matrices (reversible and non-reversible):
    Tt_rev = compute_transition_matrix(self.Xi, self.omega, self.sigma, reversible=True)
    Tt = compute_transition_matrix(self.Xi, self.omega, self.sigma, reversible=False)
    # Build reference models:
    self.rmsmrev = MarkovStateModel(Tt_rev)
    self.rmsm = MarkovStateModel(Tt)
    # Active count fraction:
    self.hist = count_states(self.dtrajs)
    self.active_hist = self.hist[:-1] if not complete else self.hist
    self.active_count_frac = float(np.sum(self.active_hist)) / np.sum(self.hist) if not complete else 1.
    self.active_state_frac = 0.8 if not complete else 1.
    # Committor and MFPT between sets a and b:
    a = np.array([0, 1])
    b = np.array([4]) if complete else np.array([3])
    self.comm_forward = self.rmsm.committor_forward(a, b)
    self.comm_forward_rev = self.rmsmrev.committor_forward(a, b)
    self.comm_backward = self.rmsm.committor_backward(a, b)
    self.comm_backward_rev = self.rmsmrev.committor_backward(a, b)
    self.mfpt = self.tau * self.rmsm.mfpt(a, b)
    self.mfpt_rev = self.tau * self.rmsmrev.mfpt(a, b)
    # PCCA:
    pcca = self.rmsmrev.pcca(3 if complete else 2)
    self.pcca_ass = pcca.assignments
    self.pcca_dist = pcca.metastable_distributions
    self.pcca_mem = pcca.memberships
    self.pcca_sets = pcca.sets
    # Experimental quantities (observables a, b and initial distribution p0):
    a = np.array([1, 2, 3, 4, 5])
    b = np.array([1, -1, 0, -2, 4])
    p0 = np.array([0.5, 0.2, 0.2, 0.1, 0.0])
    if not complete:
        a = a[:-1]
        b = b[:-1]
        p0 = p0[:-1]
    pi = self.rmsm.stationary_distribution
    pi_rev = self.rmsmrev.stationary_distribution
    _, _, L_rev = ma.rdl_decomposition(Tt_rev)
    self.exp = np.dot(self.rmsm.stationary_distribution, a)
    self.exp_rev = np.dot(self.rmsmrev.stationary_distribution, a)
    # Correlation / relaxation curves over 10 lag multiples:
    self.corr_rev = np.zeros(10)
    self.rel = np.zeros(10)
    self.rel_rev = np.zeros(10)
    for k in range(10):
        Ck_rev = np.dot(np.diag(pi_rev), np.linalg.matrix_power(Tt_rev, k))
        self.corr_rev[k] = np.dot(a.T, np.dot(Ck_rev, b))
        self.rel[k] = np.dot(p0.T, np.dot(np.linalg.matrix_power(Tt, k), a))
        self.rel_rev[k] = np.dot(p0.T, np.dot(np.linalg.matrix_power(Tt_rev, k), a))
    # Fingerprint amplitudes from the left eigenvectors:
    self.fing_cor = np.dot(a.T, L_rev.T) * np.dot(b.T, L_rev.T)
    self.fing_rel = np.dot(a.T, L_rev.T) * np.dot((p0 / pi_rev).T, L_rev.T)
def cktest_resource():
    """Fixture data for a Chapman-Kolmogorov test on a meta-stable
    birth-death chain.

    Yields a tuple ``(MSM, p_MSM, p_MD)``: the estimated MSM at lag tau,
    the MSM-predicted set probabilities p_MSM[k] at lag k*tau, and the
    directly re-estimated probabilities p_MD[k]. The global numpy rng is
    reseeded for determinism and restored after the yield.
    """
    rnd_state = np.random.mtrand.get_state()
    np.random.mtrand.seed(42)
    """Meta-stable birth-death chain"""
    b = 2
    q = np.zeros(7)
    p = np.zeros(7)
    q[1:] = 0.5
    p[0:-1] = 0.5
    q[2] = 1.0 - 10**(-b)
    q[4] = 10**(-b)
    p[2] = 10**(-b)
    p[4] = 1.0 - 10**(-b)
    bdc = BirthDeathChain(q, p)
    dtraj = bdc.msm.simulate(10000, start=0)
    tau = 1
    """Estimate MSM"""
    MSM = estimate_markov_model(dtraj, tau)
    P_MSM = MSM.transition_matrix
    mu_MSM = MSM.stationary_distribution
    """Meta-stable sets"""
    A = [0, 1, 2]
    B = [4, 5, 6]
    # Stationary distribution restricted to and renormalized on each set.
    w_MSM = np.zeros((2, mu_MSM.shape[0]))
    w_MSM[0, A] = mu_MSM[A] / mu_MSM[A].sum()
    w_MSM[1, B] = mu_MSM[B] / mu_MSM[B].sum()
    K = 10
    P_MSM_dense = P_MSM
    p_MSM = np.zeros((K, 2))
    w_MSM_k = 1.0 * w_MSM
    # Model prediction: propagate the set distributions with the tau-MSM
    # and record the probability of staying in A resp. B after k steps.
    for k in range(1, K):
        w_MSM_k = np.dot(w_MSM_k, P_MSM_dense)
        p_MSM[k, 0] = w_MSM_k[0, A].sum()
        p_MSM[k, 1] = w_MSM_k[1, B].sum()
    """Assume that sets are equal, A(\tau)=A(k \tau) for all k"""
    w_MD = 1.0 * w_MSM
    p_MD = np.zeros((K, 2))
    eps_MD = np.zeros((K, 2))
    # At k=0 the chain is in its starting set with probability one.
    p_MSM[0, :] = 1.0
    p_MD[0, :] = 1.0
    eps_MD[0, :] = 0.0
    for k in range(1, K):
        """Build MSM at lagtime k*tau"""
        C_MD = count_matrix(dtraj, k * tau, sliding=True) / (k * tau)
        lcc_MD = largest_connected_set(C_MD)
        Ccc_MD = largest_connected_submatrix(C_MD, lcc=lcc_MD)
        c_MD = Ccc_MD.sum(axis=1)
        P_MD = transition_matrix(Ccc_MD).toarray()
        w_MD_k = np.dot(w_MD, P_MD)
        """Set A"""
        prob_MD = w_MD_k[0, A].sum()
        c = c_MD[A].sum()
        p_MD[k, 0] = prob_MD
        # Statistical uncertainty of the estimated probability.
        eps_MD[k, 0] = np.sqrt(k * (prob_MD - prob_MD**2) / c)
        """Set B"""
        prob_MD = w_MD_k[1, B].sum()
        c = c_MD[B].sum()
        p_MD[k, 1] = prob_MD
        eps_MD[k, 1] = np.sqrt(k * (prob_MD - prob_MD**2) / c)
    """Input"""
    yield MSM, p_MSM, p_MD
    # Restore the rng state saved at fixture setup.
    np.random.mtrand.set_state(rnd_state)