def __call__(self):
    """
    Look for a counterexample.

    A rate matrix R is sampled and passed through self.simplification to
    obtain a second rate matrix Q.  The values that
    ctmcmi.get_expected_ll_ratio gives for R and for Q at a random time
    are compared; a counterexample is a sample for which the value for R
    is smaller than the value for Q.

    Side effects: self.n_too_close is incremented when the two values are
    numerically indistinguishable, and a text report is stored in
    self.counterexample when a counterexample is found.

    Returns True when a counterexample was found, otherwise False.
    """
    # Sample a rate matrix.
    # Use a trick by Robert Kern to left and right multiply by diagonals.
    # http://mail.scipy.org/pipermail/numpy-discussion/2007-March/
    # 026809.html
    S = MatrixUtil.sample_pos_sym_matrix(self.nstates)
    v = mrate.sample_distn(self.nstates)
    R = (v**-0.5)[:, np.newaxis] * S * (v**0.5)
    R -= np.diag(np.sum(R, axis=1))
    # Reduce the sampled process to the process it is compared against.
    Q = self.simplification(R)
    # sample a random time
    t = random.expovariate(1)
    # Check that the mutual information of the
    # simplified process is smaller.
    mi_R = ctmcmi.get_expected_ll_ratio(R, t)
    mi_Q = ctmcmi.get_expected_ll_ratio(Q, t)
    if np.allclose(mi_R, mi_Q):
        self.n_too_close += 1
        return False
    # Written as a negation so that a NaN comparison is still "not found".
    if not mi_R < mi_Q:
        return False

    lines = []

    def emit(*fields):
        # One output line: str() of each field separated by single spaces.
        lines.append(' '.join(str(field) for field in fields))

    emit('found a counterexample')
    emit()
    emit('sampled symmetric matrix S:')
    emit(S)
    emit()
    emit('sampled stationary distribution v:')
    emit(v)
    emit()
    emit('implied rate matrix R:')
    emit(R)
    emit()
    emit('parent independent process Q:')
    emit(Q)
    emit()
    emit('sampled time t:', t)
    emit()
    emit('mutual information of sampled process:', mi_R)
    emit('mutual information of p.i. process:', mi_Q)
    emit()
    self.counterexample = '\n'.join(lines).rstrip()
    return True
def __call__(self):
    """
    Look for a counterexample.

    Samples a rate matrix R, derives Q = self.simplification(R), and
    compares ctmcmi.get_expected_ll_ratio of both at one random time.
    Finding the value for R below the value for Q counts as a
    counterexample.

    Side effects: bumps self.n_too_close when the two values are
    indistinguishable under np.allclose; writes a text report to
    self.counterexample on success.

    Returns True if a counterexample was recorded, False otherwise.
    """
    # Sample a rate matrix.
    # Use a trick by Robert Kern to left and right multiply by diagonals.
    # http://mail.scipy.org/pipermail/numpy-discussion/2007-March/
    # 026809.html
    nstates = self.nstates
    S = MatrixUtil.sample_pos_sym_matrix(nstates)
    v = mrate.sample_distn(nstates)
    R = (v**-0.5)[:, np.newaxis] * S * (v**0.5)
    R -= np.diag(np.sum(R, axis=1))
    # Build the simplified process that R is compared against.
    Q = self.simplification(R)
    # sample a random time
    t = random.expovariate(1)
    mi_R = ctmcmi.get_expected_ll_ratio(R, t)
    mi_Q = ctmcmi.get_expected_ll_ratio(Q, t)
    if np.allclose(mi_R, mi_Q):
        self.n_too_close += 1
        return False
    if mi_R < mi_Q:
        # Each entry is one output line; '' is a blank separator line.
        report = [
            'found a counterexample',
            '',
            'sampled symmetric matrix S:',
            str(S),
            '',
            'sampled stationary distribution v:',
            str(v),
            '',
            'implied rate matrix R:',
            str(R),
            '',
            'parent independent process Q:',
            str(Q),
            '',
            'sampled time t: ' + str(t),
            '',
            'mutual information of sampled process: ' + str(mi_R),
            'mutual information of p.i. process: ' + str(mi_Q),
            '',
        ]
        self.counterexample = '\n'.join(report).rstrip()
        return True
    # No counterexample in this sample.
    return False
def __call__(self):
    """
    Look for a counterexample.

    A rate matrix R is sampled together with a random time and a random
    nonempty proper subset A of the states.  The value of
    get_mutual_information(R, A, t) is compared with
    ctmcmi.get_expected_ll_ratio of the two-state reduction
    msimpl.get_fast_two_state(R, A); a counterexample is a sample for
    which the former is smaller than the latter.

    Side effects: self.n_too_close is incremented when the two values are
    numerically indistinguishable, and a text report is stored in
    self.counterexample when a counterexample is found.

    Returns True when a counterexample was found, otherwise False.
    """
    # Sample a rate matrix.
    # Use a trick by Robert Kern to left and right multiply by diagonals.
    # http://mail.scipy.org/pipermail/numpy-discussion/2007-March/
    # 026809.html
    S = MatrixUtil.sample_pos_sym_matrix(self.nstates)
    v = mrate.sample_distn(self.nstates)
    R = (v**-0.5)[:, np.newaxis] * S * (v**0.5)
    R -= np.diag(np.sum(R, axis=1))
    # sample a random time; the exponential rate is 1 / self.etime
    rate = 1.0 / self.etime
    t = random.expovariate(rate)
    # sample one side of the bipartition and get the mutual information
    k = random.randrange(1, self.nstates)
    A = random.sample(range(self.nstates), k)
    mi_non_markov = get_mutual_information(R, A, t)
    # get summary statistics of the non-markov process
    Q = msimpl.get_fast_two_state(R, A)
    mi_markov = ctmcmi.get_expected_ll_ratio(Q, t)
    # check if the mutual informations are indistinguishable
    if np.allclose(mi_non_markov, mi_markov):
        self.n_too_close += 1
        return False
    # Written as a negation so that a NaN comparison is still "not found".
    if not mi_non_markov < mi_markov:
        return False

    lines = []

    def emit(*fields):
        # One output line: str() of each field separated by single spaces.
        lines.append(' '.join(str(field) for field in fields))

    emit('found a counterexample')
    emit()
    emit('sampled symmetric matrix S:')
    emit(S)
    emit()
    emit('sampled stationary distribution v:')
    emit(v)
    emit()
    emit('implied rate matrix R:')
    emit(R)
    emit()
    emit('reduced rate matrix Q')
    emit(Q)
    emit()
    emit('sampled time t:', t)
    emit()
    emit('non-markov mutual information:', mi_non_markov)
    emit('markov mutual information:', mi_markov)
    emit()
    self.counterexample = '\n'.join(lines).rstrip()
    return True
def __call__(self):
    """
    Look for a counterexample.

    Samples a rate matrix R, a random time t and a random nonempty proper
    subset A of the state indices, then compares
    get_mutual_information(R, A, t) against
    ctmcmi.get_expected_ll_ratio(msimpl.get_fast_two_state(R, A), t).
    The first value being smaller than the second is a counterexample.

    Side effects: bumps self.n_too_close when the two values are
    indistinguishable under np.allclose; writes a text report to
    self.counterexample on success.

    Returns True if a counterexample was recorded, False otherwise.
    """
    # Sample a rate matrix.
    # Use a trick by Robert Kern to left and right multiply by diagonals.
    # http://mail.scipy.org/pipermail/numpy-discussion/2007-March/
    # 026809.html
    nstates = self.nstates
    S = MatrixUtil.sample_pos_sym_matrix(nstates)
    v = mrate.sample_distn(nstates)
    R = (v**-0.5)[:, np.newaxis] * S * (v**0.5)
    R -= np.diag(np.sum(R, axis=1))
    # sample a random time; the exponential rate is 1 / self.etime
    t = random.expovariate(1.0 / self.etime)
    # sample one side of the bipartition and get the mutual information
    subset_size = random.randrange(1, nstates)
    A = random.sample(range(nstates), subset_size)
    mi_non_markov = get_mutual_information(R, A, t)
    # get summary statistics of the non-markov process
    Q = msimpl.get_fast_two_state(R, A)
    mi_markov = ctmcmi.get_expected_ll_ratio(Q, t)
    # check if the mutual informations are indistinguishable
    if np.allclose(mi_non_markov, mi_markov):
        self.n_too_close += 1
        return False
    if mi_non_markov < mi_markov:
        # Each entry is one output line; '' is a blank separator line.
        report = [
            'found a counterexample',
            '',
            'sampled symmetric matrix S:',
            str(S),
            '',
            'sampled stationary distribution v:',
            str(v),
            '',
            'implied rate matrix R:',
            str(R),
            '',
            'reduced rate matrix Q',
            str(Q),
            '',
            'sampled time t: ' + str(t),
            '',
            'non-markov mutual information: ' + str(mi_non_markov),
            'markov mutual information: ' + str(mi_markov),
            '',
        ]
        self.counterexample = '\n'.join(report).rstrip()
        return True
    # No counterexample in this sample.
    return False
def process(fs):
    """
    Report the spectrum of a sampled rate matrix before and after an
    extra "deletion" state is appended.

    fs.nstates gives the number of states n of the sampled matrix R.
    The augmented (n+1) x (n+1) matrix Q copies R into its upper-left
    block and gives each of the first n states a fixed rate of 0.1 into
    the extra last state; the last row has no outgoing rates.

    Side effect: sets the numpy print line width to 200.

    Returns the report as a multi-line string.
    """
    n = fs.nstates
    np.set_printoptions(linewidth=200)
    # Sample a symmetric rate matrix and a stationary distribution,
    # then construct the rate matrix R.
    S = MatrixUtil.sample_pos_sym_matrix(n)
    v = mrate.sample_distn(n)
    psi = np.sqrt(v)
    R = (S.T / psi).T * psi
    R -= np.diag(np.sum(R, axis=1))
    # Only the eigenvalues are reported.
    R_W = scipy.linalg.eig(R)[0]
    # Add some deletion.
    deletion = 0.1
    Q = np.zeros((n + 1, n + 1))
    Q[:n, :n] = R
    Q[:n, -1] = deletion
    # Reset the diagonal so that every row sums to zero again.
    Q -= np.diag(np.sum(Q, axis=1))
    Q_W = scipy.linalg.eig(Q)[0]

    lines = []

    def emit(*fields):
        # One output line: str() of each field separated by single spaces.
        lines.append(' '.join(str(field) for field in fields))

    emit('deletion rate:')
    emit(deletion)
    emit()
    emit('sampled rate matrix R:')
    emit(R)
    emit()
    emit('spectrum of R:')
    emit(R_W)
    emit()
    emit('rate matrix with deletion Q:')
    emit(Q)
    emit()
    emit('spectrum of Q:')
    emit(Q_W)
    emit()
    return '\n'.join(lines).rstrip()
def process(fs):
    """
    Compare the eigenvalues of a sampled rate matrix R with those of the
    same process extended by one extra "deletion" state.

    fs.nstates is the size n of R.  The extended matrix Q has R in its
    leading n x n block, a constant rate of 0.1 from each original state
    into the new last state, and an all-zero last row.

    Side effect: sets the numpy print line width to 200.

    Returns the report as a multi-line string.
    """
    n = fs.nstates
    np.set_printoptions(linewidth=200)
    # Sample a symmetric rate matrix and a stationary distribution,
    # then construct the rate matrix R.
    S = MatrixUtil.sample_pos_sym_matrix(n)
    v = mrate.sample_distn(n)
    psi = np.sqrt(v)
    R = (S.T / psi).T * psi
    R -= np.diag(np.sum(R, axis=1))
    R_W, _ = scipy.linalg.eig(R)
    # Add some deletion.
    deletion = 0.1
    Q = np.zeros((n + 1, n + 1))
    Q[:n, :n] = R
    Q[:n, -1] = deletion
    # Reset the diagonal so that every row sums to zero again.
    Q -= np.diag(np.sum(Q, axis=1))
    Q_W, _ = scipy.linalg.eig(Q)
    # Each (title, value) pair becomes a title line, the value, and a
    # blank separator line.
    sections = [
        ('deletion rate:', deletion),
        ('sampled rate matrix R:', R),
        ('spectrum of R:', R_W),
        ('rate matrix with deletion Q:', Q),
        ('spectrum of Q:', Q_W),
    ]
    report = []
    for title, value in sections:
        report.extend([title, str(value), ''])
    return '\n'.join(report).rstrip()
def __call__(self):
    """
    Look for a counterexample.

    A rate matrix R is sampled and exponentiated at two random times
    ta <= tb.  For each state, the K-L divergence of the corresponding
    transition matrix row from the sampled distribution v is computed at
    both times; a counterexample is a row whose divergence at ta is
    smaller than its divergence at tb.

    Side effects: self.nrows_total is incremented for every row looked
    at, self.nrows_allclose for every row that is numerically the same
    at both times, and a text report is stored in self.counterexample
    for the first counterexample row.

    Returns True when a counterexample was found, otherwise False.
    """
    # Sample a rate matrix.
    # Use a trick by Robert Kern to left and right multiply by diagonals.
    # http://mail.scipy.org/pipermail/numpy-discussion/2007-March/
    # 026809.html
    S = MatrixUtil.sample_pos_sym_matrix(self.nstates)
    v = mrate.sample_distn(self.nstates)
    R = (v**-0.5)[:, np.newaxis] * S * (v**0.5)
    R -= np.diag(np.sum(R, axis=1))
    # Sample a short time and a longer time.
    # For each row of the transition matrix,
    # look at the K-L divergence to the stationary distribution,
    # and check that it is smaller at the larger time.
    ta, tb = sorted((random.expovariate(1), random.expovariate(1)))
    Pa = scipy.linalg.expm(R * ta)
    Pb = scipy.linalg.expm(R * tb)
    for rowa, rowb in zip(Pa, Pb):
        self.nrows_total += 1
        if np.allclose(rowa, rowb):
            self.nrows_allclose += 1
            continue
        kla = sum(x * math.log(x / y) for x, y in zip(rowa, v))
        klb = sum(x * math.log(x / y) for x, y in zip(rowb, v))
        if not kla < klb:
            continue

        lines = []

        def emit(*fields):
            # One output line: str() of each field joined by single spaces.
            lines.append(' '.join(str(field) for field in fields))

        emit('found a counterexample')
        emit()
        emit('sampled symmetric matrix S:')
        emit(S)
        emit()
        emit('sampled stationary distribution v:')
        emit(v)
        emit()
        emit('implied rate matrix R:')
        emit(R)
        emit()
        emit('sampled time ta:', ta)
        emit('sampled time tb:', tb)
        emit()
        emit('transition matrix Pa:')
        emit(Pa)
        emit()
        emit('transition matrix Pb:')
        emit(Pb)
        emit()
        emit('relevant row of Pa:')
        emit(rowa)
        emit()
        emit('relevant row of Pb:')
        emit(rowb)
        emit()
        emit('K-L divergence of row of Pa from v:')
        emit(kla)
        emit()
        emit('K-L divergence of row of Pb from v:')
        emit(klb)
        emit()
        self.counterexample = '\n'.join(lines).rstrip()
        return True
    # Every row behaved as expected.
    return False
def process(fs):
    """
    Sample a rate matrix and report spectral and commute-distance
    summaries of it.

    fs.nstates is the number of states n.  The report contains the
    distance matrix reconstructed from the pseudoinverse of the symmetric
    matrix similar to R, the eigendecomposition of its double-centered
    form, brute-force squared distances, and the eigendecompositions of
    R and of its symmetric similar matrix.

    Side effect: sets the numpy print line width to 200.

    Raises ValueError if the similar matrix is not symmetric, or if the
    reconstructed distances disagree with
    mrate.get_commute_distance_matrix.

    Returns the report as a multi-line string.
    """
    n = fs.nstates
    np.set_printoptions(linewidth=200)
    # Sample a symmetric rate matrix and a stationary distribution,
    # then construct the rate matrix R.
    S = MatrixUtil.sample_pos_sym_matrix(n)
    v = mrate.sample_distn(n)
    psi = np.sqrt(v)
    R = (S.T / psi).T * psi
    R -= np.diag(np.sum(R, axis=1))
    R_W, R_V = scipy.linalg.eig(R)
    # construct the symmetric matrix that is similar to R
    R_sim = (R.T * psi).T / psi
    if not np.allclose(R_sim, R_sim.T):
        raise ValueError('the similar symmetric matrix is not symmetric...')
    R_sim_W, R_sim_V = scipy.linalg.eigh(R_sim)
    # eigh sorts eigenvalues ascending, so index -2 is the second largest.
    R_gap = -R_sim_W[-2]
    fiedler = R_sim_V.T[-2]
    v2 = fiedler**2
    # reconstruct the eigenvectors of R
    R_V_rebuilt = (R_sim_V.T / psi).T
    # Try to make the commute time matrix.
    # R_sim is a lot like a Laplacian matrix, so lets pseudoinvert it.
    R_sim_pinv = scipy.linalg.pinv(R_sim)
    myouter = np.outer(np.ones(n), np.diag(R_sim_pinv))
    D = -(myouter + myouter.T - 2 * R_sim_pinv)
    D_commute = mrate.get_commute_distance_matrix(R, v)
    if not np.allclose(D, D_commute):
        raise ValueError('error computing commute distances')
    HDH = MatrixUtil.double_centered(D)
    HDH_W, HDH_V = scipy.linalg.eigh(HDH)
    # compute squared pairwise distances brutely
    X = R_sim_V.T[:-1].T / np.sqrt(-R_sim_W[:-1])
    D_brute = np.array([[np.dot(b - a, b - a) for a in X] for b in X])
    # The rescaled distances are what the later max(D) lines refer to.
    D_scaled = (D.T / psi).T / psi

    lines = []

    def emit(*fields):
        # One output line: str() of each field separated by single spaces.
        lines.append(' '.join(str(field) for field in fields))

    emit('reconstructed EDM:')
    emit(D)
    emit()
    emit('divide by square roots of stationary probabilities:')
    emit(D_scaled)
    emit()
    emit('eigh of centered EDM:')
    emit('eigenvalues:')
    emit(HDH_W)
    emit('reciprocal nonzero eigenvalues:')
    emit(1 / HDH_W)
    emit('eigenvectors:')
    emit(HDH_V)
    emit()
    emit('squared distances computed brutely:')
    emit(D_brute)
    emit()
    emit('1 / (h * max(D)):', 1 / (np.dot(v, 1 - v) * np.max(D_scaled)))
    emit('1 / max(D):', 1 / np.max(D_scaled))
    emit()
    # report some more standard stuff
    emit('sampled rate matrix R:')
    emit(R)
    emit('stationary distn:', v)
    emit('1/R01 + 1/R10:', 1 / R[0, 1] + 1 / R[1, 0])
    emit()
    emit('scipy.linagl.eig(R):')
    emit(R_W)
    emit(R_V)
    emit()
    emit('symmetric matrix similar to R:')
    emit(R_sim)
    emit()
    emit('eigh of the symmetric similar matrix to R:')
    emit(R_sim_W)
    emit(R_sim_V)
    emit('spectral gap:', R_gap)
    emit('entrywise squares of eigenvectors:')
    emit(R_sim_V**2)
    emit('a bilinear form involving a fiedler-like eigenvector:')
    emit(ndot(fiedler, R_sim, fiedler))
    emit('expected rate:', -np.dot(v, np.diag(R)))
    emit('second order expected rate:', -np.dot(v2, np.diag(R)))
    emit()
    emit('eigenvectors of R from eigenvectors of the similar matrix:')
    emit(R_sim_W)
    emit(R_V_rebuilt)
    emit()
    return '\n'.join(lines).rstrip()
def process(fs):
    """
    Build a text report about a randomly sampled rate matrix R.

    fs.nstates is the number of states n.  The report lists a distance
    matrix derived from the pseudoinverse of the symmetric matrix similar
    to R (before and after dividing by square roots of the sampled
    distribution), the eigendecomposition of its double-centered form,
    brute-force squared distances, and eigen-summaries of R.

    Side effect: sets the numpy print line width to 200.

    Raises ValueError if the similar matrix is not symmetric, or if the
    reconstructed distances disagree with
    mrate.get_commute_distance_matrix.

    Returns the report as a multi-line string.
    """
    n = fs.nstates
    np.set_printoptions(linewidth=200)
    # Sample a symmetric rate matrix and a stationary distribution,
    # then construct the rate matrix R.
    S = MatrixUtil.sample_pos_sym_matrix(n)
    v = mrate.sample_distn(n)
    psi = np.sqrt(v)
    R = (S.T / psi).T * psi
    R -= np.diag(np.sum(R, axis=1))
    R_W, R_V = scipy.linalg.eig(R)
    # construct the symmetric matrix that is similar to R
    R_sim = (R.T * psi).T / psi
    if not np.allclose(R_sim, R_sim.T):
        raise ValueError('the similar symmetric matrix is not symmetric...')
    R_sim_W, R_sim_V = scipy.linalg.eigh(R_sim)
    # eigh sorts eigenvalues ascending, so index -2 is the second largest.
    R_gap = -R_sim_W[-2]
    second_vec = R_sim_V.T[-2]
    v2 = second_vec**2
    # reconstruct the eigenvectors of R
    R_V_rebuilt = (R_sim_V.T / psi).T
    # Try to make the commute time matrix.
    # R_sim is a lot like a Laplacian matrix, so lets pseudoinvert it.
    R_sim_pinv = scipy.linalg.pinv(R_sim)
    myouter = np.outer(np.ones(n), np.diag(R_sim_pinv))
    D_raw = -(myouter + myouter.T - 2 * R_sim_pinv)
    D_commute = mrate.get_commute_distance_matrix(R, v)
    if not np.allclose(D_raw, D_commute):
        raise ValueError('error computing commute distances')
    HDH = MatrixUtil.double_centered(D_raw)
    HDH_W, HDH_V = scipy.linalg.eigh(HDH)
    # compute squared pairwise distances brutely
    X = R_sim_V.T[:-1].T / np.sqrt(-R_sim_W[:-1])
    D_brute = np.array([[np.dot(b - a, b - a) for a in X] for b in X])
    # The rescaled distances are what the later max(D) lines refer to.
    D = (D_raw.T / psi).T / psi
    rates = np.diag(R)
    # Each entry is one output line; '' is a blank separator line.
    report = [
        'reconstructed EDM:',
        str(D_raw),
        '',
        'divide by square roots of stationary probabilities:',
        str(D),
        '',
        'eigh of centered EDM:',
        'eigenvalues:',
        str(HDH_W),
        'reciprocal nonzero eigenvalues:',
        str(1 / HDH_W),
        'eigenvectors:',
        str(HDH_V),
        '',
        'squared distances computed brutely:',
        str(D_brute),
        '',
        '1 / (h * max(D)): ' + str(1 / (np.dot(v, 1 - v) * np.max(D))),
        '1 / max(D): ' + str(1 / np.max(D)),
        '',
        # report some more standard stuff
        'sampled rate matrix R:',
        str(R),
        'stationary distn: ' + str(v),
        '1/R01 + 1/R10: ' + str(1 / R[0, 1] + 1 / R[1, 0]),
        '',
        'scipy.linagl.eig(R):',
        str(R_W),
        str(R_V),
        '',
        'symmetric matrix similar to R:',
        str(R_sim),
        '',
        'eigh of the symmetric similar matrix to R:',
        str(R_sim_W),
        str(R_sim_V),
        'spectral gap: ' + str(R_gap),
        'entrywise squares of eigenvectors:',
        str(R_sim_V ** 2),
        'a bilinear form involving a fiedler-like eigenvector:',
        str(ndot(second_vec, R_sim, second_vec)),
        'expected rate: ' + str(-np.dot(v, rates)),
        'second order expected rate: ' + str(-np.dot(v2, rates)),
        '',
        'eigenvectors of R from eigenvectors of the similar matrix:',
        str(R_sim_W),
        str(R_V_rebuilt),
        '',
    ]
    return '\n'.join(report).rstrip()
def __call__(self):
    """
    Look for a counterexample.

    Samples a rate matrix R and two random times ta <= tb, then compares,
    row by row, the K-L divergence of expm(R*ta) and of expm(R*tb) from
    the sampled distribution v.  A row whose divergence at ta is smaller
    than at tb is a counterexample.

    Side effects: bumps self.nrows_total per row examined and
    self.nrows_allclose per row that np.allclose cannot tell apart at the
    two times; writes a text report to self.counterexample for the first
    counterexample row.

    Returns True if a counterexample was recorded, False otherwise.
    """
    # Sample a rate matrix.
    # Use a trick by Robert Kern to left and right multiply by diagonals.
    # http://mail.scipy.org/pipermail/numpy-discussion/2007-March/
    # 026809.html
    nstates = self.nstates
    S = MatrixUtil.sample_pos_sym_matrix(nstates)
    v = mrate.sample_distn(nstates)
    R = (v**-0.5)[:, np.newaxis] * S * (v**0.5)
    R -= np.diag(np.sum(R, axis=1))

    def kl_from_v(row):
        # K-L divergence of one transition-matrix row from v.
        return sum(x * math.log(x / y) for x, y in zip(row, v))

    # Sample a short time and a longer time.
    ta, tb = sorted((random.expovariate(1), random.expovariate(1)))
    Pa = scipy.linalg.expm(R * ta)
    Pb = scipy.linalg.expm(R * tb)
    for rowa, rowb in zip(Pa, Pb):
        self.nrows_total += 1
        if np.allclose(rowa, rowb):
            self.nrows_allclose += 1
            continue
        kla = kl_from_v(rowa)
        klb = kl_from_v(rowb)
        if kla < klb:
            # Each entry is one output line; '' is a blank separator.
            report = [
                'found a counterexample',
                '',
                'sampled symmetric matrix S:',
                str(S),
                '',
                'sampled stationary distribution v:',
                str(v),
                '',
                'implied rate matrix R:',
                str(R),
                '',
                'sampled time ta: ' + str(ta),
                'sampled time tb: ' + str(tb),
                '',
                'transition matrix Pa:',
                str(Pa),
                '',
                'transition matrix Pb:',
                str(Pb),
                '',
                'relevant row of Pa:',
                str(rowa),
                '',
                'relevant row of Pb:',
                str(rowb),
                '',
                'K-L divergence of row of Pa from v:',
                str(kla),
                '',
                'K-L divergence of row of Pb from v:',
                str(klb),
                '',
            ]
            self.counterexample = '\n'.join(report).rstrip()
            return True
    # Every row behaved as expected.
    return False
def process(fs):
    """
    Sample a rate matrix R, perturb its stationary distribution, build a
    second rate matrix Q for the perturbed distribution, and report how
    the spectral gap changes.

    Attributes read from fs: nstates (number of states), eps (scale of
    the perturbation), and the flags knudsen / sella which select how Q
    is built from the perturbed distribution.

    Side effect: sets the numpy print line width to 200.

    Raises ValueError if the similar matrix of R is not symmetric, if the
    perturbed distribution leaves [0, 1], or if neither fs.knudsen nor
    fs.sella is set.

    Returns the report as a multi-line string.
    """
    n = fs.nstates
    np.set_printoptions(linewidth=200)
    # Sample a symmetric rate matrix and a stationary distribution,
    # then construct the rate matrix R.
    S = MatrixUtil.sample_pos_sym_matrix(n)
    v = mrate.sample_distn(n)
    psi = np.sqrt(v)
    R = (S.T / psi).T * psi
    R -= np.diag(np.sum(R, axis=1))
    R_W, R_V = scipy.linalg.eig(R)
    # construct the symmetric matrix that is similar to R
    R_sim = (R.T * psi).T / psi
    if not np.allclose(R_sim, R_sim.T):
        raise ValueError('the similar symmetric matrix is not symmetric...')
    R_sim_W, R_sim_V = scipy.linalg.eigh(R_sim)
    # eigh sorts eigenvalues ascending, so index -2 is the second largest.
    R_gap = -R_sim_W[-2]
    fiedler = R_sim_V.T[-2]
    v2 = fiedler**2
    # reconstruct the eigenvectors of R
    R_V_rebuilt = (R_sim_V.T / psi).T
    # Sample some numbers then subtract mean then normalize.
    # NOTE(review): the scale divides by dot(dv, dv), the squared norm,
    # not the norm -- confirm that this is the intended normalization.
    dv = np.random.exponential(1, n)
    dv -= np.mean(dv)
    dv *= fs.eps / np.dot(dv, dv)
    qv = v + dv
    if any(qv < 0) or any(1 < qv):
        raise ValueError(
                'the stationary distribution change was too large '
                'for the randomly sampled process')
    qpsi = np.sqrt(qv)
    # define the rate matrix
    if fs.knudsen:
        Q = (S.T / qpsi).T * qpsi
    elif fs.sella:
        Q = R.copy()
        for a in range(n):
            for b in range(n):
                if a == b:
                    continue
                tau = (qv[b] / v[b]) / (qv[a] / v[a])
                # log(tau) / (1 - 1/tau) tends to 1 as tau tends to 1,
                # so at tau == 1 the rate is left unchanged instead of
                # evaluating 0/0.
                if tau != 1:
                    Q[a, b] *= math.log(tau) / (1 - 1 / tau)
    else:
        raise ValueError('expected either fs.knudsen or fs.sella to be set')
    Q -= np.diag(np.sum(Q, axis=1))
    # construct the symmetric matrix that is similar to Q
    Q_sim = (Q.T * qpsi).T / qpsi
    Q_sim_W = scipy.linalg.eigh(Q_sim)[0]
    Q_gap = -Q_sim_W[-2]

    lines = []

    def emit(*fields):
        # One output line: str() of each field separated by single spaces.
        lines.append(' '.join(str(field) for field in fields))

    # report some stuff
    emit('sampled rate matrix R:')
    emit(R)
    emit('stationary distn:', v)
    emit()
    emit('scipy.linagl.eig(R):')
    emit(R_W)
    emit(R_V)
    emit()
    emit('symmetric matrix similar to R:')
    emit(R_sim)
    emit()
    emit('eigh of the symmetric similar matrix to R:')
    emit(R_sim_W)
    emit(R_sim_V)
    emit('spectral gap:', R_gap)
    emit('entrywise squares of eigenvectors:')
    emit(R_sim_V ** 2)
    emit('a bilinear form involving a fiedler-like eigenvector:')
    emit(ndot(fiedler, R_sim, fiedler))
    emit('expected rate:', -np.dot(v, np.diag(R)))
    emit('second order expected rate:', -np.dot(v2, np.diag(R)))
    emit()
    emit('eigenvectors of R from eigenvectors of the similar matrix:')
    emit(R_sim_W)
    emit(R_V_rebuilt)
    emit()
    emit('mutation-selection balance matrix Q:')
    emit(Q)
    emit('stationary distn:', qv)
    emit('spectral gap:', Q_gap)
    emit()
    emit('symmetric matrix similar to Q:')
    emit(Q_sim)
    emit()
    emit('pi(Q) - pi(R):', dv)
    emit('gap(Q) - gap(R):', Q_gap - R_gap)
    emit('diag(Q) - diag(R):', np.diag(Q) - np.diag(R))
    emit('trace(Q) - trace(R):', np.trace(Q) - np.trace(R))
    emit()
    emit('rate away estimate of spectral gap change:')
    emit(np.dot(np.diag(Q) - np.diag(R), fiedler**2))
    emit()
    return '\n'.join(lines).rstrip()
def process(fs):
    """
    Report the effect of a small stationary-distribution change on the
    spectral gap of a sampled rate matrix.

    A rate matrix R is sampled, its distribution v is shifted by a random
    zero-mean vector scaled with fs.eps, and a matrix Q is built for the
    shifted distribution using the construction selected by fs.knudsen
    or fs.sella.  fs.nstates gives the number of states.

    Side effect: sets the numpy print line width to 200.

    Raises ValueError if the similar matrix of R is not symmetric, if the
    shifted distribution has an entry outside [0, 1], or if neither
    fs.knudsen nor fs.sella is set.

    Returns the report as a multi-line string.
    """
    n = fs.nstates
    np.set_printoptions(linewidth=200)
    # Sample a symmetric rate matrix and a stationary distribution,
    # then construct the rate matrix R.
    S = MatrixUtil.sample_pos_sym_matrix(n)
    v = mrate.sample_distn(n)
    psi = np.sqrt(v)
    R = (S.T / psi).T * psi
    R -= np.diag(np.sum(R, axis=1))
    R_W, R_V = scipy.linalg.eig(R)
    # construct the symmetric matrix that is similar to R
    R_sim = (R.T * psi).T / psi
    if not np.allclose(R_sim, R_sim.T):
        raise ValueError('the similar symmetric matrix is not symmetric...')
    R_sim_W, R_sim_V = scipy.linalg.eigh(R_sim)
    # eigh sorts eigenvalues ascending, so index -2 is the second largest.
    R_gap = -R_sim_W[-2]
    second_vec = R_sim_V.T[-2]
    v2 = second_vec**2
    # reconstruct the eigenvectors of R
    R_V_rebuilt = (R_sim_V.T / psi).T
    # Sample some numbers then subtract mean then normalize.
    # NOTE(review): the scale divides by dot(dv, dv), the squared norm,
    # not the norm -- confirm that this is the intended normalization.
    dv = np.random.exponential(1, n)
    dv -= np.mean(dv)
    dv *= fs.eps / np.dot(dv, dv)
    qv = v + dv
    if any(qv < 0) or any(1 < qv):
        raise ValueError('the stationary distribution change was too large '
                'for the randomly sampled process')
    qpsi = np.sqrt(qv)
    # define the rate matrix
    if fs.knudsen:
        Q = (S.T / qpsi).T * qpsi
    elif fs.sella:
        Q = R.copy()
        # Relative change of each state's stationary probability.
        fold = qv / v
        for a in range(n):
            for b in range(n):
                if a != b:
                    tau = fold[b] / fold[a]
                    # The factor log(tau) / (1 - 1/tau) has limit 1 at
                    # tau == 1; skip the 0/0 evaluation in that case.
                    if tau != 1:
                        Q[a, b] *= math.log(tau) / (1 - 1 / tau)
    else:
        raise ValueError('expected either fs.knudsen or fs.sella to be set')
    Q -= np.diag(np.sum(Q, axis=1))
    # construct the symmetric matrix that is similar to Q
    Q_sim = (Q.T * qpsi).T / qpsi
    Q_sim_W, _ = scipy.linalg.eigh(Q_sim)
    Q_gap = -Q_sim_W[-2]
    diag_change = np.diag(Q) - np.diag(R)
    # Each entry is one output line; '' is a blank separator line.
    report = [
        # report some stuff
        'sampled rate matrix R:',
        str(R),
        'stationary distn: ' + str(v),
        '',
        'scipy.linagl.eig(R):',
        str(R_W),
        str(R_V),
        '',
        'symmetric matrix similar to R:',
        str(R_sim),
        '',
        'eigh of the symmetric similar matrix to R:',
        str(R_sim_W),
        str(R_sim_V),
        'spectral gap: ' + str(R_gap),
        'entrywise squares of eigenvectors:',
        str(R_sim_V**2),
        'a bilinear form involving a fiedler-like eigenvector:',
        str(ndot(second_vec, R_sim, second_vec)),
        'expected rate: ' + str(-np.dot(v, np.diag(R))),
        'second order expected rate: ' + str(-np.dot(v2, np.diag(R))),
        '',
        'eigenvectors of R from eigenvectors of the similar matrix:',
        str(R_sim_W),
        str(R_V_rebuilt),
        '',
        'mutation-selection balance matrix Q:',
        str(Q),
        'stationary distn: ' + str(qv),
        'spectral gap: ' + str(Q_gap),
        '',
        'symmetric matrix similar to Q:',
        str(Q_sim),
        '',
        'pi(Q) - pi(R): ' + str(dv),
        'gap(Q) - gap(R): ' + str(Q_gap - R_gap),
        'diag(Q) - diag(R): ' + str(diag_change),
        'trace(Q) - trace(R): ' + str(np.trace(Q) - np.trace(R)),
        '',
        'rate away estimate of spectral gap change:',
        str(np.dot(diag_change, second_vec**2)),
        '',
    ]
    return '\n'.join(report).rstrip()
def process(fs):
    """
    Sample a rate matrix R, build a simplified rate matrix Q from it, and
    report summary statistics of both processes at time fs.t.

    Attributes read from fs: nstates (number of states), t (the time at
    which transition matrices and mutual information are evaluated), and
    the flags parent_indep / child_indep / bipartitioned that select the
    simplification.  When several flags are set the last one in that
    order determines Q, but the parent-independent rescaling factor and
    autobarrier matrix are still reported if parent_indep is set.

    Side effect: sets the numpy print line width to 200.

    Raises ValueError if none of the three simplification flags is set.

    Returns the report as a multi-line string.
    """
    nstates = fs.nstates
    np.set_printoptions(linewidth=200)
    t = fs.t
    # Sample a rate matrix.
    # Use a trick by Robert Kern to left and right multiply by diagonals.
    # http://mail.scipy.org/pipermail/numpy-discussion/2007-March/
    # 026809.html
    S = MatrixUtil.sample_pos_sym_matrix(nstates)
    v = mrate.sample_distn(nstates)
    R = (v**-0.5)[:, np.newaxis] * S * (v**0.5)
    R -= np.diag(np.sum(R, axis=1))
    Q = None
    # Construct a parent-independent process
    # with the same max rate and stationary distribution
    # as the sampled process.
    if fs.parent_indep:
        Q = np.outer(np.ones(nstates), v)
        Q -= np.diag(np.sum(Q, axis=1))
        pi_rescaling_factor = max(np.diag(R) / np.diag(Q))
        Q *= pi_rescaling_factor
        Z = msimpl.get_fast_meta_f81_autobarrier(Q)
    # Construct a child-independent process
    # rescaled by the largest entrywise ratio R / C.
    if fs.child_indep:
        C = np.outer(1 / v, np.ones(nstates))
        C -= np.diag(np.sum(C, axis=1))
        ci_rescaling_factor = np.max(R / C)
        C *= ci_rescaling_factor
        Q = C
    if fs.bipartitioned:
        Q = msimpl.get_fast_meta_f81_autobarrier(R)
    if Q is None:
        # Without this check the first use of Q fails with a NameError.
        raise ValueError(
                'expected one of fs.parent_indep, fs.child_indep, '
                'fs.bipartitioned to be set')

    lines = []

    def emit(*fields):
        # One output line: str() of each field separated by single spaces.
        lines.append(' '.join(str(field) for field in fields))

    emit('sampled symmetric part of the rate matrix S:')
    emit(S)
    emit()
    emit('sampled stationary distribution v:')
    emit(v)
    emit()
    emit('shannon entropy of stationary distribution v:')
    emit(-np.dot(np.log(v), v))
    emit()
    emit('sqrt stationary distribution:')
    emit(np.sqrt(v))
    emit()
    emit('implied rate matrix R:')
    emit(R)
    emit()
    emit('eigenvalues of R:', scipy.linalg.eigvals(R))
    emit()
    emit('relaxation rate of R:',
            sorted(np.abs(scipy.linalg.eigvals(R)))[1])
    emit()
    emit('expected rate of R:', mrate.Q_to_expected_rate(R))
    emit()
    emit('cheeger bounds of R:', get_cheeger_bounds(R, v))
    emit()
    emit('randomization rate of R:', get_randomization_rate(R, v))
    emit()
    candidates = [get_randomization_candidate(R, v, i) for i in range(nstates)]
    if np.allclose(get_randomization_rate(R, v), candidates):
        emit('all candidates are equal to this rate')
    else:
        emit('not all candidates are equal to this rate')
    emit()
    emit('simplified rate matrix Q:')
    emit(Q)
    emit()
    qv = mrate.R_to_distn(Q)
    emit('stationary distribution of Q:')
    emit(qv)
    emit()
    emit('ratio qv/v:')
    emit(qv / v)
    emit()
    emit('shannon entropy of stationary distribution of Q:')
    emit(-np.dot(np.log(qv), qv))
    emit()
    if fs.parent_indep:
        emit('parent independent rescaling factor:')
        emit(pi_rescaling_factor)
        emit()
    if fs.child_indep:
        emit('child independent rescaling factor:')
        emit(ci_rescaling_factor)
        emit()
    emit('eigenvalues of Q:', scipy.linalg.eigvals(Q))
    emit()
    emit('relaxation rate of Q:',
            sorted(np.abs(scipy.linalg.eigvals(Q)))[1])
    emit()
    emit('expected rate of Q:', mrate.Q_to_expected_rate(Q))
    emit()
    emit('cheeger bounds of Q:', get_cheeger_bounds(Q, v))
    emit()
    emit('randomization rate of Q:', get_randomization_rate(Q, v))
    emit()
    candidates = [get_randomization_candidate(Q, v, i) for i in range(nstates)]
    if np.allclose(get_randomization_rate(Q, v), candidates):
        emit('all candidates are equal to this rate')
    else:
        emit('warning: not all candidates are equal to this rate')
    emit()
    emit('E(rate) of Q divided by logical entropy:',
            mrate.Q_to_expected_rate(Q) / ndot(v, 1 - v))
    emit()
    emit('symmetric matrix similar to Q:')
    Q_sim = ndot(np.diag(np.sqrt(v)), Q, np.diag(1 / np.sqrt(v)))
    emit(Q_sim)
    emit()
    emit('eigendecomposition of the similar matrix:')
    W, V = scipy.linalg.eigh(Q_sim)
    emit(V)
    emit(np.diag(W))
    emit(V.T)
    emit()
    emit('time:', t)
    emit()
    emit('stationary distn logical entropy:', ndot(v, 1 - v))
    emit()
    P_by_hand = get_pi_transition_matrix(Q, v, t)
    emit('simplified-process transition matrix computed by hand:')
    emit(P_by_hand)
    emit()
    emit('simplified-process transition matrix computed by expm:')
    emit(scipy.linalg.expm(Q * t))
    emit()
    emit('simplified-process m.i. by hand:')
    emit(get_pi_mi(Q, v, t))
    emit()
    emit('simplified-process m.i. by expm:')
    emit(ctmcmi.get_expected_ll_ratio(Q, t))
    emit()
    emit('original process m.i. by expm:')
    emit(ctmcmi.get_expected_ll_ratio(R, t))
    emit()
    emit('stationary distn Shannon entropy:')
    emit(-ndot(v, np.log(v)))
    emit()
    if fs.parent_indep:
        emit('approximate simplified process m.i. 2nd order approx:')
        emit(get_pi_mi_t2_approx(Q, v, t))
        emit()
        emit('approximate simplified process m.i. "better" approx:')
        emit(get_pi_mi_t2_diag_approx(Q, v, t))
        emit()
        emit('"f81-ization plus barrier" of pure f81-ization:')
        emit(Z)
        emit()
    return '\n'.join(lines).rstrip()