def report_summary(L, neg_B): U, s, Vh = scipy.linalg.svd(neg_B, full_matrices=False, compute_uv=True) x = U.T[0] y = Vh[0] print 'L:' print L print print 'neg_B:' print neg_B print print 'U:' print U print print 's:' print s print print 'Vh:' print Vh print print 'U S vh:' print ndot(U, np.diag(s), Vh) print print 'approx:' print s[0] * np.outer(x, y) print xycat = x.tolist() + y.tolist() signs = set(np.sign(xycat).astype(np.int)) if set([-1, 1]) <= signs: raise ValueError('multiple signs in the concatenated xy')
def __init__(self, Q): """ @param Q: rate matrix """ # define intermediate variables v = mrate.R_to_distn(Q) n = len(v) psi = np.sqrt(v) c_low, c_mid, c_high = cheeger.get_cheeger_bounds(Q, v) # define member variables to summarize the rate matrix self.rate_matrix = Q self.exch_matrix = Q / v if not np.allclose(self.exch_matrix, self.exch_matrix.T): print self.exch_matrix raise ValueError('expected symmetry') self.sim_sym_matrix = np.outer(psi, 1 / psi) * Q if not np.allclose(self.sim_sym_matrix, self.sim_sym_matrix.T): print self.sim_sym_matrix raise ValueError('expected symmetry') self.distn = v self.distn_shannon_entropy = -ndot(np.log(v), v) self.distn_logical_entropy = ndot(v, 1 - v) self.expected_rate = -ndot(np.diag(Q), v) self.spectrum = scipy.linalg.eigvalsh(self.sim_sym_matrix) self.spectral_gap = -self.spectrum[-2] self.isoperimetric_low = c_low self.isoperimetric_constant = c_mid self.isoperimetric_high = c_high self.trace_bound_high = -sum(np.diag(Q)) / (n - 1)
def __init__(self, Q): """ @param Q: rate matrix """ # define intermediate variables v = mrate.R_to_distn(Q) n = len(v) psi = np.sqrt(v) c_low, c_mid, c_high = cheeger.get_cheeger_bounds(Q, v) # define member variables to summarize the rate matrix self.rate_matrix = Q self.exch_matrix = Q / v if not np.allclose(self.exch_matrix, self.exch_matrix.T): print self.exch_matrix raise ValueError('expected symmetry') self.sim_sym_matrix = np.outer(psi, 1/psi) * Q if not np.allclose(self.sim_sym_matrix, self.sim_sym_matrix.T): print self.sim_sym_matrix raise ValueError('expected symmetry') self.distn = v self.distn_shannon_entropy = -ndot(np.log(v), v) self.distn_logical_entropy = ndot(v, 1-v) self.expected_rate = -ndot(np.diag(Q), v) self.spectrum = scipy.linalg.eigvalsh(self.sim_sym_matrix) self.spectral_gap = -self.spectrum[-2] self.isoperimetric_low = c_low self.isoperimetric_constant = c_mid self.isoperimetric_high = c_high self.trace_bound_high = -sum(np.diag(Q)) / (n-1)
def get_p_id_deriv_ratio(R, t):
    """
    Get (second derivative of p_identity) divided by
    (first derivative of p_id).
    Both derivatives are the sums over states i of v[i] times the i-th
    diagonal entry of the corresponding derivative matrix, where v
    is the distribution returned by mrate.R_to_distn.
    An earlier version accumulated every term n times in a redundant
    inner loop; that common factor cancels in the returned ratio.
    @param R: rate matrix
    @param t: time
    @return: the ratio of the second to the first derivative
    """
    # symmetrize the rate matrix
    v = mrate.R_to_distn(R)
    lam = np.diag(np.sqrt(v))
    rlam = np.diag(np.reciprocal(np.sqrt(v)))
    S = ndot(lam, -R, rlam)
    # eigendecompose the symmetrized rate matrix
    # this should satisfy R = ndot(rlam, V, np.diag(-W), V.T, lam)
    W, V = scipy.linalg.eigh(S)
    # get the first two derivatives of the transition matrix
    decay = np.exp(-W * t)
    P_dt = ndot(rlam, V, np.diag(-W * decay), V.T, lam)
    P_dtt = ndot(rlam, V, np.diag(W * W * decay), V.T, lam)
    # get the two derivatives of expected identity
    e_dt = np.dot(v, np.diag(P_dt))
    e_dtt = np.dot(v, np.diag(P_dtt))
    return e_dtt / e_dt
def get_p_id_deriv_ratio(R, t):
    """
    Get (second derivative of p_identity) divided by
    (first derivative of p_id).
    Each derivative is computed as the v-weighted sum of the diagonal
    of the matching derivative matrix, with v taken from
    mrate.R_to_distn.
    A former nested loop added each diagonal term n times to both
    sums; the ratio is unaffected by dropping that common factor.
    @param R: rate matrix
    @param t: time
    @return: the ratio of the second to the first derivative
    """
    # symmetrize the rate matrix
    v = mrate.R_to_distn(R)
    lam = np.diag(np.sqrt(v))
    rlam = np.diag(np.reciprocal(np.sqrt(v)))
    S = ndot(lam, -R, rlam)
    # eigendecompose the symmetrized rate matrix
    # this should satisfy R = ndot(rlam, V, np.diag(-W), V.T, lam)
    W, V = scipy.linalg.eigh(S)
    # first and second time derivatives of the transition matrix
    decay = np.exp(-W * t)
    P_dt = ndot(rlam, V, np.diag(-W * decay), V.T, lam)
    P_dtt = ndot(rlam, V, np.diag(W * W * decay), V.T, lam)
    # weighted traces of the two derivative matrices
    e_dt = np.dot(v, np.diag(P_dt))
    e_dtt = np.dot(v, np.diag(P_dtt))
    return e_dtt / e_dt
def get_rhs_old(M, nkeep, ndelete):
    """
    This is the rhs of a putative identity.
    With P = H R H built from the deletion projection R and the
    centering projection H, the result is P pinv(P' M P) P'.
    @param M: a matrix of order nkeep + ndelete
    @param nkeep: passed to get_deletion_projection
    @param ndelete: passed to get_deletion_projection
    """
    order = nkeep + ndelete
    deletion = get_deletion_projection(nkeep, ndelete)
    centering = get_centering_projection(order)
    combined = ndot(centering, deletion, centering)
    core = np.linalg.pinv(ndot(combined.T, M, combined))
    return ndot(combined, core, combined.T)
def get_rhs_also_old(M, nkeep, ndelete):
    """
    This is the rhs of a putative identity.
    The matrix is sandwiched as R H . H R, pseudoinverted,
    and then sandwiched the same way again.
    @param M: a matrix of order nkeep + ndelete
    @param nkeep: passed to get_deletion_projection
    @param ndelete: passed to get_deletion_projection
    """
    order = nkeep + ndelete
    R = get_deletion_projection(nkeep, ndelete)
    H = get_centering_projection(order)

    def sandwich(X):
        # apply the same projections on both sides
        return ndot(R, H, X, H, R)

    return sandwich(np.linalg.pinv(sandwich(M)))
def get_rhs(M, nkeep, ndelete):
    """
    Get the pseudoinverse of D R M R' D.
    Here R is get_p_centering_partial(nkeep, ndelete)
    and D is get_p_del(nkeep, ndelete).
    An earlier version of this function also evaluated, and then
    discarded, two other expressions for the same target value:
    schur_del(pinv(H M H), nkeep, ndelete) with H = get_p_centering(n),
    and pinv(P M P) with P = get_p_centering_partial_del(nkeep, ndelete).
    The stated goal was to find a projection-based expression that can
    be shown to be equivalent to taking a schur complement
    in a pseudoinverse.
    @param M: a matrix of order nkeep + ndelete
    @param nkeep: passed to the projection constructors
    @param ndelete: passed to the projection constructors
    @return: pinv(D R M R' D)
    """
    R = get_p_centering_partial(nkeep, ndelete)
    D = get_p_del(nkeep, ndelete)
    return np.linalg.pinv(ndot(D, R, M, R.T, D))
def get_identicality_params(R): """ This returns the parameters for an identicality function. If the rate matrix has n states then the identicality function is f(t) = a1*exp(b1*t) + a2*exp(b2*t) + ... + a{n-1}*exp(b{n-1}*t) + c @param R: time reversible rate matrix @return: a array, b array, c """ n = len(R) pi_arr = R_to_distn(R) # symmetrize lam = np.diag(np.sqrt(pi_arr)) rlam = np.diag(np.reciprocal(np.sqrt(pi_arr))) S = ndot(lam, R, rlam) print 'S should be symmetric:' print S print S - S.T # eigendecompose the symmetric matrix W, V = scipy.linalg.eigh(S) w_v_pairs = [(W[i], V[:,i]) for i in range(n)] # get the exponential coefficients eps = 1e-12 identicality_coeffs = [ np.dot(pi_arr, v*v) for w, v in w_v_pairs if abs(w) > eps] # get the exponential rate constants identicality_rates = [ w for w in W if abs(w) > eps] # get the one dimensional constant identicality_const = np.inner(pi_arr, pi_arr) # return the identicality parameters return (identicality_coeffs, identicality_rates, identicality_const)
def get_identicality_params(R): """ This returns the parameters for an identicality function. If the rate matrix has n states then the identicality function is f(t) = a1*exp(b1*t) + a2*exp(b2*t) + ... + a{n-1}*exp(b{n-1}*t) + c @param R: time reversible rate matrix @return: a array, b array, c """ n = len(R) pi_arr = R_to_distn(R) # symmetrize lam = np.diag(np.sqrt(pi_arr)) rlam = np.diag(np.reciprocal(np.sqrt(pi_arr))) S = ndot(lam, R, rlam) print 'S should be symmetric:' print S print S - S.T # eigendecompose the symmetric matrix W, V = scipy.linalg.eigh(S) w_v_pairs = [(W[i], V[:, i]) for i in range(n)] # get the exponential coefficients eps = 1e-12 identicality_coeffs = [ np.dot(pi_arr, v * v) for w, v in w_v_pairs if abs(w) > eps ] # get the exponential rate constants identicality_rates = [w for w in W if abs(w) > eps] # get the one dimensional constant identicality_const = np.inner(pi_arr, pi_arr) # return the identicality parameters return (identicality_coeffs, identicality_rates, identicality_const)
def get_response_content(fs):
    """
    Report schur complementation in a fixed 8x8 Laplacian matrix.
    The last two vertices are eliminated by schur complementation,
    the trailing 4x4 block of the result is extracted,
    its last row and column are dropped, and the remaining 3x3 matrix
    is inverted to give what the report calls a rooted covariance matrix.
    @param fs: not read by this function
    @return: the text of the report
    """
    # set up print options
    np.set_printoptions(
            linewidth=1000000,
            threshold=1000000,
            )
    # define the Laplacian matrix
    laplacian = np.array([
        [ 1,  0,  0,  0,  0, -1,  0,  0],
        [ 0,  2,  0,  0,  0, -2,  0,  0],
        [ 0,  0,  3,  0,  0,  0, -3,  0],
        [ 0,  0,  0,  2,  0,  0, -2,  0],
        [ 0,  0,  0,  0,  1,  0,  0, -1],
        [-1, -2,  0,  0,  0,  4,  0, -1],
        [ 0,  0, -3, -2,  0,  0,  6, -1],
        [ 0,  0,  0,  0, -1, -1, -1,  3],
        ], dtype=float)
    # eliminate the last two rows and columns by schur complementation
    kept_block = laplacian[:-2, :-2]
    coupling = laplacian[:-2, -2:]
    eliminated_block = laplacian[-2:, -2:]
    schur_complement = kept_block - ndot(
            coupling,
            scipy.linalg.inv(eliminated_block),
            laplacian[-2:, :-2])
    # the trailing block, and that block without its last row and column
    component = schur_complement[-4:, -4:]
    rooted = component[:-1, :-1]
    # the corresponding covariance matrix
    covariance = scipy.linalg.inv(rooted)
    # write each labeled item followed by a blank line
    sections = [
            ('L:', laplacian),
            ('schur complement of two internal vertices (7, 8) in L:',
                schur_complement),
            ('a component (3, 4, 5, 6) of the schur complement:',
                component),
            ('a piece (3, 4, 5) of the component:', rooted),
            ('the corresponding rooted covariance matrix:', covariance),
            ('trace of covariance matrix:', np.trace(covariance)),
            ]
    out = StringIO()
    for title, value in sections:
        print >> out, title
        print >> out, value
        print >> out
    return out.getvalue()
def schur(M, nsmall):
    """
    Get the schur complement of the trailing block of M.
    The lower left block of M is never read;
    the transpose of the upper right block is used in its place.
    @param M: a square matrix
    @param nsmall: the order of the leading block
    @return: A - B inv(C) B' for the blocks A, B, C of M
    @raise ValueError: if the determinant of the trailing block
    is smaller than 1e-5 in absolute value
    """
    trailing = M[nsmall:, nsmall:]
    if abs(np.linalg.det(trailing)) < 1e-5:
        raise ValueError('small determinant for schur complement')
    leading = M[:nsmall, :nsmall]
    upper_right = M[:nsmall, nsmall:]
    correction = ndot(upper_right, np.linalg.inv(trailing), upper_right.T)
    return leading - correction
def symmetrized(R):
    """
    Get the symmetrized matrix.
    The result is diag(sqrt(v)) R diag(1/sqrt(v)) where v is the
    distribution returned by R_to_distn.
    This returns a symmetric matrix that is not a rate matrix
    because rows do not sum to zero.
    @param R: rate matrix
    """
    root_distn = np.sqrt(R_to_distn(R))
    left = np.diag(root_distn)
    right = np.diag(np.reciprocal(root_distn))
    return ndot(left, R, right)
def _get_expectation(R, t):
    """
    Sum v[i] * M[i, j] over all pairs of states, where
    M = (P * P_dtt - P_dt * P_dt) / P entrywise,
    P is the transition matrix at time t rebuilt from a spectral
    decomposition, and P_dt and P_dtt are its first two time derivatives.
    @param R: rate matrix
    @param t: time
    """
    nstates = len(R)
    # symmetrize the rate matrix
    v = mrate.R_to_distn(R)
    root_v = np.sqrt(v)
    lam = np.diag(root_v)
    rlam = np.diag(np.reciprocal(root_v))
    S = ndot(lam, -R, rlam)
    # eigendecompose the symmetrized rate matrix
    # this should satisfy R = ndot(rlam, V, np.diag(-W), V.T, lam)
    W, V = scipy.linalg.eigh(S)
    decay = np.exp(-W * t)

    def rebuild(weights):
        # undo the symmetrization around a diagonal spectral factor
        return ndot(rlam, V, np.diag(weights), V.T, lam)

    # get P and its two derivatives
    P = rebuild(decay)
    P_dt = rebuild(-W * decay)
    P_dtt = rebuild(W * W * decay)
    M = (P * P_dtt - P_dt * P_dt) / P
    # accumulate in row major order starting from a float zero
    terms = (
            v[i] * M[i, j]
            for i in range(nstates) for j in range(nstates))
    return sum(terms, 0.0)
def symmetrized_known_distn(R, v):
    """
    Get the symmetrized matrix of a reversible markov process.
    The result is diag(sqrt(v)) R diag(1/sqrt(v)).
    This returns a symmetric matrix that is not a rate matrix
    because rows do not sum to zero.
    The returned matrix should be similar to R
    in the sense of linear algebra matrix similarity.
    @param R: rate matrix
    @param v: stationary distribution
    """
    root_v = np.sqrt(v)
    return ndot(np.diag(root_v), R, np.diag(np.reciprocal(root_v)))
def _get_expectation(R, t):
    """
    Get the v-weighted sum of all entries of
    M = (P * P_dtt - P_dt * P_dt) / P,
    where the products and the quotient are entrywise,
    P is the time t transition matrix rebuilt spectrally,
    and P_dt and P_dtt are its first and second time derivatives.
    @param R: rate matrix
    @param t: time
    """
    n = len(R)
    # symmetrize the rate matrix
    v = mrate.R_to_distn(R)
    sqrt_v = np.sqrt(v)
    lam = np.diag(sqrt_v)
    rlam = np.diag(np.reciprocal(sqrt_v))
    S = ndot(lam, -R, rlam)
    # eigendecompose the symmetrized rate matrix
    # this should satisfy R = ndot(rlam, V, np.diag(-W), V.T, lam)
    W, V = scipy.linalg.eigh(S)
    # get P and its two derivatives from their spectral factors
    expw = np.exp(-W * t)
    spectral_factors = (expw, -W * expw, W * W * expw)
    P, P_dt, P_dtt = [
            ndot(rlam, V, np.diag(factor), V.T, lam)
            for factor in spectral_factors]
    M = (P * P_dtt - P_dt * P_dt) / P
    # row major accumulation that starts from a float zero
    return sum(
            (v[i] * M[i, j] for i in range(n) for j in range(n)),
            0.0)
def bott_duffin(M):
    """
    We pretend that P_L is H.
    With P the matrix whose entries all equal 1/n and H = I - P,
    the result is H inv(M H + P).
    @param M: a square matrix
    @raise ValueError: if M is not square
    """
    nrows, ncols = M.shape
    if nrows != ncols:
        raise ValueError('expected a square matrix')
    # the constant matrix with every entry equal to 1/n
    P = np.ones((nrows, nrows)) / float(nrows)
    # the centering matrix
    H = np.eye(nrows) - P
    return ndot(H, np.linalg.inv(np.dot(M, H) + P))
def get_response_content(fs):
    """
    Compare a schur complement in a random singular matrix
    with a construction built from its pseudoinverse.
    A random matrix is projected on both sides onto the orthogonal
    complement of a random two dimensional subspace,
    and the schur complement in the resulting singular matrix is
    reported next to the pseudoinverse-based candidate.
    @param fs: not read by this function
    @return: the text of the report
    """
    # define some dimensions
    nleading = 5
    ntrailing = 3
    nullity = 2
    n = nleading + ntrailing

    def complement_projection(basis):
        # projection onto the orthogonal complement of the column space
        gram_inv = np.linalg.inv(ndot(basis.T, basis))
        return np.eye(len(basis)) - ndot(basis, gram_inv, basis.T)

    # get a random matrix assumed to be nonsingular
    M_nonsingular = sample_asymmetric_matrix(n)
    # get a random nullspace and its complementary projection
    N = 10.0 * np.random.rand(n, nullity) - 5.0
    off_N = complement_projection(N)
    # get the truncated nullspace and its complementary projection
    off_T = complement_projection(N[:nleading])
    # get the singular M with assumed nonsingular principal submatrix blocks
    M_singular = ndot(off_N, M_nonsingular, off_N)
    # get the schur complement in M and its eigendecomposition
    S = schur(M_singular, nleading)
    S_w, S_vt = np.linalg.eigh(S)
    # Get the double sided projection of the schur complement
    # onto the orthogonal complement of the truncated nullspace.
    Sp = ndot(off_T, S, off_T)
    Sp_w, Sp_vt = np.linalg.eigh(Sp)
    # Make a thing that is supposed to be the same as the schur complement.
    leading_of_pinv = np.linalg.pinv(M_singular)[:nleading, :nleading]
    mystery = np.linalg.pinv(ndot(off_T, leading_of_pinv, off_T))
    # begin the output
    np.set_printoptions(linewidth=200)
    out = StringIO()
    report_items = [
            'null space (N):', N,
            'schur complement (S):', S,
            'eigenvalues of S:', S_w,
            'eigenvectors of S:', S_vt,
            'double sided projection of the schur complement',
            'onto the complement of the truncated nullspace of M (Sp)', Sp,
            'eigenvalues of Sp:', Sp_w,
            'eigenvectors of Sp:', Sp_vt,
            'this thing that is supposed to be the schur complement:',
            mystery,
            'difference from the schur complement:', mystery - S,
            ]
    for item in report_items:
        print >> out, item
    return out.getvalue()
def pinvproj(M):
    """
    This could be made more efficient using double centering.
    With P the matrix whose entries all equal 1/n and H = I - P,
    the result is inv(H M H + P) - P.
    @param M: a square matrix
    @raise ValueError: if M is not square
    """
    nrows, ncols = M.shape
    if nrows != ncols:
        raise ValueError('expected a square matrix')
    # the constant matrix with every entry equal to 1/n
    P = np.ones((nrows, nrows)) / float(nrows)
    # the centering matrix
    H = np.eye(nrows) - P
    return np.linalg.inv(ndot(H, M, H) + P) - P
def get_response_content(fs):
    """
    Report the products M J M and R J R for a sampled 5x5 matrix M,
    where J is the all-ones matrix and R = mean_removed(M).
    @param fs: not read by this function
    @return: the text of the report
    """
    n = 5
    e = np.ones(n)
    J = np.outer(e, e)
    M = sample_asymmetric_matrix(n)
    MJM = ndot(M, J, M)
    R = mean_removed(M)
    RJR = ndot(R, J, R)
    # begin the output
    np.set_printoptions(linewidth=200)
    out = StringIO()
    sections = (
            ('original matrix (M):', M),
            ('MJM:', MJM),
            ('mean-removed matrix (R):', R),
            ('grand mean of R:', np.mean(R)),
            ('RJR:', RJR),
            )
    for title, value in sections:
        print >> out, title
        print >> out, value
    return out.getvalue()
def get_barbell_rate_matrix(p_mid):
    """
    Build a three state rate matrix on a path 0 - 1 - 2.
    The off-diagonal part is D^(-1/2) Z D^(1/2), where Z is the
    adjacency matrix of the path and D is the diagonal matrix of the
    distribution p, and each diagonal entry is then set to the negative
    of its row sum.
    @param p_mid: the probability assigned to the middle state
    @return: (rate matrix, distribution p)
    """
    # define the stationary distribution
    p_end = (1 - p_mid) / 2
    p = np.array([p_end, p_mid, p_end])
    # define a hollow exchangeability-like matrix
    Z = np.array([
        [0.0, 1.0, 0.0],
        [1.0, 0.0, 1.0],
        [0.0, 1.0, 0.0]])
    # define the mutation matrix
    D = np.diag(p)
    D_inv = np.diag(np.reciprocal(p))
    Q = np.copy(ndot(D_inv**0.5, Z, D**0.5))
    # The row sums are taken before any diagonal entry is written.
    np.fill_diagonal(Q, -Q.sum(axis=1))
    return Q, p
def get_internal_vertex_to_leaf_distn_cov(T, B):
    """
    This is a possibly equivalent formulation.
    It is based on Schur complementation in the unrooted covariance matrix.
    Return a map from an internal vertex to a leaf distribution.
    NOTE: the original author remarked that this covariance based
    interpolator "seems like it should work but it does not",
    whereas the Laplacian based interpolator -pinv(Lbb) Lba,
    with Lbb = L[nleaves:, nleaves:] and Lba = L[nleaves:, :nleaves],
    was reported to work.
    @param T: tree topology as understood by the Ftree module
    @param B: branch information as understood by the Ftree module
    @return: a dictionary that maps an internal vertex to a leaf distribution
    """
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    # leaves precede internal vertices in the vertex order
    vertices = leaves + internal
    # Get the full tree Laplacian matrix.
    L = Ftree.TB_to_L_principal(T, B, vertices)
    # Get the unrooted covariance matrix.
    HSH = np.linalg.pinv(L)
    # Use the multivariate normal distribution wikipedia page
    # for conditional distributions.
    nleaves = len(leaves)
    Saa = HSH[:nleaves, :nleaves]
    Sba = HSH[nleaves:, :nleaves]
    interpolator = ndot(Sba, np.linalg.pinv(Saa))
    # Row i of the interpolator belongs to internal vertex i
    # and column j belongs to leaf j.
    return dict(
            (v, dict(
                (leaf, interpolator[i, j])
                for j, leaf in enumerate(leaves)))
            for i, v in enumerate(internal))
def check_generic_cut(valuator, extendor, A): """ The input matrix is expected to have a certain block structure. In particular, the leaf vertices are expected to precede the points of articulation. Because the tree is expected to be an unrooted binary tree, the relative number of leaves and points of articulation is determined by the size of the adjacency matrix. @param A: adjacency matrix of an unrooted edge-weighted binary tree @return: True if a counterexample is found """ MatrixUtil.assert_symmetric(A) MatrixUtil.assert_nonnegative(A) MatrixUtil.assert_hollow(A) nverts = A.shape[0] if nverts < 4: raise Exception('expected at least four vertices') if nverts % 2 != 0: raise Exception('expected an even number of vertices') ntips = nverts / 2 + 1 narts = nverts / 2 - 1 # get the schur complement laplacian and its associated adjacency matrix L = np.diag(np.sum(A, axis=1)) - A L_tips = L[:ntips, :ntips] - ndot( L[:ntips, -narts:], scipy.linalg.inv(L[-narts:, -narts:]), L[-narts:, :ntips], ) A_tips = np.diag(np.diag(L_tips)) - L_tips tip_valuations = valuator(A_tips) #tip_valuations -= np.mean(tip_valuations) #tip_valuations /= np.linalg.norm(tip_valuations) art_valuations = extendor(A, tip_valuations) #ntip_pos = sum(1 for v in tip_valuations if v > 0) #ntip_neg = sum(1 for v in tip_valuations if v < 0) #nart_pos = sum(1 for v in art_valuations if v > 0) #nart_neg = sum(1 for v in art_valuations if v < 0) #print ((ntip_pos, ntip_neg), (nart_pos, nart_neg)) valuations = np.concatenate((tip_valuations, art_valuations)) ncrossings = 0 for i in range(nverts): for j in range(i+1, nverts): if valuations[i] * valuations[j] * A[i, j] < 0: ncrossings += 1 if ncrossings != 1: # found a counterexample! print ncrossings print A return True
def check_generic_cut(valuator, extendor, A): """ The input matrix is expected to have a certain block structure. In particular, the leaf vertices are expected to precede the points of articulation. Because the tree is expected to be an unrooted binary tree, the relative number of leaves and points of articulation is determined by the size of the adjacency matrix. @param A: adjacency matrix of an unrooted edge-weighted binary tree @return: True if a counterexample is found """ MatrixUtil.assert_symmetric(A) MatrixUtil.assert_nonnegative(A) MatrixUtil.assert_hollow(A) nverts = A.shape[0] if nverts < 4: raise Exception('expected at least four vertices') if nverts % 2 != 0: raise Exception('expected an even number of vertices') ntips = nverts / 2 + 1 narts = nverts / 2 - 1 # get the schur complement laplacian and its associated adjacency matrix L = np.diag(np.sum(A, axis=1)) - A L_tips = L[:ntips, :ntips] - ndot( L[:ntips, -narts:], scipy.linalg.inv(L[-narts:, -narts:]), L[-narts:, :ntips], ) A_tips = np.diag(np.diag(L_tips)) - L_tips tip_valuations = valuator(A_tips) #tip_valuations -= np.mean(tip_valuations) #tip_valuations /= np.linalg.norm(tip_valuations) art_valuations = extendor(A, tip_valuations) #ntip_pos = sum(1 for v in tip_valuations if v > 0) #ntip_neg = sum(1 for v in tip_valuations if v < 0) #nart_pos = sum(1 for v in art_valuations if v > 0) #nart_neg = sum(1 for v in art_valuations if v < 0) #print ((ntip_pos, ntip_neg), (nart_pos, nart_neg)) valuations = np.concatenate((tip_valuations, art_valuations)) ncrossings = 0 for i in range(nverts): for j in range(i + 1, nverts): if valuations[i] * valuations[j] * A[i, j] < 0: ncrossings += 1 if ncrossings != 1: # found a counterexample! print ncrossings print A return True
def get_pi_mi_t2_diag_approx(Q, v, t):
    """
    Second order taylor expansion for only some contributions.
    Contributions of off-diagonal entries are computed exactly.
    The result is get_pi_mi_t2_approx(Q, v, t) plus an adjustment
    that depends on exp(-a*t) and on the inner product of v with 1 - v,
    where a is the negative trace of Q divided by n - 1.
    @param Q: parent independent rate matrix
    @param v: stationary distribution
    @param t: amount of time
    """
    nstates = len(v)
    # get the randomization rate
    rate = -np.trace(Q) / (nstates - 1)
    decay = math.exp(-rate * t)
    h = ndot(v, 1 - v)
    # log(1 - x) appears twice in the adjustment
    log_term = math.log(1 - decay)
    bracket = decay * (1 - 0.5 * decay - log_term) + log_term
    adjustment = bracket * h
    return get_pi_mi_t2_approx(Q, v, t) + adjustment
def get_barbell_rate_matrix(p_mid):
    """
    Construct a rate matrix for three states arranged on a path.
    The off-diagonal entries come from D^(-1/2) Z D^(1/2) with Z the
    path adjacency matrix and D the diagonal matrix of p; every
    diagonal entry is the negative sum of its row.
    @param p_mid: the probability assigned to the middle state
    @return: (rate matrix, distribution p)
    """
    nstates = 3
    # define a hollow exchangeability-like matrix
    Z = np.array([
        [0.0, 1.0, 0.0],
        [1.0, 0.0, 1.0],
        [0.0, 1.0, 0.0]])
    # define the stationary distribution
    p_side = (1 - p_mid) / 2
    p = np.array([p_side, p_mid, p_side])
    # define the mutation matrix
    D = np.diag(p)
    D_inv = np.diag(np.reciprocal(p))
    Q = np.copy(ndot(D_inv**0.5, Z, D**0.5))
    # All row sums are computed before the diagonal is overwritten.
    Q[np.arange(nstates), np.arange(nstates)] = -Q.sum(axis=1)
    return Q, p
def _holmes_rubin_2002_summation(U, a, b, i, K):
    """
    Evaluate the bilinear form (U[a] * U[i]) K (U[b] * U[i]).
    The original author kept the following explicit double sum
    next to this vectorized form:
    sum over k and l of U[a,k]*U[i,k]*U[i,l]*U[b,l]*K[k,l]
    @param U: an orthonormal matrix
    @param a: integer initial state index
    @param b: integer final state index
    @param i: integer query state index
    @param K: a symmetric matrix with eigenvalue and time information
    """
    query_row = U[i]
    left = U[a] * query_row
    right = U[b] * query_row
    return ndot(left, K, right)
def pinvproj(M):
    """
    This could be made more efficient using double centering.
    With P the matrix whose entries all equal 1/n and H = I - P,
    the result is inv(H M H + P) - P.
    An earlier variant, kept only as a comment by the original author,
    computed np.linalg.pinv(MatrixUtil.double_centered(M)) instead;
    whether the two agree is not checked here.
    No conditioning check is made before the inversion.
    @param M: a square matrix
    @raise ValueError: if M is not square
    """
    nrows, ncols = M.shape
    if nrows != ncols:
        raise ValueError('expected a square matrix')
    e = np.ones(nrows)
    I = np.eye(nrows)
    P = np.outer(e, e) / np.inner(e, e)
    H = I - P
    W = ndot(H, M, H) + P
    return np.linalg.inv(W) - P
def get_lhs(M, nkeep, ndelete):
    """
    Get the pseudoinverse of P M P,
    where P is get_p_centering_partial_del(nkeep, ndelete).
    @param M: a matrix of order nkeep + ndelete
    @param nkeep: passed to get_p_centering_partial_del
    @param ndelete: passed to get_p_centering_partial_del
    @return: pinv(P M P)
    """
    P = get_p_centering_partial_del(nkeep, ndelete)
    return np.linalg.pinv(ndot(P, M, P))
def get_response_content(fs):
    """
    Report spectral decompositions of a structured rate matrix.
    The eigenvalues and the left and right eigenvectors of Q are shown,
    followed by a transformation T built from the solution of a
    sylvester equation and by a symmetric eigendecomposition of
    D^(1/2) inv(T) Q T D^(-1/2).
    @param fs: an object whose plain and rand attributes select
    which rate matrix constructor is called
    @return: the text of the report
    @raise Exception: if neither fs.plain nor fs.rand is set
    """
    if fs.plain:
        Q, v_trans, v_recur = get_plain_rate_matrix()
    elif fs.rand:
        Q, v_trans, v_recur = get_random_structured_rate_matrix()
    else:
        raise Exception('expected either the plain or the rand option')
    nstates = Q.shape[0]
    w, vl, vr = scipy.linalg.eig(Q, left=True, right=True)
    vl_inv = scipy.linalg.inv(vl)
    vr_inv = scipy.linalg.inv(vr)
    #
    # do weird things with the sylvester equation
    # Floor division keeps the block size usable as a slice bound.
    n = nstates // 2
    A_syl = Q[:n, :n]
    B_syl = -Q[n:, n:]
    Q_syl = -Q[:n, n:]
    # X solves A_syl X + X B_syl = Q_syl
    X = scipy.linalg.solve_sylvester(A_syl, B_syl, Q_syl)
    T = np.array(np.bmat([
        [np.eye(n), X],
        [np.zeros((n, n)), np.eye(n)],
        ]))
    T_inv = scipy.linalg.inv(T)
    #
    # do stuff with the stationary distributions of the separate processes
    v_trans_recur = np.hstack((v_trans, v_recur))
    D_sqrt = np.diag(np.sqrt(v_trans_recur))
    D_sqrt_recip = np.diag(np.reciprocal(np.sqrt(v_trans_recur)))
    block_diag = ndot(D_sqrt, T_inv, Q, T, D_sqrt_recip)
    # Only the eigenvectors are kept from this decomposition.
    # Its eigenvalues must not be bound to w, because w is paired
    # with vl and vr in the report below.
    _w_sym, U = scipy.linalg.eigh(block_diag)
    diagonalized = ndot(U.T, D_sqrt, T_inv, Q, T, D_sqrt_recip, U)
    w_full = np.diag(diagonalized)
    #
    np.set_printoptions(
            linewidth=1000000,
            threshold=1000000,
            )
    out = StringIO()

    def emit(title, value):
        # write a labeled item followed by a blank line
        print >> out, title
        print >> out, value
        print >> out

    emit('Q:', Q)
    emit('w:', w)
    emit('vl:', vl)
    emit('vl.T:', vl.T)
    emit('inv(vl):', vl_inv)
    emit('vr:', vr)
    emit('vr.T:', vr.T)
    emit('inv(vr):', vr_inv)
    emit('inv(vl).T w vl.T:', np.dot(vl_inv.T, np.dot(np.diag(w), vl.T)))
    emit('vr w inv(vr):', np.dot(vr, np.dot(np.diag(w), vr_inv)))
    print >> out
    print >> out, 'sylvester equation stuff...'
    print >> out
    emit('X:', X)
    emit('T:', T)
    emit('inv(T):', T_inv)
    emit('inv(T) Q T:', np.dot(T_inv, np.dot(Q, T)))
    emit('U.T D^(1/2) inv(T) Q T D^(-1/2) U:', diagonalized)
    emit('expm(Q):', scipy.linalg.expm(Q))
    emit(
            'T D^-1/2 U exp(w) U.T D^1/2 T^-1',
            ndot(
                T, D_sqrt_recip, U,
                np.diag(np.exp(w_full)),
                U.T, D_sqrt, T_inv))
    return out.getvalue()
def process(fs):
    """
    Sample a reversible rate matrix and report on its spectrum
    and on a distance matrix derived from it.
    @param fs: an object whose nstates attribute gives the number of states
    @return: the text of the report without trailing whitespace
    @raise ValueError: if the similar matrix is not symmetric or if the
    reconstructed distances disagree with mrate.get_commute_distance_matrix
    """
    nstates = fs.nstates
    np.set_printoptions(linewidth=200)
    out = StringIO()
    # Sample a symmetric rate matrix and a stationary distribution,
    # then construct the rate matrix R.
    S = MatrixUtil.sample_pos_sym_matrix(nstates)
    v = mrate.sample_distn(nstates)
    psi = np.sqrt(v)
    R = (S.T / psi).T * psi
    R -= np.diag(np.sum(R, axis=1))
    eig_values, eig_vectors = scipy.linalg.eig(R)
    # construct the symmetric matrix that is similar to R
    sym = (R.T * psi).T / psi
    if not np.allclose(sym, sym.T):
        raise ValueError('the similar symmetric matrix is not symmetric...')
    sym_values, sym_vectors = scipy.linalg.eigh(sym)
    gap = -sym_values[-2]
    # the eigenvector paired with the second largest eigenvalue
    fiedler_like = sym_vectors.T[-2]
    fiedler_like_squared = fiedler_like**2
    # reconstruct the eigenvectors of R
    rebuilt_vectors = (sym_vectors.T / psi).T
    # Try to make the commute time matrix.
    # The similar matrix is a lot like a Laplacian matrix,
    # so lets pseudoinvert it.
    sym_pinv = scipy.linalg.pinv(sym)
    diag_rows = np.outer(np.ones(nstates), np.diag(sym_pinv))
    edm = -(diag_rows + diag_rows.T - 2 * sym_pinv)
    if not np.allclose(edm, mrate.get_commute_distance_matrix(R, v)):
        raise ValueError('error computing commute distances')
    centered = MatrixUtil.double_centered(edm)
    centered_values, centered_vectors = scipy.linalg.eigh(centered)
    # compute squared pairwise distances brutely
    points = sym_vectors.T[:-1].T / np.sqrt(-sym_values[:-1])
    brute_rows = []
    for b in points:
        row = []
        for a in points:
            delta = b - a
            row.append(np.dot(delta, delta))
        brute_rows.append(row)
    edm_brute = np.array(brute_rows)
    print >> out, 'reconstructed EDM:'
    print >> out, edm
    print >> out
    # the maxima reported further down refer to this rescaled matrix
    edm_scaled = (edm.T / psi).T / psi
    print >> out, 'divide by square roots of stationary probabilities:'
    print >> out, edm_scaled
    print >> out
    print >> out, 'eigh of centered EDM:'
    print >> out, 'eigenvalues:'
    print >> out, centered_values
    print >> out, 'reciprocal nonzero eigenvalues:'
    print >> out, 1 / centered_values
    print >> out, 'eigenvectors:'
    print >> out, centered_vectors
    print >> out
    print >> out, 'squared distances computed brutely:'
    print >> out, edm_brute
    print >> out
    print >> out, '1 / (h * max(D)):', 1 / (
            np.dot(v, 1 - v) * np.max(edm_scaled))
    print >> out, '1 / max(D):', 1 / np.max(edm_scaled)
    print >> out
    # report some more standard stuff
    print >> out, 'sampled rate matrix R:'
    print >> out, R
    print >> out, 'stationary distn:', v
    print >> out, '1/R01 + 1/R10:', 1 / R[0, 1] + 1 / R[1, 0]
    print >> out
    print >> out, 'scipy.linagl.eig(R):'
    print >> out, eig_values
    print >> out, eig_vectors
    print >> out
    print >> out, 'symmetric matrix similar to R:'
    print >> out, sym
    print >> out
    print >> out, 'eigh of the symmetric similar matrix to R:'
    print >> out, sym_values
    print >> out, sym_vectors
    print >> out, 'spectral gap:', gap
    print >> out, 'entrywise squares of eigenvectors:'
    print >> out, sym_vectors**2
    print >> out, 'a bilinear form involving a fiedler-like eigenvector:'
    print >> out, ndot(fiedler_like, sym, fiedler_like)
    print >> out, 'expected rate:', -np.dot(v, np.diag(R))
    print >> out, 'second order expected rate:', -np.dot(
            fiedler_like_squared, np.diag(R))
    print >> out
    print >> out, 'eigenvectors of R from eigenvectors of the similar matrix:'
    print >> out, sym_values
    print >> out, rebuilt_vectors
    print >> out
    return out.getvalue().rstrip()
def schur(M, nsmall):
    """
    Get the schur complement of the trailing block of M.
    The lower left block of M is never read;
    the transpose of the upper right block is used in its place.
    @param M: a square matrix
    @param nsmall: the order of the leading block
    @return: the leading block minus B inv(C) B'
    """
    upper_right = M[:nsmall, nsmall:]
    trailing_inv = np.linalg.inv(M[nsmall:, nsmall:])
    correction = ndot(upper_right, trailing_inv, upper_right.T)
    return M[:nsmall, :nsmall] - correction
def get_response_content(fs):
    """
    Report spectral decompositions of a structured rate matrix.
    The report lists the eigenvalues and the left and right
    eigenvectors of Q, then a transformation T obtained from a
    sylvester equation, then a symmetric eigendecomposition of
    D^(1/2) inv(T) Q T D^(-1/2).
    @param fs: an object whose plain and rand attributes select
    which rate matrix constructor is called
    @return: the text of the report
    @raise Exception: if neither fs.plain nor fs.rand is set
    """
    if fs.plain:
        Q, v_trans, v_recur = get_plain_rate_matrix()
    elif fs.rand:
        Q, v_trans, v_recur = get_random_structured_rate_matrix()
    else:
        raise Exception("expected either the plain or the rand option")
    nstates = Q.shape[0]
    w, vl, vr = scipy.linalg.eig(Q, left=True, right=True)
    vl_inv = scipy.linalg.inv(vl)
    vr_inv = scipy.linalg.inv(vr)
    #
    # do weird things with the sylvester equation
    # The block size is a slice bound, so it must stay an integer.
    n = nstates // 2
    A_syl = Q[:n, :n]
    B_syl = -Q[n:, n:]
    Q_syl = -Q[:n, n:]
    # X solves A_syl X + X B_syl = Q_syl
    X = scipy.linalg.solve_sylvester(A_syl, B_syl, Q_syl)
    T = np.array(np.bmat([[np.eye(n), X], [np.zeros((n, n)), np.eye(n)]]))
    T_inv = scipy.linalg.inv(T)
    #
    # do stuff with the stationary distributions of the separate processes
    v_trans_recur = np.hstack((v_trans, v_recur))
    D_sqrt = np.diag(np.sqrt(v_trans_recur))
    D_sqrt_recip = np.diag(np.reciprocal(np.sqrt(v_trans_recur)))
    block_diag = ndot(D_sqrt, T_inv, Q, T, D_sqrt_recip)
    # The eigenvalues of this decomposition are deliberately not bound
    # to w, which stays paired with vl and vr for the report below.
    _w_sym, U = scipy.linalg.eigh(block_diag)
    diagonalized = ndot(U.T, D_sqrt, T_inv, Q, T, D_sqrt_recip, U)
    w_full = np.diag(diagonalized)
    #
    np.set_printoptions(linewidth=1000000, threshold=1000000)
    out = StringIO()
    # labeled items that are each followed by one blank line
    spectral_sections = [
        ("Q:", Q),
        ("w:", w),
        ("vl:", vl),
        ("vl.T:", vl.T),
        ("inv(vl):", vl_inv),
        ("vr:", vr),
        ("vr.T:", vr.T),
        ("inv(vr):", vr_inv),
        ("inv(vl).T w vl.T:", np.dot(vl_inv.T, np.dot(np.diag(w), vl.T))),
        ("vr w inv(vr):", np.dot(vr, np.dot(np.diag(w), vr_inv))),
    ]
    sylvester_sections = [
        ("X:", X),
        ("T:", T),
        ("inv(T):", T_inv),
        ("inv(T) Q T:", np.dot(T_inv, np.dot(Q, T))),
        ("U.T D^(1/2) inv(T) Q T D^(-1/2) U:", diagonalized),
        ("expm(Q):", scipy.linalg.expm(Q)),
        (
            "T D^-1/2 U exp(w) U.T D^1/2 T^-1",
            ndot(
                T, D_sqrt_recip, U,
                np.diag(np.exp(w_full)),
                U.T, D_sqrt, T_inv),
        ),
    ]
    for title, value in spectral_sections:
        print >> out, title
        print >> out, value
        print >> out
    print >> out
    print >> out, "sylvester equation stuff..."
    print >> out
    for title, value in sylvester_sections:
        print >> out, title
        print >> out, value
        print >> out
    return out.getvalue()
def get_response_content(fs):
    """
    Report rate matrices obtained from a sampled mutation process
    and a sampled target distribution.
    A kernel matrix K with entries x / sinh(x), where x = v[j] - v[i]
    and v = log(sqrt(pi_m / pi_q)), is applied entrywise to the
    symmetrization of the mutation rate matrix.
    @param fs: an object whose nstates attribute gives the number of states
    @return: the text of the report
    @raise ValueError: if the distribution computed from the sampled
    rate matrix is not close to the requested one
    """
    np.set_printoptions(linewidth=200)
    out = StringIO()
    # do the analysis
    nstates = fs.nstates
    pi_m = sample_distribution(nstates)
    pi_q = sample_distribution(nstates)
    v = np.log(np.sqrt(pi_m / pi_q))
    K = np.zeros((nstates, nstates))
    for i in range(nstates):
        for j in range(nstates):
            x = v[j] - v[i]
            # the kernel value at x = 0 is defined to be one
            K[i, j] = x / math.sinh(x) if x else 1.0
    W, V = scipy.linalg.eigh(K)
    # make a gtr mutation matrix
    S_precursor = sample_symmetric_rate_matrix(nstates)
    M = to_gtr_c(S_precursor, pi_m)
    if not np.allclose(R_to_distn(M), pi_m):
        raise ValueError('stationary distribution error')
    # resymmetrize
    root_m = np.sqrt(pi_m)
    S = ndot(np.diag(root_m), M, np.diag(np.reciprocal(root_m)))
    R = S * K
    root_q = np.sqrt(pi_q)
    lam_q = np.diag(root_q)
    rlam_q = np.diag(np.reciprocal(root_q))

    def desymmetrize(sym):
        # undo the symmetrization with respect to pi_q
        # and then subtract the row sums from the diagonal
        rates = ndot(rlam_q, sym, lam_q)
        rates -= np.diag(np.sum(rates, axis=1))
        return rates

    Q_from_R = desymmetrize(R)
    Q_from_S = desymmetrize(S)
    Q_from_precursor = to_gtr_c(S_precursor, pi_q)
    # write the report
    # each entry is (title, value, number of blank lines that follow)
    sections = [
            ('mutation process stationary distribution:', pi_m, 1),
            ('selection process stationary distribution:', pi_q, 1),
            ('vector to which the kernel function is applied:', v, 1),
            ('kernel matrix K:', K, 1),
            ('eigenvalues of K:', W, 1),
            ('eigenvectors of K:', V, 1),
            ('symmetric precursor matrix:', S_precursor, 1),
            ('rate matrix M:', M, 1),
            ('symmetrization S of rate matrix M:', S, 2),
            ('symmetrization R = S o K', R, 1),
            ('de-symmetrized rate matrix derived from R:', Q_from_R, 2),
            ('de-symmetrized rate matrix derived from S:', Q_from_S, 2),
            ('rate matrix derived from precursor rate matrix:',
                Q_from_precursor, 1),
            ]
    for title, value, nblank in sections:
        print >> out, title
        print >> out, value
        for _ in range(nblank):
            print >> out
    return out.getvalue()
def get_response_content(fs):
    """
    Report matrices derived from a sampled rate matrix at t=1 and t=2.
    @param fs: object whose nstates attribute gives the number of states
    @return: the report as a multi-line string
    """
    out = StringIO()
    np.set_printoptions(linewidth=200)

    def section(title, value):
        # write a title line, the value, and a blank separator line
        print >> out, title
        print >> out, value
        print >> out

    nstates = fs.nstates
    # sample a random rate matrix
    v = divtime.sample_distribution(nstates)
    S_sampled = divtime.sample_symmetric_rate_matrix(nstates)
    R = mrate.to_gtr_halpern_bruno(S_sampled, v)
    # get some properties of the rate matrix and its re-symmetrization
    S = mrate.symmetrized(R)
    # NOTE: this value is computed but not used by the report
    distn = mrate.R_to_distn(R)
    w, U = np.linalg.eigh(S)
    # the eigenvector paired with the last (largest) eigenvalue of S
    last_vec = U.T[-1]
    D = np.diag(last_vec)**2
    D_inv = np.diag(np.reciprocal(last_vec))**2
    D_sqrt = D**.5
    D_inv_sqrt = D_inv**.5
    for t in (1.0, 2.0):
        exp_St = scipy.linalg.expm(S * t)
        P = scipy.linalg.expm(R * t)
        M = ndot(D_sqrt, exp_St, D_sqrt)
        M_star = ndot(D_inv_sqrt, exp_St, D_inv_sqrt)
        M_star_log = np.log(M_star)
        M_star_log_w, M_star_log_U = np.linalg.eigh(M_star_log)
        E = M * M_star_log
        E_w, E_U = np.linalg.eigh(E)
        section('t:', t)
        section('randomly sampled rate matrix R', R)
        section('symmetrized matrix S', S)
        section('stationary distribution diagonal D', D)
        section('R = D^-1/2 S D^1/2', ndot(D_inv_sqrt, S, D_sqrt))
        section('probability matrix e^(R*t) = P', P)
        section('P = D^-1/2 e^(S*t) D^1/2',
                ndot(D_inv_sqrt, exp_St, D_sqrt))
        print >> out, 'pairwise distribution matrix M'
        section('M = D^1/2 e^(S*t) D^1/2', M)
        section('sum of entries of M', np.sum(M))
        section('M_star = D^-1/2 e^(S*t) D^-1/2', M_star)
        section('entrywise logarithm logij(M_star)', M_star_log)
        section('Hadamard product M o logij(M_star) = E', E)
        section('spectrum of M:', np.linalg.eigvalsh(M))
        section('spectrum of logij(M_star):', M_star_log_w)
        section('corresponding eigenvectors of logij(M_star) as columns:',
                M_star_log_U)
        section('spectrum of E:', E_w)
        section('corresponding eigenvectors of E as columns:', E_U)
        section('entrywise square roots of stationary distribution:',
                np.sqrt(v))
        section('sum of entries of E:', np.sum(E))
        section('mutual information:', ctmcmi.get_mutual_information(R, t))
        # one extra blank line separates the reports for each time
        print >> out
    return out.getvalue()
def process(fs):
    """
    Report commute distances and spectral summaries of a sampled rate matrix.
    @param fs: object whose nstates attribute gives the number of states
    @return: the report as a multi-line string without trailing whitespace
    @raise ValueError: if the similar matrix is not symmetric or if the two
    commute distance computations disagree
    """
    nstates = fs.nstates
    np.set_printoptions(linewidth=200)
    out = StringIO()
    # Sample a symmetric matrix and a distribution,
    # then scale rows and columns to get the rate matrix R.
    S = MatrixUtil.sample_pos_sym_matrix(nstates)
    v = mrate.sample_distn(nstates)
    psi = np.sqrt(v)
    R = (S.T / psi).T * psi
    R -= np.diag(np.sum(R, axis=1))
    R_W, R_V = scipy.linalg.eig(R)
    # undo the scaling to get a matrix similar to R; it should be symmetric
    R_sim = (R.T * psi).T / psi
    if not np.allclose(R_sim, R_sim.T):
        raise ValueError('the similar symmetric matrix is not symmetric...')
    R_sim_W, R_sim_V = scipy.linalg.eigh(R_sim)
    # eigh sorts eigenvalues in ascending order, so index -2 is second largest
    R_gap = -R_sim_W[-2]
    fiedler_like = R_sim_V.T[-2]
    fiedler_like_sq = fiedler_like**2
    # rescale the eigenvectors of the similar matrix by 1 / psi
    R_V_rebuilt = (R_sim_V.T / psi).T
    # Try to make the commute time matrix by pseudoinverting R_sim,
    # treating it like a Laplacian matrix.
    R_sim_pinv = scipy.linalg.pinv(R_sim)
    pinv_diag_rows = np.outer(np.ones(nstates), np.diag(R_sim_pinv))
    D = -(pinv_diag_rows + pinv_diag_rows.T - 2*R_sim_pinv)
    D_commute = mrate.get_commute_distance_matrix(R, v)
    if not np.allclose(D, D_commute):
        raise ValueError('error computing commute distances')
    HDH = MatrixUtil.double_centered(D)
    HDH_W, HDH_V = scipy.linalg.eigh(HDH)
    # compute squared pairwise distances between rows of X by brute force
    X = R_sim_V[:, :-1] / np.sqrt(-R_sim_W[:-1])
    D_brute = np.array([[np.dot(q - p, q - p) for p in X] for q in X])
    # rescale the distance matrix on both sides by 1 / psi
    D_scaled = (D.T / psi).T / psi
    # report the distance matrices
    print >> out, 'reconstructed EDM:'
    print >> out, D
    print >> out
    print >> out, 'divide by square roots of stationary probabilities:'
    print >> out, D_scaled
    print >> out
    print >> out, 'eigh of centered EDM:'
    print >> out, 'eigenvalues:'
    print >> out, HDH_W
    print >> out, 'reciprocal nonzero eigenvalues:'
    print >> out, 1 / HDH_W
    print >> out, 'eigenvectors:'
    print >> out, HDH_V
    print >> out
    print >> out, 'squared distances computed brutely:'
    print >> out, D_brute
    print >> out
    # these two summaries use the rescaled distance matrix
    largest = np.max(D_scaled)
    print >> out, '1 / (h * max(D)):', 1 / (np.dot(v, 1-v) * largest)
    print >> out, '1 / max(D):', 1 / largest
    print >> out
    # report some more standard stuff
    print >> out, 'sampled rate matrix R:'
    print >> out, R
    print >> out, 'stationary distn:', v
    print >> out, '1/R01 + 1/R10:', 1/R[0, 1] + 1/R[1, 0]
    print >> out
    print >> out, 'scipy.linagl.eig(R):'
    print >> out, R_W
    print >> out, R_V
    print >> out
    print >> out, 'symmetric matrix similar to R:'
    print >> out, R_sim
    print >> out
    print >> out, 'eigh of the symmetric similar matrix to R:'
    print >> out, R_sim_W
    print >> out, R_sim_V
    print >> out, 'spectral gap:', R_gap
    print >> out, 'entrywise squares of eigenvectors:'
    print >> out, R_sim_V ** 2
    print >> out, 'a bilinear form involving a fiedler-like eigenvector:'
    print >> out, ndot(fiedler_like, R_sim, fiedler_like)
    rates_away = np.diag(R)
    print >> out, 'expected rate:', -np.dot(v, rates_away)
    print >> out, 'second order expected rate:', -np.dot(
            fiedler_like_sq, rates_away)
    print >> out
    print >> out, 'eigenvectors of R from eigenvectors of the similar matrix:'
    print >> out, R_sim_W
    print >> out, R_V_rebuilt
    print >> out
    return out.getvalue().rstrip()
def process(fs):
    """
    Compare a sampled rate matrix to a simplified rate matrix in a report.
    The simplified matrix Q is chosen by the flags on fs; when several flags
    are set the later assignment wins (bipartitioned, then child_indep,
    then parent_indep).
    @param fs: object with attributes nstates, t, parent_indep,
    child_indep and bipartitioned
    @return: the report as a multi-line string without trailing whitespace
    @raise ValueError: if none of the three simplification flags is set
    """
    nstates = fs.nstates
    np.set_printoptions(linewidth=200)
    t = fs.t
    # Without one of these flags Q is never assigned, and the report
    # would fail with a NameError only after the sampling work is done.
    if not (fs.parent_indep or fs.child_indep or fs.bipartitioned):
        raise ValueError('no simplified process was selected')

    def section(title, value):
        # write a title line, the value, and a blank separator line
        print >> out, title
        print >> out, value
        print >> out

    ### sample a random time
    ##time_mu = 0.01
    ##t = random.expovariate(1 / time_mu)
    # Sample a rate matrix.
    # Use a trick by Robert Kern to left and right multiply by diagonals.
    # http://mail.scipy.org/pipermail/numpy-discussion/2007-March/
    # 026809.html
    S = MatrixUtil.sample_pos_sym_matrix(nstates)
    v = mrate.sample_distn(nstates)
    R = (v**-0.5)[:, np.newaxis] * S * (v**0.5)
    R -= np.diag(np.sum(R, axis=1))
    # Construct a parent-independent process
    # with the same max rate and stationary distribution
    # as the sampled process.
    if fs.parent_indep:
        Q = np.outer(np.ones(nstates), v)
        Q -= np.diag(np.sum(Q, axis=1))
        pi_rescaling_factor = max(np.diag(R) / np.diag(Q))
        Q *= pi_rescaling_factor
        Z = msimpl.get_fast_meta_f81_autobarrier(Q)
    # Construct a child-independent process
    # with the same expected rate
    # as the sampled process
    if fs.child_indep:
        C = np.outer(1/v, np.ones(nstates))
        C -= np.diag(np.sum(C, axis=1))
        ci_rescaling_factor = np.max(R / C)
        #expected_rate = -ndot(np.diag(R), v)
        #ci_rescaling_factor = expected_rate / (nstates*(nstates-1))
        #ci_rescaling_factor = expected_rate / (nstates*nstates)
        C *= ci_rescaling_factor
        Q = C
    if fs.bipartitioned:
        Q = msimpl.get_fast_meta_f81_autobarrier(R)
    # Check that the mutual information of the
    # parent independent process is smaller.
    out = StringIO()
    section('sampled symmetric part of the rate matrix S:', S)
    section('sampled stationary distribution v:', v)
    section('shannon entropy of stationary distribution v:',
            -np.dot(np.log(v), v))
    section('sqrt stationary distribution:', np.sqrt(v))
    section('implied rate matrix R:', R)
    print >> out, 'eigenvalues of R:', scipy.linalg.eigvals(R)
    print >> out
    # the trailing comma keeps the value on the same line as the label
    print >> out, 'relaxation rate of R:',
    print >> out, sorted(np.abs(scipy.linalg.eigvals(R)))[1]
    print >> out
    print >> out, 'expected rate of R:', mrate.Q_to_expected_rate(R)
    print >> out
    print >> out, 'cheeger bounds of R:', get_cheeger_bounds(R, v)
    print >> out
    print >> out, 'randomization rate of R:', get_randomization_rate(R, v)
    print >> out
    candidates = [get_randomization_candidate(R, v, i) for i in range(nstates)]
    if np.allclose(get_randomization_rate(R, v), candidates):
        print >> out, 'all candidates are equal to this rate'
    else:
        print >> out, 'not all candidates are equal to this rate'
    print >> out
    section('simplified rate matrix Q:', Q)
    qv = mrate.R_to_distn(Q)
    section('stationary distribution of Q:', qv)
    section('ratio qv/v:', qv / v)
    section('shannon entropy of stationary distribution of Q:',
            -np.dot(np.log(qv), qv))
    if fs.parent_indep:
        section('parent independent rescaling factor:', pi_rescaling_factor)
    if fs.child_indep:
        section('child independent rescaling factor:', ci_rescaling_factor)
    print >> out, 'eigenvalues of Q:', scipy.linalg.eigvals(Q)
    print >> out
    print >> out, 'relaxation rate of Q:',
    print >> out, sorted(np.abs(scipy.linalg.eigvals(Q)))[1]
    print >> out
    print >> out, 'expected rate of Q:', mrate.Q_to_expected_rate(Q)
    print >> out
    # NOTE(review): the Q summaries below are computed with v, not qv;
    # this matches the original code -- confirm it is intended.
    print >> out, 'cheeger bounds of Q:', get_cheeger_bounds(Q, v)
    print >> out
    print >> out, 'randomization rate of Q:', get_randomization_rate(Q, v)
    print >> out
    candidates = [get_randomization_candidate(Q, v, i) for i in range(nstates)]
    if np.allclose(get_randomization_rate(Q, v), candidates):
        print >> out, 'all candidates are equal to this rate'
    else:
        print >> out, 'warning: not all candidates are equal to this rate'
    print >> out
    print >> out, 'E(rate) of Q divided by logical entropy:',
    print >> out, mrate.Q_to_expected_rate(Q) / ndot(v, 1-v)
    print >> out
    # from here on S is the matrix similar to Q, not the sampled matrix
    S = ndot(np.diag(np.sqrt(v)), Q, np.diag(1/np.sqrt(v)))
    section('symmetric matrix similar to Q:', S)
    print >> out, 'eigendecomposition of the similar matrix:'
    W, V = scipy.linalg.eigh(S)
    print >> out, V
    print >> out, np.diag(W)
    print >> out, V.T
    print >> out
    #
    print >> out, 'time:', t
    print >> out
    print >> out, 'stationary distn logical entropy:', ndot(v, 1-v)
    print >> out
    #
    P_by_hand = get_pi_transition_matrix(Q, v, t)
    section('simplified-process transition matrix computed by hand:',
            P_by_hand)
    section('simplified-process transition matrix computed by expm:',
            scipy.linalg.expm(Q*t))
    #
    section('simplified-process m.i. by hand:', get_pi_mi(Q, v, t))
    section('simplified-process m.i. by expm:',
            ctmcmi.get_expected_ll_ratio(Q, t))
    #
    section('original process m.i. by expm:',
            ctmcmi.get_expected_ll_ratio(R, t))
    #
    section('stationary distn Shannon entropy:', -ndot(v, np.log(v)))
    #
    if fs.parent_indep:
        section('approximate simplified process m.i. 2nd order approx:',
                get_pi_mi_t2_approx(Q, v, t))
        section('approximate simplified process m.i. "better" approx:',
                get_pi_mi_t2_diag_approx(Q, v, t))
        section('"f81-ization plus barrier" of pure f81-ization:', Z)
    #
    return out.getvalue().rstrip()
def get_response_content(fs):
    """
    Report on the rate matrix built from a list of energies.
    Entry (row, col) of the rate matrix is exp(-(E[col] - E[row])) off the
    diagonal, and each diagonal entry is the negated sum of the other
    entries in its row.
    @param fs: form data; fs.energies is text that is split into lines,
    and each line yielded by iterutils.stripped_lines is parsed as a float
    @return: the report as a multi-line string without trailing whitespace
    @raise ValueError: if a line is not a valid float, if no energies are
    given, or if more than 100 energies are given
    """
    # read the energies from the form data
    energies = []
    for line in iterutils.stripped_lines(fs.energies.splitlines()):
        try:
            energy = float(line)
        except ValueError:
            raise ValueError('invalid energy: %s' % line)
        energies.append(energy)
    n = len(energies)
    # an empty list cannot yield a stationary eigenvector, so fail early
    if not n:
        raise ValueError('no energies were provided')
    if n > 100:
        raise ValueError('too many energies')
    # compute the rate matrix
    R = np.zeros((n, n))
    for row, row_energy in enumerate(energies):
        for col, col_energy in enumerate(energies):
            R[row, col] = math.exp(-(col_energy - row_energy))
    # Every diagonal entry is exp(0) == 1 at this point, so adding 1 back
    # leaves the negated sum of the off-diagonal entries of the row.
    for i, r in enumerate(R):
        R[i, i] = -np.sum(r) + 1
    # get the transition matrix at large finite time
    large_t = 1000.0
    T = scipy.linalg.expm(R * large_t)
    # eigendecompose
    Wr, Vr = scipy.linalg.eig(R, left=False, right=True)
    Wl, Vl = scipy.linalg.eig(R, left=True, right=False)
    # Get the left eigenvector whose eigenvalue has the smallest magnitude.
    # Using argmin avoids comparing eigenvector arrays to each other,
    # which raises an error when two eigenvalue magnitudes are tied.
    stationary_index = int(np.argmin(np.abs(Wl)))
    pi_eigenvector = Vl[:, stationary_index]
    # get the stationary distribution itself by normalizing the sum to 1
    pi_arr = pi_eigenvector / np.sum(pi_eigenvector)
    # print things
    np.set_printoptions(linewidth=300)
    out = StringIO()

    def section(title, value):
        # write a title line, the value, and a blank separator line
        print >> out, title
        print >> out, value
        print >> out

    section('rate matrix:', R)
    section('rate matrix row sums:', np.sum(R, axis=1))
    section('eigenvalues:', Wr)
    section('corresponding orthonormal right eigenvectors (columns):', Vr)
    section('eigenvalues:', Wl)
    section('corresponding orthonormal left eigenvectors (columns):', Vl)
    section('L2 normalized eigenvector associated with stationary distn:',
            pi_eigenvector)
    section('L1 renormalized vector (the stationary distribution):', pi_arr)
    print >> out
    # eigendecompose the transition matrix
    Wr, Vr = scipy.linalg.eig(T, left=False, right=True)
    Wl, Vl = scipy.linalg.eig(T, left=True, right=False)
    section('transition matrix for t=%f:' % large_t, T)
    section('transition matrix row sums:', np.sum(T, axis=1))
    section('eigenvalues:', Wr)
    section('corresponding orthonormal right eigenvectors (columns):', Vr)
    section('eigenvalues:', Wl)
    section('corresponding orthonormal left eigenvectors (columns):', Vl)
    section('incorrect reconstitution of the transition matrix:',
            ndot(Vr, np.diag(Wr), Vl.T))
    print >> out
    # Use the known properties of reversibility to symmetrize the matrix.
    t = 3
    coeffs, rates, c = get_identicality_params(R)
    section('brute identicality computation for t=%f:' % t,
            get_numerical_identicality(R, t))
    section('sophisticated identicality computation for t=%f:' % t,
            get_symbolic_identicality(coeffs, rates, c, t))
    print >> out
    # Try another couple rate matrices.
    e2 = math.exp(2)
    en2 = math.exp(-2)
    rate_matrices = [
            np.array([[-2.0, 2.0], [2.0, -2.0]]),
            np.array([[-1.0, 1.0], [3.0, -3.0]]),
            np.array([[-1, 1, 0], [1, -2, 1], [0, 1, -1]]),
            #np.array([[-4.0, 4.0, 0], [1, -2, 1], [0, 4, -4]])]
            #np.array([[-1, 1, 0], [7, -14, 7], [0, 1, -1]])]
            np.array([[-en2, en2, 0], [e2, -2 * e2, e2], [0, en2, -en2]]),
            ]
    t = 3.0
    for test_R in rate_matrices:
        coeffs, rates, c = get_identicality_params(test_R)
        section('test rate matrix:', test_R)
        section('eigenvalues:', scipy.linalg.eigvals(test_R))
        section('stationary distribution:', R_to_distn(test_R))
        section('brute identicality computation for t=%f:' % t,
                get_numerical_identicality(test_R, t))
        section('sophisticated identicality computation for t=%f:' % t,
                get_symbolic_identicality(coeffs, rates, c, t))
        section('identicality derivative for t=%f:' % t,
                get_identicality_derivative(coeffs, rates, t))
        # one extra blank line separates the test matrices
        print >> out
    # return the message
    return out.getvalue().rstrip()
def get_response_content(fs):
    """
    Report matrices derived from a sampled rate matrix at t=1 and t=2.
    @param fs: object whose nstates attribute gives the number of states
    @return: the report as a multi-line string
    """
    out = StringIO()
    np.set_printoptions(linewidth=200)

    def section(title, value):
        # write a title line, the value, and a blank separator line
        print >> out, title
        print >> out, value
        print >> out

    nstates = fs.nstates
    # sample a random rate matrix
    v = divtime.sample_distribution(nstates)
    S_sampled = divtime.sample_symmetric_rate_matrix(nstates)
    R = mrate.to_gtr_halpern_bruno(S_sampled, v)
    # get some properties of the rate matrix and its re-symmetrization
    S = mrate.symmetrized(R)
    # NOTE: this value is computed but not used by the report
    distn = mrate.R_to_distn(R)
    w, U = np.linalg.eigh(S)
    # the eigenvector paired with the last (largest) eigenvalue of S
    last_vec = U.T[-1]
    D = np.diag(last_vec)**2
    D_inv = np.diag(np.reciprocal(last_vec))**2
    D_sqrt = D**.5
    D_inv_sqrt = D_inv**.5
    for t in (1.0, 2.0):
        exp_St = scipy.linalg.expm(S*t)
        P = scipy.linalg.expm(R*t)
        M = ndot(D_sqrt, exp_St, D_sqrt)
        M_star = ndot(D_inv_sqrt, exp_St, D_inv_sqrt)
        M_star_log = np.log(M_star)
        M_star_log_w, M_star_log_U = np.linalg.eigh(M_star_log)
        E = M * M_star_log
        E_w, E_U = np.linalg.eigh(E)
        section('t:', t)
        section('randomly sampled rate matrix R', R)
        section('symmetrized matrix S', S)
        section('stationary distribution diagonal D', D)
        section('R = D^-1/2 S D^1/2', ndot(D_inv_sqrt, S, D_sqrt))
        section('probability matrix e^(R*t) = P', P)
        section('P = D^-1/2 e^(S*t) D^1/2',
                ndot(D_inv_sqrt, exp_St, D_sqrt))
        print >> out, 'pairwise distribution matrix M'
        section('M = D^1/2 e^(S*t) D^1/2', M)
        section('sum of entries of M', np.sum(M))
        section('M_star = D^-1/2 e^(S*t) D^-1/2', M_star)
        section('entrywise logarithm logij(M_star)', M_star_log)
        section('Hadamard product M o logij(M_star) = E', E)
        section('spectrum of M:', np.linalg.eigvalsh(M))
        section('spectrum of logij(M_star):', M_star_log_w)
        section('corresponding eigenvectors of logij(M_star) as columns:',
                M_star_log_U)
        section('spectrum of E:', E_w)
        section('corresponding eigenvectors of E as columns:', E_U)
        section('entrywise square roots of stationary distribution:',
                np.sqrt(v))
        section('sum of entries of E:', np.sum(E))
        section('mutual information:', ctmcmi.get_mutual_information(R, t))
        # one extra blank line separates the reports for each time
        print >> out
    return out.getvalue()
def process(fs):
    """
    Report how the spectral gap changes when a sampled process is perturbed.
    A rate matrix R is sampled, its stationary distribution is perturbed,
    and a second rate matrix Q is built for the perturbed distribution
    using the construction selected by fs.knudsen or fs.sella.
    @param fs: object with attributes nstates, eps, knudsen and sella
    @return: the report as a multi-line string without trailing whitespace
    @raise ValueError: if the similar matrix is not symmetric, if the
    perturbed distribution has an entry outside [0, 1], or if neither
    fs.knudsen nor fs.sella is set
    """
    n = fs.nstates
    np.set_printoptions(linewidth=200)
    out = StringIO()
    # Sample a symmetric rate matrix and a stationary distribution,
    # then construct the rate matrix R.
    S = MatrixUtil.sample_pos_sym_matrix(n)
    v = mrate.sample_distn(n)
    psi = np.sqrt(v)
    R = (S.T / psi).T * psi
    R -= np.diag(np.sum(R, axis=1))
    R_W, R_V = scipy.linalg.eig(R)
    # construct the symmetric matrix that is similar to R
    R_sim = (R.T * psi).T / psi
    if not np.allclose(R_sim, R_sim.T):
        raise ValueError('the similar symmetric matrix is not symmetric...')
    R_sim_W, R_sim_V = scipy.linalg.eigh(R_sim)
    # eigh sorts eigenvalues in ascending order, so index -2 is second largest
    R_gap = -R_sim_W[-2]
    fiedler_like = R_sim_V.T[-2]
    v2 = fiedler_like**2
    # reconstruct the eigenvectors of R
    R_V_rebuilt = (R_sim_V.T / psi).T
    # Sample some numbers, subtract the mean so the entries sum to zero,
    # then rescale by eps / (dv . dv).
    dv = np.random.exponential(1, n)
    dv -= np.mean(dv)
    dv *= fs.eps / np.dot(dv, dv)
    qv = v + dv
    if any(qv < 0) or any(1 < qv):
        raise ValueError(
                'the stationary distribution change was too large '
                'for the randomly sampled process')
    qpsi = np.sqrt(qv)
    # define the rate matrix
    if fs.knudsen:
        Q = (S.T / qpsi).T * qpsi
    elif fs.sella:
        Q = R.copy()
        for a in range(n):
            for b in range(n):
                if a != b:
                    tau = (qv[b] / v[b]) / (qv[a] / v[a])
                    # log(tau) / (1 - 1/tau) is 0 / 0 at tau == 1 and would
                    # put a nan into Q; its limit there is 1, so the rate
                    # is left unchanged in that case.
                    if tau != 1:
                        Q[a, b] *= math.log(tau) / (1 - 1/tau)
    else:
        # otherwise Q would be unbound on the next line
        raise ValueError('no mutation-selection model was selected')
    Q -= np.diag(np.sum(Q, axis=1))
    # construct the symmetric matrix that is similar to Q
    Q_sim = (Q.T * qpsi).T / qpsi
    Q_sim_W, Q_sim_V = scipy.linalg.eigh(Q_sim)
    Q_gap = -Q_sim_W[-2]
    diag_change = np.diag(Q) - np.diag(R)
    # report some stuff
    print >> out, 'sampled rate matrix R:'
    print >> out, R
    print >> out, 'stationary distn:', v
    print >> out
    print >> out, 'scipy.linagl.eig(R):'
    print >> out, R_W
    print >> out, R_V
    print >> out
    print >> out, 'symmetric matrix similar to R:'
    print >> out, R_sim
    print >> out
    print >> out, 'eigh of the symmetric similar matrix to R:'
    print >> out, R_sim_W
    print >> out, R_sim_V
    print >> out, 'spectral gap:', R_gap
    print >> out, 'entrywise squares of eigenvectors:'
    print >> out, R_sim_V ** 2
    print >> out, 'a bilinear form involving a fiedler-like eigenvector:'
    print >> out, ndot(fiedler_like, R_sim, fiedler_like)
    print >> out, 'expected rate:', -np.dot(v, np.diag(R))
    print >> out, 'second order expected rate:', -np.dot(v2, np.diag(R))
    print >> out
    print >> out, 'eigenvectors of R from eigenvectors of the similar matrix:'
    print >> out, R_sim_W
    print >> out, R_V_rebuilt
    print >> out
    print >> out, 'mutation-selection balance matrix Q:'
    print >> out, Q
    print >> out, 'stationary distn:', qv
    print >> out, 'spectral gap:', Q_gap
    print >> out
    print >> out, 'symmetric matrix similar to Q:'
    print >> out, Q_sim
    print >> out
    print >> out, 'pi(Q) - pi(R):', dv
    print >> out, 'gap(Q) - gap(R):', Q_gap - R_gap
    print >> out, 'diag(Q) - diag(R):', diag_change
    print >> out, 'trace(Q) - trace(R):', np.trace(Q) - np.trace(R)
    print >> out
    print >> out, 'rate away estimate of spectral gap change:'
    print >> out, np.dot(diag_change, v2)
    print >> out
    return out.getvalue().rstrip()
def process(fs):
    """
    Report how the spectral gap changes when a sampled process is perturbed.
    A rate matrix R is sampled, its stationary distribution is perturbed,
    and a second rate matrix Q is built for the perturbed distribution
    using the construction selected by fs.knudsen or fs.sella.
    @param fs: object with attributes nstates, eps, knudsen and sella
    @return: the report as a multi-line string without trailing whitespace
    @raise ValueError: if the similar matrix is not symmetric, if the
    perturbed distribution has an entry outside [0, 1], or if neither
    fs.knudsen nor fs.sella is set
    """
    n = fs.nstates
    np.set_printoptions(linewidth=200)
    out = StringIO()
    # Sample a symmetric rate matrix and a stationary distribution,
    # then construct the rate matrix R.
    S = MatrixUtil.sample_pos_sym_matrix(n)
    v = mrate.sample_distn(n)
    psi = np.sqrt(v)
    R = (S.T / psi).T * psi
    R -= np.diag(np.sum(R, axis=1))
    R_W, R_V = scipy.linalg.eig(R)
    # construct the symmetric matrix that is similar to R
    R_sim = (R.T * psi).T / psi
    if not np.allclose(R_sim, R_sim.T):
        raise ValueError('the similar symmetric matrix is not symmetric...')
    R_sim_W, R_sim_V = scipy.linalg.eigh(R_sim)
    # eigh sorts eigenvalues in ascending order, so index -2 is second largest
    R_gap = -R_sim_W[-2]
    fiedler_like = R_sim_V.T[-2]
    v2 = fiedler_like**2
    # reconstruct the eigenvectors of R
    R_V_rebuilt = (R_sim_V.T / psi).T
    # Sample some numbers, subtract the mean so the entries sum to zero,
    # then rescale by eps / (dv . dv).
    dv = np.random.exponential(1, n)
    dv -= np.mean(dv)
    dv *= fs.eps / np.dot(dv, dv)
    qv = v + dv
    if any(qv < 0) or any(1 < qv):
        raise ValueError('the stationary distribution change was too large '
                'for the randomly sampled process')
    qpsi = np.sqrt(qv)
    # define the rate matrix
    if fs.knudsen:
        Q = (S.T / qpsi).T * qpsi
    elif fs.sella:
        Q = R.copy()
        for a in range(n):
            for b in range(n):
                if a != b:
                    tau = (qv[b] / v[b]) / (qv[a] / v[a])
                    # log(tau) / (1 - 1/tau) is 0 / 0 at tau == 1 and would
                    # put a nan into Q; its limit there is 1, so the rate
                    # is left unchanged in that case.
                    if tau != 1:
                        Q[a, b] *= math.log(tau) / (1 - 1 / tau)
    else:
        # otherwise Q would be unbound on the next line
        raise ValueError('no mutation-selection model was selected')
    Q -= np.diag(np.sum(Q, axis=1))
    # construct the symmetric matrix that is similar to Q
    Q_sim = (Q.T * qpsi).T / qpsi
    Q_sim_W, Q_sim_V = scipy.linalg.eigh(Q_sim)
    Q_gap = -Q_sim_W[-2]
    diag_change = np.diag(Q) - np.diag(R)
    # report some stuff
    print >> out, 'sampled rate matrix R:'
    print >> out, R
    print >> out, 'stationary distn:', v
    print >> out
    print >> out, 'scipy.linagl.eig(R):'
    print >> out, R_W
    print >> out, R_V
    print >> out
    print >> out, 'symmetric matrix similar to R:'
    print >> out, R_sim
    print >> out
    print >> out, 'eigh of the symmetric similar matrix to R:'
    print >> out, R_sim_W
    print >> out, R_sim_V
    print >> out, 'spectral gap:', R_gap
    print >> out, 'entrywise squares of eigenvectors:'
    print >> out, R_sim_V**2
    print >> out, 'a bilinear form involving a fiedler-like eigenvector:'
    print >> out, ndot(fiedler_like, R_sim, fiedler_like)
    print >> out, 'expected rate:', -np.dot(v, np.diag(R))
    print >> out, 'second order expected rate:', -np.dot(v2, np.diag(R))
    print >> out
    print >> out, 'eigenvectors of R from eigenvectors of the similar matrix:'
    print >> out, R_sim_W
    print >> out, R_V_rebuilt
    print >> out
    print >> out, 'mutation-selection balance matrix Q:'
    print >> out, Q
    print >> out, 'stationary distn:', qv
    print >> out, 'spectral gap:', Q_gap
    print >> out
    print >> out, 'symmetric matrix similar to Q:'
    print >> out, Q_sim
    print >> out
    print >> out, 'pi(Q) - pi(R):', dv
    print >> out, 'gap(Q) - gap(R):', Q_gap - R_gap
    print >> out, 'diag(Q) - diag(R):', diag_change
    print >> out, 'trace(Q) - trace(R):', np.trace(Q) - np.trace(R)
    print >> out
    print >> out, 'rate away estimate of spectral gap change:'
    print >> out, np.dot(diag_change, v2)
    print >> out
    return out.getvalue().rstrip()
def get_response_content(fs):
    """
    Report on the rate matrix built from a list of energies.
    Entry (row, col) of the rate matrix is exp(-(E[col] - E[row])) off the
    diagonal, and each diagonal entry is the negated sum of the other
    entries in its row.
    @param fs: form data; fs.energies is text that is split into lines,
    and each line yielded by iterutils.stripped_lines is parsed as a float
    @return: the report as a multi-line string without trailing whitespace
    @raise ValueError: if a line is not a valid float, if no energies are
    given, or if more than 100 energies are given
    """
    # read the energies from the form data
    energies = []
    for line in iterutils.stripped_lines(fs.energies.splitlines()):
        try:
            energy = float(line)
        except ValueError:
            raise ValueError('invalid energy: %s' % line)
        energies.append(energy)
    n = len(energies)
    # an empty list cannot yield a stationary eigenvector, so fail early
    if not n:
        raise ValueError('no energies were provided')
    if n > 100:
        raise ValueError('too many energies')
    # compute the rate matrix
    R = np.zeros((n, n))
    for row, row_energy in enumerate(energies):
        for col, col_energy in enumerate(energies):
            R[row, col] = math.exp(-(col_energy - row_energy))
    # Every diagonal entry is exp(0) == 1 at this point, so adding 1 back
    # leaves the negated sum of the off-diagonal entries of the row.
    for i, r in enumerate(R):
        R[i, i] = -np.sum(r) + 1
    # get the transition matrix at large finite time
    large_t = 1000.0
    T = scipy.linalg.expm(R*large_t)
    # eigendecompose
    Wr, Vr = scipy.linalg.eig(R, left=False, right=True)
    Wl, Vl = scipy.linalg.eig(R, left=True, right=False)
    # Get the left eigenvector whose eigenvalue has the smallest magnitude.
    # Using argmin avoids comparing eigenvector arrays to each other,
    # which raises an error when two eigenvalue magnitudes are tied.
    stationary_index = int(np.argmin(np.abs(Wl)))
    pi_eigenvector = Vl[:, stationary_index]
    # get the stationary distribution itself by normalizing the sum to 1
    pi_arr = pi_eigenvector / np.sum(pi_eigenvector)
    # print things
    np.set_printoptions(linewidth=300)
    out = StringIO()

    def section(title, value):
        # write a title line, the value, and a blank separator line
        print >> out, title
        print >> out, value
        print >> out

    section('rate matrix:', R)
    section('rate matrix row sums:', np.sum(R, axis=1))
    section('eigenvalues:', Wr)
    section('corresponding orthonormal right eigenvectors (columns):', Vr)
    section('eigenvalues:', Wl)
    section('corresponding orthonormal left eigenvectors (columns):', Vl)
    section('L2 normalized eigenvector associated with stationary distn:',
            pi_eigenvector)
    section('L1 renormalized vector (the stationary distribution):', pi_arr)
    print >> out
    # eigendecompose the transition matrix
    Wr, Vr = scipy.linalg.eig(T, left=False, right=True)
    Wl, Vl = scipy.linalg.eig(T, left=True, right=False)
    section('transition matrix for t=%f:' % large_t, T)
    section('transition matrix row sums:', np.sum(T, axis=1))
    section('eigenvalues:', Wr)
    section('corresponding orthonormal right eigenvectors (columns):', Vr)
    section('eigenvalues:', Wl)
    section('corresponding orthonormal left eigenvectors (columns):', Vl)
    section('incorrect reconstitution of the transition matrix:',
            ndot(Vr, np.diag(Wr), Vl.T))
    print >> out
    # Use the known properties of reversibility to symmetrize the matrix.
    t = 3
    coeffs, rates, c = get_identicality_params(R)
    section('brute identicality computation for t=%f:' % t,
            get_numerical_identicality(R, t))
    section('sophisticated identicality computation for t=%f:' % t,
            get_symbolic_identicality(coeffs, rates, c, t))
    print >> out
    # Try another couple rate matrices.
    e2 = math.exp(2)
    en2 = math.exp(-2)
    rate_matrices = [
            np.array([[-2.0, 2.0], [2.0, -2.0]]),
            np.array([[-1.0, 1.0], [3.0, -3.0]]),
            np.array([[-1, 1, 0], [1, -2, 1], [0, 1, -1]]),
            #np.array([[-4.0, 4.0, 0], [1, -2, 1], [0, 4, -4]])]
            #np.array([[-1, 1, 0], [7, -14, 7], [0, 1, -1]])]
            np.array([[-en2, en2, 0], [e2, -2*e2, e2], [0, en2, -en2]]),
            ]
    t = 3.0
    for test_R in rate_matrices:
        coeffs, rates, c = get_identicality_params(test_R)
        section('test rate matrix:', test_R)
        section('eigenvalues:', scipy.linalg.eigvals(test_R))
        section('stationary distribution:', R_to_distn(test_R))
        section('brute identicality computation for t=%f:' % t,
                get_numerical_identicality(test_R, t))
        section('sophisticated identicality computation for t=%f:' % t,
                get_symbolic_identicality(coeffs, rates, c, t))
        section('identicality derivative for t=%f:' % t,
                get_identicality_derivative(coeffs, rates, t))
        # one extra blank line separates the test matrices
        print >> out
    # return the message
    return out.getvalue().rstrip()