def test_invariant_selection_transition(self):
    """
    Check that the selection transition matrix passes validation.
    """
    # a small fixed model keeps the matrix cheap to build
    selection, nchromosomes, npositions = 1.1, 3, 2
    transition = get_selection_transition_matrix(
            selection, nchromosomes, npositions)
    MatrixUtil.assert_transition_matrix(transition)
def get_absorption_variance(P, plain, absorbing):
    """
    Get the variance of the times to absorption.
    Note that if an index is indicated as absorbing by its presence
    in the sequence of absorbing state indices,
    then it will be treated as absorbing
    even if the transition matrix P indicates otherwise.
    @param P: transition matrix
    @param plain: sequence of plain state indices
    @param absorbing: sequence of absorbing state indices
    @return: variance of times to absorption or 0 from absorbing states
    @raise ValueError: if the plain and absorbing indices together
    are not a permutation of the row indices of P
    """
    # check that P is really a transition matrix
    MatrixUtil.assert_transition_matrix(P)
    # define some state lists
    states = np.hstack((plain, absorbing))
    # Check that the index sequences match the size of P.
    # The range is materialized as a list so that the comparison
    # does not depend on range() itself returning a list.
    if sorted(states) != list(range(len(P))):
        raise ValueError('P is not conformant with the index sequences')
    # compute the expected time to absorption from each plain state
    Q = P[plain, :][:, plain]
    c = np.ones_like(plain)
    I = np.eye(len(plain))
    t = linalg.solve(I - Q, c)
    # compute the variance as 2*N*t - t*(t+1) where N*x solves (I - Q) y = x
    vplain = 2*linalg.solve(I - Q, t) - t*(t+1)
    # absorbing states contribute zero variance
    v = np.hstack((vplain, np.zeros_like(absorbing)))
    # reorder from (plain, absorbing) order back to state index order
    return v[inverse_permutation(states)]
def main(args): alpha = args.alpha N = args.N k = 3 print 'alpha:', alpha print 'N:', N print 'k:', k print M = np.array(list(multinomstate.gen_states(N, k)), dtype=int) T = multinomstate.get_inverse_map(M) R_mut = wrightcore.create_mutation_abc(M, T) R_drift = wrightcore.create_moran_drift_rate_k3(M, T) Q = alpha * R_mut + R_drift # pick out the correct eigenvector W, V = scipy.linalg.eig(Q.T) w, v = min(zip(np.abs(W), V.T)) print 'rate matrix:' print Q print print 'transpose of rate matrix:' print Q.T print print 'eigendecomposition of transpose of rate matrix as integers:' print scipy.linalg.eig(Q.T) print print 'transpose of rate matrix in mathematica notation:' print MatrixUtil.m_to_mathematica_string(Q.T.astype(int)) print print 'abs eigenvector corresponding to smallest abs eigenvalue:' print np.abs(v) print
def test_invariant_selection_transition(self):
    """
    The matrix built for the selection step must be a transition matrix.
    """
    # positional arguments: selection, nchromosomes, npositions
    args = (1.1, 3, 2)
    MatrixUtil.assert_transition_matrix(
            get_selection_transition_matrix(*args))
def R_to_distn_nonspectral(R):
    """
    Compute the stationary distribution of a reversible rate matrix.
    The rate matrix must be irreducible and reversible.
    It is not necessarily symmetric.
    If the rate matrix is symmetric then this function is overkill
    because the stationary distribution would be uniform.
    @param R: a square rate matrix that supports R[i, j] indexing
    @return: a numpy array of stationary probabilities
    @raise MatrixUtil.MatrixError: if a rate is nonzero while its reverse
    is zero, or if not every state is reachable from state 0
    """
    nstates = len(R)
    V = set(range(nstates))
    E = set()
    for i in range(nstates):
        for j in range(i):
            # Reversibility requires that a rate and its reverse
            # are either both zero or both nonzero;
            # check both directions rather than only R[i, j] -> R[j, i].
            if bool(R[i, j]) != bool(R[j, i]):
                raise MatrixUtil.MatrixError(
                        'the matrix is not reversible')
            if R[i, j]:
                E.add(frozenset((i, j)))
    nd = graph.g_to_nd(V, E)
    # construct an arbitrary rooted spanning tree of the states
    V_component, D_component = graph.nd_to_dag_component(nd, 0)
    if V_component != V:
        raise MatrixUtil.MatrixError('the matrix is not irreducible')
    # compute the stationary probabilities relative to the first state
    weights = [None] * nstates
    v_to_children = graph.dag_to_cd(V_component, D_component)
    preorder_states = graph.topo_sort(V_component, D_component)
    weights[preorder_states[0]] = 1.0
    for parent in preorder_states:
        for child in v_to_children[parent]:
            # Detailed balance gives
            # weight[child] / weight[parent] = R[parent, child] / R[child, parent].
            # The float conversion prevents floor division
            # when the rates are stored as integers.
            ratio = float(R[parent, child]) / R[child, parent]
            weights[child] = weights[parent] * ratio
    # normalize the relative weights into a distribution
    total = sum(weights)
    return np.array(weights) / total
def get_type_2_info(P):
    """
    The expected time for a type 2 event is computed as follows.
    It is the expected number of steps from AB to ab
    conditional on not entering the states AB, Ab, or aB.
    It should also include a bit of exponential delay that it takes
    to leave the final fixed AB state before embark.
    @param P: a huge transition matrix which is not modified
    @return: expectation and variance of compensatory substitution time
    """
    MatrixUtil.assert_transition_matrix(P)
    nstates = len(P)
    # Define index sequences.
    # The plain indices are materialized as a list because they are
    # concatenated with the forbidden indices below.
    plain = list(range(4, nstates))
    forbidden = [0, 1, 2]
    target = [3]
    # condition the chain, then get the time moments for reaching the target
    H = hittingtime.get_conditional_transition_matrix(
            P, plain, forbidden, target)
    t = hittingtime.get_absorption_time(
            H, plain + forbidden, target)
    v = hittingtime.get_absorption_variance(
            H, plain + forbidden, target)
    # take the moments for the initial state, which has index 0
    t0 = t[0]
    v0 = v[0]
    # Add a geometric rv that depends on probability of leaving fixed AB.
    # The number of failures before the first success has
    # mean (1-p)/p and variance (1-p)/p^2.
    p = 1 - P[0, 0]
    t0 += (1 - p) / p
    v0 += (1 - p) / (p * p)
    return t0, v0
def get_response_content(fs):
    """
    Report an additive and a multiplicative matrix built from the sums
    of the inverse of the input matrix.
    @param fs: form data providing the square matrix as fs.matrix
    @return: the response text
    @raise HandlingError: if the matrix has fewer than three rows
    """
    # read the matrix
    D = fs.matrix
    n = len(D)
    if n < 3:
        raise HandlingError('the matrix should have at least three rows')
    # summarize the inverse by its axis-0 sums and its grand sum
    D_inv = np.linalg.inv(D)
    axis_sums = np.sum(D_inv, 0)
    grand_sum = np.sum(D_inv)
    # A[i][j] = s[i] + s[j] - total
    A = axis_sums[:, np.newaxis] + axis_sums[np.newaxis, :] - grand_sum
    # B[i][j] = s[i] * s[j] / total
    B = np.outer(axis_sums, axis_sums) / grand_sum
    # the residual of the inverse after removing the multiplicative part
    C = D_inv - B
    # define the response
    out = StringIO()
    print >> out, 'additive:'
    print >> out, MatrixUtil.m_to_string(A)
    print >> out, 'multiplicative:'
    print >> out, MatrixUtil.m_to_string(B)
    # report the sum of each row of the residual matrix
    for residual_row in C:
        print >> out, sum(residual_row)
    # return the response
    return out.getvalue()
def get_response_content(fs):
    """
    Report principal coordinate projections of the tips of a tree
    for the tip distance matrix and for a derived dissimilarity matrix.
    @param fs: form data providing the newick tree string as fs.tree
    @return: the response text
    """
    # arbitrarily define the size of the alphabet
    k = 4
    # get the tree and order its tip names
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    ordered_tip_names = sorted(node.get_name() for node in tree.gen_tips())
    # get the matrix of pairwise distances among the tips
    D = np.array(tree.get_distance_matrix(ordered_tip_names))
    D_vector = get_principal_coordinate(D)
    # get the dissimilarity matrix from the distance matrix
    dissimilarity = np.array([
        [distance_to_dissimilarity(d, k) for d in row] for row in D])
    dissimilarity_vector = get_principal_coordinate(dissimilarity)
    # each report is (matrix title, matrix, projection title, projections)
    reports = (
            (
                'original distance matrix:', D,
                'projections onto the principal coordinate '
                'using the original distance matrix:', D_vector),
            (
                'dissimilarity matrix:', dissimilarity,
                'projections onto the principal coordinate '
                'using the dissimilarity matrix:', dissimilarity_vector),
            )
    out = StringIO()
    for matrix_title, matrix, projection_title, projections in reports:
        print >> out, matrix_title
        print >> out, MatrixUtil.m_to_string(matrix)
        print >> out
        print >> out, projection_title
        for name, value in zip(ordered_tip_names, projections):
            print >> out, '\t'.join((name, str(value)))
        print >> out
    # return the response
    return out.getvalue()
def get_two_allele_distribution(N_big, N_small, f0, f1, f_subsample): """ Assumes small genic selection. Assumes small mutation. The mutational bias does not affect the distribution. @param N_big: total number of alleles in the population @param N_small: number of alleles sampled from the population @param f0: fitness of allele 0 @param f1: fitness of allele 1 @param f_subsample: subsampling function @return: distribution over all non-fixed population states """ # construct a transition matrix nstates = N_big + 1 P = np.zeros((nstates, nstates)) for i in range(nstates): p0, p1 = wrightfisher.genic_diallelic(f0, f1, i, N_big - i) if i == 0: P[i, 1] = 1.0 elif i == N_big: P[i, N_big - 1] = 1.0 else: for j in range(nstates): logp = StatsUtil.binomial_log_pmf(j, N_big, p0) P[i, j] = math.exp(logp) # find the stationary distribution v = MatrixUtil.get_stationary_distribution(P) MatrixUtil.assert_distribution(v) if not np.allclose(v, np.dot(v, P)): raise ValueError('expected a left eigenvector with eigenvalue 1') # return the stationary distribution conditional on dimorphism print v distn = f_subsample(v, N_small) return distn[1:-1] / np.sum(distn[1:-1])
def get_type_2_info(P):
    """
    The expected time for a type 2 event is computed as follows.
    It is the expected number of steps from AB to ab
    conditional on not entering the states AB, Ab, or aB.
    It should also include a bit of exponential delay that it takes
    to leave the final fixed AB state before embark.
    @param P: a huge transition matrix which is not modified
    @return: expectation and variance of compensatory substitution time
    """
    MatrixUtil.assert_transition_matrix(P)
    nstates = len(P)
    # Define index sequences.
    # An explicit list is required for the plain indices
    # because they are concatenated with another list below.
    plain = list(range(4, nstates))
    forbidden = [0, 1, 2]
    target = [3]
    # the conditioned chain treats plain and forbidden states as transient
    transient = plain + forbidden
    H = hittingtime.get_conditional_transition_matrix(
            P, plain, forbidden, target)
    t = hittingtime.get_absorption_time(H, transient, target)
    v = hittingtime.get_absorption_variance(H, transient, target)
    # take the moments for the initial state, which has index 0
    t0 = t[0]
    v0 = v[0]
    # Add a geometric rv that depends on probability of leaving fixed AB.
    # Its mean is (1-p)/p and its variance is (1-p)/p^2.
    p = 1 - P[0, 0]
    t0 += (1 - p) / p
    v0 += (1 - p) / (p*p)
    return t0, v0
def get_endpoint_conditioned_expected_occupancy(R, v, a, b, T):
    """
    Holmes and Rubin 2002.
    The computation symmetrizes the rate matrix using the square roots
    of the stationary probabilities and works with its eigendecomposition.
    @param R: rate matrix
    @param v: stationary distribution
    @param a: integer state index of initial state
    @param b: integer state index of final state
    @param T: elapsed time
    @return: endpoint conditioned expected amount of time spent in each state
    """
    nstates = len(v)
    psi = np.sqrt(v)
    # symmetrize the rate matrix
    S = (R.T * psi).T / psi
    MatrixUtil.assert_symmetric(S)
    w, U = scipy.linalg.eigh(S)
    identity = np.eye(nstates)
    if not np.allclose(np.dot(U, U.T), identity):
        raise Exception('U should be orthogonal')
    # cross-check the spectral transition probability against expm
    P = scipy.linalg.expm(T*R)
    psi_ratio = psi[b] / psi[a]
    # the Mab is Holmes and Rubin 2002 notation
    Mab = psi_ratio * np.sum(U[a] * U[b] * np.exp(T*w))
    if not np.allclose(P[a, b], Mab):
        raise Exception('not close: %s %s' % (P[a, b], Mab))
    coeff = psi_ratio / Mab
    K = _holmes_rubin_2002_kernel(w, T)
    per_state = [
            _holmes_rubin_2002_summation(U, a, b, state, K)
            for state in range(nstates)]
    occupancy = coeff * np.array(per_state)
    # the time spent over all states must account for the whole interval
    if not np.allclose(T, np.sum(occupancy)):
        raise Exception(
                'the expectected occupancy times should add up '
                'to the total time')
    return occupancy
def hard_coded_analysis_a(): tree_string = '(a:1, (b:2, d:5):1, c:4);' tree = NewickIO.parse(tree_string, FelTree.NewickTree) states = [] id_list = [] for state, id_ in sorted((node.name, id(node)) for node in tree.gen_tips()): id_list.append(id_) states.append(state) for node in tree.gen_internal_nodes(): id_list.append(id(node)) states.append('') n = len(states) for method in ('tips', 'full'): # get the distance matrix from the tree if method == 'tips': print 'leaves only:' distance_matrix = tree.get_distance_matrix(states) else: print 'leaves and internal nodes:' distance_matrix = tree.get_full_distance_matrix(id_list) print 'distance matrix from the tree:' print MatrixUtil.m_to_string(distance_matrix) # get the equivalent euclidean points z_points = list(gen_euclidean_points(distance_matrix)) for state, point in zip(states, z_points): print state, point # get the distance matrix from the transformed points print 'distance matrix from the transformed points:' distance_matrix = get_euclidean_distance_matrix(z_points) print MatrixUtil.m_to_string(distance_matrix) print
def get_response_content(fs):
    """
    Report a split criterion for two pairs of tips of a tree.
    The criterion is the determinant of a 2x2 submatrix of the response
    matrix computed from the tip distance matrix.
    @param fs: form data providing tree, lhs_a, lhs_b, rhs_a, rhs_b,
    show_response, show_reduced_response and show_blen
    @return: the response text
    @raise HandlingError: if a given label is not a tip of the tree
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # check that the given labels are tips of the tree
    tip_name_set = set(node.get_name() for node in tree.gen_tips())
    user_name_set = set([fs.lhs_a, fs.lhs_b, fs.rhs_a, fs.rhs_b])
    bad_names = user_name_set - tip_name_set
    if bad_names:
        raise HandlingError(
                'these labels are not valid tips: %s' % ', '.join(bad_names))
    # get the distance matrix with tips ordered by name
    ordered_names = sorted(node.get_name() for node in tree.gen_tips())
    D = np.array(tree.get_distance_matrix(ordered_names))
    # get the response matrix
    R = Clustering.get_R_stone(D)
    # map each requested label to its row index
    name_to_index = dict((name, i) for i, name in enumerate(ordered_names))
    la = name_to_index[fs.lhs_a]
    lb = name_to_index[fs.lhs_b]
    ra = name_to_index[fs.rhs_a]
    rb = name_to_index[fs.rhs_b]
    # rows are the left hand side tips and columns the right hand side tips
    R_reduced = np.zeros((2, 2))
    for row, left in enumerate((la, lb)):
        for col, right in enumerate((ra, rb)):
            R_reduced[row][col] = R[left][right]
    # treat a tiny determinant as exactly zero
    epsilon = 1e-13
    criterion = np.linalg.det(R_reduced)
    if abs(criterion) < epsilon:
        criterion = 0
    # in analogy to the four point condition,
    # use two different ways of calculating the distance
    blen_a = (D[la][rb] + D[lb][ra] - D[la][lb] - D[ra][rb]) / 2.0
    blen_b = (D[la][ra] + D[lb][rb] - D[la][lb] - D[ra][rb]) / 2.0
    blen = min(blen_a, blen_b)
    # define the response; the determinant is always reported
    paragraphs = []
    if fs.show_response:
        paragraphs.append([
            'response matrix with rows ordered alphabetically by leaf label:',
            MatrixUtil.m_to_string(R)])
    if fs.show_reduced_response:
        paragraphs.append([
            '2x2 submatrix of the response matrix:',
            MatrixUtil.m_to_string(R_reduced)])
    paragraphs.append([
        'determinant of the 2x2 submatrix of the response matrix:',
        str(criterion)])
    if fs.show_blen:
        paragraphs.append([
            'branch length defined by the split:',
            str(blen)])
    # return the response
    body = '\n\n'.join('\n'.join(p) for p in paragraphs)
    return body + '\n'
def get_response_content(fs):
    """
    Plot a path obtained from PathSampler.sample_endpoint_conditioned_path.
    The transition matrix is the product of a drift-selection matrix
    and a mutation matrix.
    @param fs: form data providing npop, selection_ratio, mutation_ab,
    mutation_ba, nmutants_initial, nmutants_final, ngenerations
    and imageformat
    @return: the image data produced by the R plotter
    @raise RUtil.RError: if the plotter gives a nonzero return code
    """
    # precompute and validate the component transition matrices
    P_drift_selection = pgmsinglesite.create_drift_selection_transition_matrix(
            fs.npop, fs.selection_ratio)
    MatrixUtil.assert_transition_matrix(P_drift_selection)
    P_mutation = pgmsinglesite.create_mutation_transition_matrix(
            fs.npop, fs.mutation_ab, fs.mutation_ba)
    MatrixUtil.assert_transition_matrix(P_mutation)
    # compute the path sample under the combined transition matrix
    P = np.dot(P_drift_selection, P_mutation)
    mypath = PathSampler.sample_endpoint_conditioned_path(
            fs.nmutants_initial, fs.nmutants_final, fs.ngenerations, P)
    # build the R table with one row per generation
    headers = ['generation', 'number.of.mutants']
    rows = [[generation, nmutants]
            for generation, nmutants in enumerate(mypath)]
    table_string = RUtil.get_table_string(rows, headers)
    # get the R script
    script = get_ggplot()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
            table_string, script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
def get_response_content(fs):
    """
    Report a contrast matrix computed from the tip distances of a tree.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response text, which is empty if no format is selected
    @raise HandlingError: if the labels do not match the tips of the tree
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # read the ordered labels
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    # validate the input
    tip_labels = set(node.get_name() for node in tree.gen_tips())
    if set(ordered_labels) != tip_labels:
        raise HandlingError(
                'the labels should match the labels of the leaves of the tree')
    # get the matrix of pairwise distances among the tips
    D = np.array(tree.get_distance_matrix(ordered_labels))
    # go from distances to a laplacian to the contrast matrix
    L = Euclid.edm_to_laplacian(D)
    w, v = get_eigendecomposition(L)
    C = get_contrast_matrix(w, v)
    # set elements with small absolute value to zero
    C[abs(C) < fs.epsilon] = 0
    # choose the formatter for the first selected format, if any
    if fs.plain_format:
        formatter = MatrixUtil.m_to_string
    elif fs.matlab_format:
        formatter = MatrixUtil.m_to_matlab_string
    elif fs.r_format:
        formatter = MatrixUtil.m_to_R_string
    else:
        formatter = None
    # write the response
    out = StringIO()
    if formatter is not None:
        print >> out, formatter(C)
    return out.getvalue()
def laplacian_to_adjacency(L):
    """
    Subtract the laplacian from the diagonal matrix of its own diagonal.
    @param L: a laplacian matrix
    @return: an adjacency matrix
    """
    MatrixUtil.assert_square(L)
    diagonal_entries = np.diag(L)
    diagonal_matrix = np.diag(diagonal_entries)
    return diagonal_matrix - L
def get_endpoint_conditioned_expected_occupancy(R, v, a, b, T):
    """
    Holmes and Rubin 2002.
    The rate matrix is symmetrized with the square roots of the
    stationary probabilities and then eigendecomposed.
    @param R: rate matrix
    @param v: stationary distribution
    @param a: integer state index of initial state
    @param b: integer state index of final state
    @param T: elapsed time
    @return: endpoint conditioned expected amount of time spent in each state
    """
    n = len(v)
    root_v = np.sqrt(v)
    symmetrized = (R.T * root_v).T / root_v
    MatrixUtil.assert_symmetric(symmetrized)
    w, U = scipy.linalg.eigh(symmetrized)
    gram = np.dot(U, U.T)
    if not np.allclose(gram, np.eye(n)):
        raise Exception('U should be orthogonal')
    # transition probabilities by matrix exponential, used as a sanity check
    P = scipy.linalg.expm(T * R)
    endpoint_ratio = root_v[b] / root_v[a]
    # the Mab is Holmes and Rubin 2002 notation
    Mab = endpoint_ratio * np.sum(U[a] * U[b] * np.exp(T * w))
    if not np.allclose(P[a, b], Mab):
        raise Exception('not close: %s %s' % (P[a, b], Mab))
    coeff = endpoint_ratio / Mab
    K = _holmes_rubin_2002_kernel(w, T)
    summations = []
    for i in range(n):
        summations.append(_holmes_rubin_2002_summation(U, a, b, i, K))
    occupancy = coeff * np.array(summations)
    # the occupancy times partition the elapsed time
    if not np.allclose(T, np.sum(occupancy)):
        raise Exception('the expectected occupancy times should add up '
                        'to the total time')
    return occupancy
def laplacian_to_edm(L):
    """
    Convert by way of the double centered covariance matrix.
    @param L: a laplacian matrix
    @return: a Euclidean distance matrix
    """
    MatrixUtil.assert_square(L)
    HSH = laplacian_to_dccov(L)
    return dccov_to_edm(HSH)
def adjacency_to_laplacian(A):
    """
    Subtract the adjacency matrix from the diagonal matrix of its
    axis-0 sums.
    @param A: an adjacency matrix
    @return: a laplacian matrix
    """
    MatrixUtil.assert_square(A)
    degrees = np.sum(A, 0)
    degree_matrix = np.diag(degrees)
    return degree_matrix - A
def test_row_sums(self):
    """
    Validate the test mutation matrix and the transition matrix
    built from it.
    """
    mutation, fitness = get_test_mutation_fitness()
    # the first two positional arguments are N=20 and k=4
    transition = get_transition_matrix(20, 4, mutation, fitness)
    for matrix in (mutation, transition):
        MatrixUtil.assert_transition_matrix(matrix)
def bott_duffin(M, v):
    """
    Compute a constrained generalized inverse.
    Specifically, this is the Bott-Duffin inverse of M
    constrained to the orthogonal complement of v.
    This function assumes that v has rank 1,
    although Bott-Duffin inverses are also defined
    for inverses constrained to orthogonal complements
    of higher dimensional subspaces.
    Maybe this could be a separate python function
    where v is replaced by a shape-2 numpy array.
    @param M: a matrix
    @param v: a vector
    @return: the constrained generalized inverse of M
    @raise ValueError: if M is not n by n where n is the length of v,
    or if the inner product of v with itself is zero
    """
    # check the shapes of the input matrix and vector
    MatrixUtil.assert_1d(v)
    n = len(v)
    if M.shape != (n, n):
        raise ValueError('M and v have incompatible shapes')
    # check that v is nonzero
    v_dot_v = np.inner(v, v)
    if not v_dot_v:
        raise ValueError('expected nonzero v')
    # Compute the orthogonal projection onto v.
    # True division is requested explicitly so that an integer vector
    # does not have its projection truncated by floor division.
    P = np.true_divide(np.outer(v, v), v_dot_v)
    # compute the orthogonal projection onto the orthogonal complement of v
    I = np.eye(n)
    C = I - P
    # compute the constrained generalized inverse
    B = np.dot(C, np.linalg.inv(np.dot(M, C) + P))
    return B
def dccov_to_edm(HSH):
    """
    Validate the input and then defer to the covariance conversion.
    @param HSH: a double centered covariance matrix
    @return: a Euclidean distance matrix
    """
    MatrixUtil.assert_square(HSH)
    D = cov_to_edm(HSH)
    return D
def get_response_content(fs):
    """
    Compare principal coordinate projections of the tips of a tree
    under the tip distance matrix and under a dissimilarity matrix
    derived from it.
    @param fs: form data providing the newick tree string as fs.tree
    @return: the response text
    """
    # arbitrarily define the size of the alphabet
    k = 4
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # define the order of the tip names
    ordered_tip_names = sorted(node.get_name() for node in tree.gen_tips())
    # get the matrix of pairwise distances among the tips
    D = np.array(tree.get_distance_matrix(ordered_tip_names))
    D_vector = get_principal_coordinate(D)
    # get the dissimilarity matrix from the distance matrix
    dissimilarity_rows = []
    for row in D:
        dissimilarity_rows.append(
                [distance_to_dissimilarity(d, k) for d in row])
    dissimilarity = np.array(dissimilarity_rows)
    dissimilarity_vector = get_principal_coordinate(dissimilarity)
    # write one section per distance-like matrix
    out = StringIO()
    sections = (
            ('original distance matrix', D, D_vector),
            ('dissimilarity matrix', dissimilarity, dissimilarity_vector),
            )
    for description, matrix, projections in sections:
        print >> out, description + ':'
        print >> out, MatrixUtil.m_to_string(matrix)
        print >> out
        print >> out, (
                'projections onto the principal coordinate using the '
                + description + ':')
        for name, value in zip(ordered_tip_names, projections):
            print >> out, '\t'.join((name, str(value)))
        print >> out
    # return the response
    return out.getvalue()
def get_response_content(fs):
    """
    Report matrices built from the axis sums of the inverse
    of the input matrix.
    @param fs: form data providing the square matrix as fs.matrix
    @return: the response text
    @raise HandlingError: if the matrix has fewer than three rows
    """
    # read the matrix
    D = fs.matrix
    if len(D) < 3:
        raise HandlingError('the matrix should have at least three rows')
    # define the other matrices
    D_inv = np.linalg.inv(D)
    sums = np.sum(D_inv, 0)
    total = np.sum(D_inv)
    # additive: sums[i] + sums[j] - total
    additive = sums[:, np.newaxis] + sums[np.newaxis, :] - total
    # multiplicative: sums[i] * sums[j] / total
    multiplicative = np.outer(sums, sums) / total
    # what is left of the inverse after removing the multiplicative part
    remainder = D_inv - multiplicative
    # define the response
    out = StringIO()
    print >> out, 'additive:'
    print >> out, MatrixUtil.m_to_string(additive)
    print >> out, 'multiplicative:'
    print >> out, MatrixUtil.m_to_string(multiplicative)
    # each remaining line is the sum of one row of the remainder
    for row in remainder:
        print >> out, sum(row)
    # return the response
    return out.getvalue()
def test_mutation(self):
    """
    Check that the mutation matrix passes transition matrix validation.
    """
    npop, mutation_ab, mutation_ba = 10, 0.1, 0.2
    transition = create_mutation_transition_matrix(
            npop, mutation_ab, mutation_ba)
    MatrixUtil.assert_transition_matrix(transition)
def edm_to_dccov(D):
    """
    Scale the double centered distance matrix by negative one half.
    @param D: a Euclidean distance matrix
    @return: a double centered covariance matrix
    """
    MatrixUtil.assert_square(D)
    centered = MatrixUtil.double_centered(D)
    return -0.5 * centered
def get_response_content(fs):
    """
    Evaluate a split of two pairs of tips using the response matrix.
    The reported criterion is the determinant of the 2x2 block of the
    response matrix whose rows are the left hand side tips and whose
    columns are the right hand side tips.
    @param fs: form data providing tree, lhs_a, lhs_b, rhs_a, rhs_b,
    show_response, show_reduced_response and show_blen
    @return: the response text
    @raise HandlingError: if a given label is not a tip of the tree
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # reject labels that are not tips of the tree
    valid_names = set(node.get_name() for node in tree.gen_tips())
    requested_names = set([fs.lhs_a, fs.lhs_b, fs.rhs_a, fs.rhs_b])
    bad_names = requested_names - valid_names
    if bad_names:
        msg = 'these labels are not valid tips: %s' % ', '.join(bad_names)
        raise HandlingError(msg)
    # get the distance matrix with rows ordered by tip name
    ordered_names = sorted(node.get_name() for node in tree.gen_tips())
    D = np.array(tree.get_distance_matrix(ordered_names))
    # get the response matrix
    R = Clustering.get_R_stone(D)
    # look up the row index of each requested tip
    name_to_index = dict((name, i) for i, name in enumerate(ordered_names))
    la = name_to_index[fs.lhs_a]
    lb = name_to_index[fs.lhs_b]
    ra = name_to_index[fs.rhs_a]
    rb = name_to_index[fs.rhs_b]
    # get the two by two matrix
    R_reduced = np.zeros((2, 2))
    R_reduced[0] = (R[la][ra], R[la][rb])
    R_reduced[1] = (R[lb][ra], R[lb][rb])
    # snap a determinant within epsilon of zero to exactly zero
    epsilon = 1e-13
    criterion = np.linalg.det(R_reduced)
    if abs(criterion) < epsilon:
        criterion = 0
    # in analogy to the four point condition,
    # use two different ways of calculating the distance
    blen_a = (D[la][rb] + D[lb][ra] - D[la][lb] - D[ra][rb]) / 2.0
    blen_b = (D[la][ra] + D[lb][rb] - D[la][lb] - D[ra][rb]) / 2.0
    blen = min(blen_a, blen_b)
    # each entry is (include, title, text); the determinant is always shown
    candidates = []
    if fs.show_response:
        candidates.append((
            'response matrix with rows ordered alphabetically by leaf label:',
            MatrixUtil.m_to_string(R)))
    if fs.show_reduced_response:
        candidates.append((
            '2x2 submatrix of the response matrix:',
            MatrixUtil.m_to_string(R_reduced)))
    candidates.append((
        'determinant of the 2x2 submatrix of the response matrix:',
        str(criterion)))
    if fs.show_blen:
        candidates.append((
            'branch length defined by the split:',
            str(blen)))
    # return the response
    paragraphs = ['\n'.join(lines) for lines in candidates]
    return '\n\n'.join(paragraphs) + '\n'
def q_to_cov(Q):
    """
    Rescale a neighbor joining Q matrix by -1 / (2*(n-2)).
    Here n is the number of rows of Q, so the divisor is zero when n is 2.
    @param Q: a neighbor joining Q matrix
    @return: something like a covariance matrix
    """
    MatrixUtil.assert_square(Q)
    n = len(Q)
    # The float divisor keeps an integer Q matrix
    # from being floor-divided.
    S = -Q / (2.0 * (n - 2))
    return S
def get_response_content(fs):
    """
    Compare the full distance matrix of a tree with the one given by
    a closed form expression in the weighted adjacency matrix.
    @param fs: form data providing tree, show_direct_d, show_clever_d
    and show_closeness
    @return: the response text
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # get the arbitrarily ordered names
    ordered_names = set(node.get_name() for node in tree.preorder())
    # get the corresponding ordered ids
    name_to_id = dict((node.get_name(), id(node)) for node in tree.preorder())
    ordered_ids = [name_to_id[name] for name in ordered_names]
    n = len(ordered_names)
    # get the full distance matrix
    D_direct = np.array(tree.get_full_distance_matrix(ordered_ids))
    # get the full weighted adjacency matrix and its degree matrix
    A = np.array(tree.get_affinity_matrix(ordered_ids))
    degree_matrix = np.diag(np.sum(A, 0))
    # sum the reciprocal nonzero affinities over the upper triangle
    gamma_inv = sum(
            1.0 / A[i][j]
            for i in range(n)
            for j in range(i + 1, n)
            if A[i][j])
    gamma = 1.0 / gamma_inv
    # get the delta vector: two minus the number of nonzero entries per row
    d = np.array([2 - sum(1 for x in row if x) for row in A])
    # get the full distance matrix using the clever formula
    D_clever = 2*np.linalg.inv(A + gamma * np.outer(d, d) - degree_matrix)
    # check whether the distance matrices are close
    if np.allclose(D_direct, D_clever):
        closeness_string = 'the distance matrices are close'
    else:
        closeness_string = 'the distance matrices are not close'
    # define the response
    paragraphs = []
    if fs.show_direct_d:
        paragraphs.append([
            'directly calculated distance matrix:',
            MatrixUtil.m_to_string(D_direct)])
    if fs.show_clever_d:
        paragraphs.append([
            'cleverly calculated distance matrix:',
            MatrixUtil.m_to_string(D_clever)])
    if fs.show_closeness:
        paragraphs.append([
            'closeness:',
            closeness_string])
    # return the response
    return '\n\n'.join('\n'.join(p) for p in paragraphs) + '\n'
def get_response_content(fs):
    """
    Plot the endpoint conditioned state probabilities per generation.
    The transition matrix is the product of a drift-selection matrix
    and a mutation matrix.
    @param fs: form data providing npop, selection_ratio, mutation_ab,
    mutation_ba, nmutants_initial, nmutants_final, ngenerations
    and imageformat
    @return: the image data produced by the R plotter
    @raise RUtil.RError: if the plotter gives a nonzero return code
    """
    # precompute and validate the component transition matrices
    P_drift_selection = pgmsinglesite.create_drift_selection_transition_matrix(
            fs.npop, fs.selection_ratio)
    MatrixUtil.assert_transition_matrix(P_drift_selection)
    P_mutation = pgmsinglesite.create_mutation_transition_matrix(
            fs.npop, fs.mutation_ab, fs.mutation_ba)
    MatrixUtil.assert_transition_matrix(P_mutation)
    # compute the transition matrix
    P = np.dot(P_drift_selection, P_mutation)
    # Compute the endpoint conditional probabilities for various states
    # along the unobserved path.
    # Column g of M is the distribution over states at generation g.
    nstates = fs.npop + 1
    last = fs.ngenerations - 1
    M = np.zeros((nstates, fs.ngenerations))
    M[fs.nmutants_initial, 0] = 1.0
    M[fs.nmutants_final, last] = 1.0
    for g in range(1, last):
        # g steps from the initial state, then the rest to the final state
        A = np.linalg.matrix_power(P, g)
        B = np.linalg.matrix_power(P, last - g)
        weights = np.array([
            A[fs.nmutants_initial, k] * B[k, fs.nmutants_final]
            for k in range(nstates)], dtype=float)
        M[:, g] = weights / np.sum(weights)
    # one table row per (generation, state) pair
    arr = []
    for g in range(fs.ngenerations):
        for k in range(nstates):
            p = M[k, g]
            # a zero probability has no finite logarithm
            logp = math.log(p) if p else float('-inf')
            arr.append([g, k, p, logp])
    # define the R table headers
    headers = [
            'generation',
            'number.of.mutants',
            'probability',
            'log.prob',
            ]
    # get the R table
    table_string = RUtil.get_table_string(arr, headers)
    # get the R script
    script = get_ggplot()
    # create the R plot image
    device_name = Form.g_imageformat_to_r_function[fs.imageformat]
    retcode, r_out, r_err, image_data = RUtil.run_plotter(
            table_string, script, device_name)
    if retcode:
        raise RUtil.RError(r_err)
    return image_data
def bott_duffin_const(M):
    """
    Compute a constrained generalized inverse.
    Specifically, this is the Bott-Duffin inverse of M
    constrained to the orthogonal complement of the constant vector.
    @param M: a square matrix
    @return: the constrained generalized inverse of M
    """
    MatrixUtil.assert_square(M)
    # the constant vector has one entry per row of M
    constant_vector = np.ones(len(M))
    return bott_duffin(M, constant_vector)
def test_invariant_mutation_transition_s(self):
    """
    Check that the mutation transition matrix passes validation.
    """
    mutation, nchromosomes, npositions = 0.01, 3, 2
    # the state space description is passed through to the matrix builder
    ci_to_short, short_to_count, sorted_chrom_lists = get_state_space_info(
            nchromosomes, npositions)
    transition = get_mutation_transition_matrix_s(
            ci_to_short, short_to_count, sorted_chrom_lists,
            mutation, nchromosomes, npositions)
    MatrixUtil.assert_transition_matrix(transition)
def test_invariant_mutation_transition_s(self):
    """
    The matrix built for the mutation step must be a transition matrix.
    """
    mutation = 0.01
    dimensions = (3, 2)  # nchromosomes, npositions
    ci_to_short, short_to_count, sorted_chrom_lists = get_state_space_info(
            *dimensions)
    MatrixUtil.assert_transition_matrix(
            get_mutation_transition_matrix_s(
                ci_to_short, short_to_count, sorted_chrom_lists,
                mutation, *dimensions))
def get_response_content(fs):
    """
    Report the matrices obtained by applying SchurAlgebra.mschur
    to a laplacian matrix and a selection of its taxa.
    @param fs: form data providing laplacian, labels and selection
    @return: the response text
    @raise HandlingError: if the labels or the selection are invalid,
    or if the number of labels does not match the size of the matrix
    """
    # read the matrix
    L = fs.laplacian
    # read the ordered labels
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    if not ordered_labels:
        raise HandlingError('no ordered taxa were provided')
    if len(ordered_labels) != len(set(ordered_labels)):
        raise HandlingError('the ordered taxa should be unique')
    # get the label selection and its complement
    min_selected_labels = 2
    min_unselected_labels = 1
    selected_labels = set(Util.get_stripped_lines(StringIO(fs.selection)))
    if len(selected_labels) < min_selected_labels:
        raise HandlingError(
                'at least %d taxa should be selected to be grouped' % (
                    min_selected_labels))
    # get the set of labels in the complement
    unselected_labels = set(ordered_labels) - selected_labels
    if len(unselected_labels) < min_unselected_labels:
        raise HandlingError(
                'at least %d taxa should remain outside the selected group' % (
                    min_unselected_labels))
    # assert that no bizarre labels were selected
    weird_labels = selected_labels - set(ordered_labels)
    if weird_labels:
        raise HandlingError(
                'some selected taxa are invalid: ' + str(weird_labels))
    # Assert that the size of the laplacian matrix is compatible
    # with the number of ordered labels.
    # The message names the laplacian because that is the input matrix here.
    if len(L) != len(ordered_labels):
        raise HandlingError(
                'the number of listed taxa does not match '
                'the number of rows in the laplacian matrix')
    # get the set of selected indices and its complement
    n = len(L)
    index_selection = set(
            i for i, label in enumerate(ordered_labels)
            if label in selected_labels)
    index_complement = set(range(n)) - index_selection
    # begin the response
    out = StringIO()
    # calculate the new laplacian matrix
    L_small = SchurAlgebra.mschur(L, index_selection)
    D_small = Euclid.laplacian_to_edm(L_small)
    # print the matrices and the labels of its rows
    print >> out, 'new laplacian matrix:'
    print >> out, MatrixUtil.m_to_string(L_small)
    print >> out
    print >> out, 'new distance matrix:'
    print >> out, MatrixUtil.m_to_string(D_small)
    print >> out
    print >> out, 'new taxon labels:'
    for index in sorted(index_complement):
        print >> out, ordered_labels[index]
    # write the response
    return out.getvalue()
def laplacian_to_dccov(L):
    """
    Pseudoinvert a laplacian matrix after shifting it by a constant matrix.
    @param L: a laplacian matrix
    @return: a double centered covariance matrix
    """
    MatrixUtil.assert_square(L)
    # every entry of the shift matrix is one over the number of rows
    nrows = float(len(L))
    shift = np.ones_like(L) / nrows
    # This should be the same but perhaps not as numerically stable:
    # HSH = np.linalg.pinv(L)
    return np.linalg.pinv(L - shift) + shift
def cov_to_edm(S):
    """
    Compute D[i, j] = S[i, i] + S[j, j] - 2*S[i, j].
    @param S: a covariance matrix
    @return: a Euclidean distance matrix
    """
    MatrixUtil.assert_square(S)
    diagonal = np.diag(S)
    ones = np.ones_like(diagonal)
    # the two outer products spread the diagonal along rows and columns
    by_row = np.outer(diagonal, ones)
    by_column = np.outer(ones, diagonal)
    return by_row + by_column - 2*S
def edm_to_q(D):
    """
    Compute Q[i, j] = (n-2)*D[i, j] - r[j] - r[i]
    where r holds the axis-0 sums of D.
    @param D: a treelike distance matrix
    @return: the neighbor joining Q matrix
    """
    MatrixUtil.assert_square(D)
    n = len(D)
    sums = np.sum(D, 0)
    ones = np.ones_like(sums)
    scaled = (n-2)*D
    return scaled - np.outer(ones, sums) - np.outer(sums, ones)
def get_wcss(sqdists, labels):
    """
    Get the within-cluster sum of squares.
    @param sqdists: for each point, the squared distance to each center
    @param labels: cluster labels
    @return: within-cluster sum of squares
    @raise ValueError: if the number of points and labels differ
    """
    MatrixUtil.assert_2d(sqdists)
    MatrixUtil.assert_1d(labels)
    if len(sqdists) != len(labels):
        raise ValueError('array incompatibility')
    # add up the squared distance of each point to its assigned center
    wcss = 0
    for point_sqdists, label in zip(sqdists, labels):
        wcss = wcss + point_sqdists[label]
    return wcss
def dccov_to_laplacian(HSH):
    """
    This function stably pseudoinverts a double centered matrix.
    @param HSH: a double centered covariance matrix
    @return: a laplacian matrix
    """
    MatrixUtil.assert_square(HSH)
    # every entry of the shift matrix is one over the number of rows
    nrows = float(len(HSH))
    shift = np.ones_like(HSH) / nrows
    # This should be the same but perhaps not as numerically stable:
    # L = np.linalg.pinv(HSH)
    return np.linalg.pinv(HSH - shift) + shift