def get_harmonically_extended_MDS(self):
    """
    Use harmonically extended 2D MDS, reflected to the reference.
    The leaf points come from two eigenpairs of the leaf Schur complement
    and are then extended to the internal vertices.
    @return: an array with the leaf point rows stacked above
    the internal vertex point rows
    """
    T, B = self.T, self.B
    Lbb = Ftree.TB_to_L_block(T, B, self.internal, self.internal)
    Lba = Ftree.TB_to_L_block(T, B, self.internal, self.leaves)
    L_schur = Ftree.TB_to_L_schur(T, B, self.leaves)
    # keep only the eigenpairs with indices 1 and 2
    eigenvalues, eigenvectors = scipy.linalg.eigh(L_schur, eigvals=(1, 2))
    # scale each eigenvector column by the reciprocal square root
    # of its eigenvalue
    column_scale = np.reciprocal(np.sqrt(eigenvalues))
    leaf_points = self._reflected_to_reference(eigenvectors * column_scale)
    # map the leaf points onto the internal vertices
    extension = np.dot(np.linalg.pinv(Lbb), Lba)
    internal_points = -np.dot(extension, leaf_points)
    return np.vstack([leaf_points, internal_points])
def get_response_content(fs):
    """
    Draw the leaf MDS of one tree harmonically extended onto another tree.
    The tree in fs.true_tree supplies the 2D MDS points of the leaves.
    The tree in fs.test_tree supplies the harmonic extension operator.
    Leaves of the two trees are matched by name.
    @param fs: user input with true_tree, test_tree, imageformat, and scale
    @return: whatever get_animation_frame returns for the requested format
    @raise ValueError: if a leaf is unnamed, if leaf names are repeated
    within a tree, or if the two trees have different leaf name sets
    """
    # read the trees
    T_true, B_true, N_true = FtreeIO.newick_to_TBN(fs.true_tree)
    T_test, B_test, N_test = FtreeIO.newick_to_TBN(fs.test_tree)
    # we are concerned about the names of the leaves of the two trees
    true_leaves = Ftree.T_to_leaves(T_true)
    test_leaves = Ftree.T_to_leaves(T_test)
    # Only named leaves enter these maps.
    # Indexing the name map directly would raise a KeyError for an
    # unnamed leaf before the checks below could report the problem.
    true_leaf_to_n = dict(
            (v, N_true[v]) for v in true_leaves
            if N_true.get(v) is not None)
    test_leaf_to_n = dict(
            (v, N_test[v]) for v in test_leaves
            if N_test.get(v) is not None)
    # check that all leaves are named
    if len(true_leaves) != len(true_leaf_to_n):
        raise ValueError(
                'all leaves in the leaf MDS tree should be named')
    if len(test_leaves) != len(test_leaf_to_n):
        raise ValueError(
                'all leaves in the harmonic extension tree should be named')
    # check that within each tree all leaves are uniquely named
    if len(set(true_leaf_to_n.values())) != len(true_leaves):
        raise ValueError(
                'all leaf names in the leaf MDS tree should be unique')
    if len(set(test_leaf_to_n.values())) != len(test_leaves):
        raise ValueError(
                'all leaf names in the harmonic extension tree '
                'should be unique')
    # check that the leaf name sets are the same
    if set(true_leaf_to_n.values()) != set(test_leaf_to_n.values()):
        raise ValueError(
                'the two trees should have corresponding leaf names')
    # invert the leaf name maps
    true_n_to_leaf = dict((n, v) for v, n in true_leaf_to_n.items())
    test_n_to_leaf = dict((n, v) for v, n in test_leaf_to_n.items())
    # get correspondingly ordered leaf sequences
    leaf_names = list(true_leaf_to_n.values())
    true_leaves_reordered = [true_n_to_leaf[n] for n in leaf_names]
    test_leaves_reordered = [test_n_to_leaf[n] for n in leaf_names]
    # get the Schur complement matrix for the leaves
    L_schur_true = Ftree.TB_to_L_schur(
            T_true, B_true, true_leaves_reordered)
    # get the MDS points from the eigenpairs with indices 1 and 2
    w, V = scipy.linalg.eigh(L_schur_true, eigvals=(1, 2))
    X = np.dot(V, np.diag(np.reciprocal(np.sqrt(w))))
    # get the linear operator that defines the harmonic extension
    test_internal = Ftree.T_to_internal_vertices(T_test)
    L22 = Ftree.TB_to_L_block(
            T_test, B_test, test_internal, test_internal)
    L21 = Ftree.TB_to_L_block(
            T_test, B_test, test_internal, test_leaves_reordered)
    M = -np.dot(np.linalg.pinv(L22), L21)
    # get the harmonic extension
    X_extension = np.dot(M, X)
    X_extended = np.vstack([X, X_extension])
    # draw the image;
    # the row order of X_extended is the reordered leaves then the internal
    v_to_index = Ftree.invseq(test_leaves_reordered + test_internal)
    physical_size = (640, 480)
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    return get_animation_frame(ext, physical_size, fs.scale,
            v_to_index, T_test, X_extended)
def get_response_content(fs):
    """
    Report the algebraic connectivity and the perron values of a tree.
    The leaves are partitioned by the branches at a distinguished vertex,
    and one value is reported per part.
    @param fs: user input with tree and vertex
    @return: the text of the report
    @raise ValueError: if the distinguished vertex is not internal
    """
    # read the tree
    T, B, N = FtreeIO.newick_to_TBN(fs.tree)
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    # get the distinguished vertex of articulation
    r = get_unique_vertex(N, fs.vertex)
    if r not in internal:
        raise ValueError(
                'the distinguished vertex should have degree at least two')
    # Partition the leaves with respect to the given root.
    # Each set of leaves will eventually define a connected component.
    R = Ftree.T_to_R_specific(T, r)
    v_to_sinks = Ftree.R_to_v_to_sinks(R)
    # break the edges that touch the distinguished vertex
    R_pruned = set(R)
    neighbors = Ftree.T_to_v_to_neighbors(T)[r]
    for adj in neighbors:
        R_pruned.remove((r, adj))
    T_pruned = Ftree.R_to_T(R_pruned)
    # get the leaf partition
    leaf_set = set(leaves)
    ordered_leaves = []
    leaf_lists = []
    for adj in neighbors:
        if adj in leaf_set:
            # A leaf adjacent to the distinguished vertex is a component
            # by itself. It is never the sink of a directed edge of its
            # own subtree, so the sink scan below would lose it.
            C = [adj]
        else:
            R_subtree = Ftree.T_to_R_specific(T_pruned, adj)
            # the sinks that have no sinks of their own are the leaves
            C = sorted(b for a, b in R_subtree if b not in v_to_sinks)
        ordered_leaves.extend(C)
        leaf_lists.append(C)
    # define the vertices to keep and those to remove
    keepers = ordered_leaves + [r]
    # get the schur complement
    L_schur = Ftree.TB_to_L_schur(T, B, keepers)
    # get principal submatrices of the schur complement,
    # one diagonal block per component in the order of the keepers
    principal_matrices = []
    accum = 0
    for component_leaves in leaf_lists:
        n = len(component_leaves)
        M = L_schur[accum:accum+n, accum:accum+n]
        principal_matrices.append(M)
        accum += n
    # write the report
    out = StringIO()
    print >> out, 'algebraic connectivity:'
    print >> out, get_algebraic_connectivity(T, B, leaves)
    print >> out
    print >> out
    print >> out, 'perron values:'
    print >> out
    for M, leaf_list in zip(principal_matrices, leaf_lists):
        # the smallest eigenvalue of the principal submatrix
        value = scipy.linalg.eigh(M, eigvals_only=True)[0]
        name_list = [N[v] for v in leaf_list]
        print >> out, name_list
        print >> out, value
        print >> out
    return out.getvalue()
def get_harmonically_extended_MDS(T, B, leaves, internal):
    """
    Use harmonically extended 2D MDS.
    @param T: topology
    @param B: branch lengths
    @param leaves: the leaf vertices, defining the first rows of the result
    @param internal: the internal vertices, defining the remaining rows
    @return: an array with the leaf point rows stacked above
    the internal vertex point rows
    """
    Lbb = Ftree.TB_to_L_block(T, B, internal, internal)
    Lba = Ftree.TB_to_L_block(T, B, internal, leaves)
    L_schur = Ftree.TB_to_L_schur(T, B, leaves)
    # keep only the eigenpairs with indices 1 and 2
    eigenvalues, eigenvectors = scipy.linalg.eigh(L_schur, eigvals=(1, 2))
    # scale each eigenvector column by the reciprocal square root
    # of its eigenvalue
    leaf_points = eigenvectors * np.reciprocal(np.sqrt(eigenvalues))
    # map the leaf points onto the internal vertices
    extension = np.dot(np.linalg.pinv(Lbb), Lba)
    internal_points = -np.dot(extension, leaf_points)
    return np.vstack([leaf_points, internal_points])
def get_gap(blen, T, B, u_edge):
    """
    This function will be minimized.
    Note that the proposed branch length is written into B in place.
    @param blen: proposed branch length
    @param T: topology
    @param B: branch lengths, modified by this call
    @param u_edge: undirected edge
    @return: the absolute difference between the eigenvalues with
    indices 1 and 2, or 1 when the proposed branch length is not positive
    """
    # a proposal that is not positive gets a fixed value
    if blen <= 0:
        return 1
    B[u_edge] = blen
    L_schur = Ftree.TB_to_L_schur(T, B, Ftree.T_to_leaves(T))
    lower, upper = scipy.linalg.eigh(
            L_schur, eigvals_only=True, eigvals=(1, 2))
    return abs(lower - upper)
def get_v_to_point(self):
    """
    This uses the harmonic extension.
    Also it uses the reference MDS for reflection.
    @return: a map from vertex to point
    """
    T, B = self.T, self.B
    Lbb = Ftree.TB_to_L_block(T, B, self.internal, self.internal)
    Lba = Ftree.TB_to_L_block(T, B, self.internal, self.leaves)
    L_schur = Ftree.TB_to_L_schur(T, B, self.leaves)
    # keep only the eigenpairs with indices 1 and 2
    eigenvalues, eigenvectors = scipy.linalg.eigh(L_schur, eigvals=(1, 2))
    column_scale = np.reciprocal(np.sqrt(eigenvalues))
    leaf_points = self._reflected_to_reference(eigenvectors * column_scale)
    # map the leaf points onto the internal vertices
    extension = np.dot(np.linalg.pinv(Lbb), Lba)
    internal_points = -np.dot(extension, leaf_points)
    all_points = np.vstack([leaf_points, internal_points])
    # the rows are ordered by the leaves and then the internal vertices
    ordered_vertices = self.leaves + self.internal
    v_to_point = {}
    for v, point in zip(ordered_vertices, all_points):
        v_to_point[v] = tuple(point)
    return v_to_point
def TB_to_harmonic_valuations(T, B):
    """
    Each valuation maps every vertex of the tree to a value.
    The leaf values are an eigenvector of the leaf Schur complement
    and the internal values are computed from the leaf values.
    @param T: topology
    @param B: branch lengths
    @return: a number of dictionaries equal to the number of leaves
    """
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    ordered_vertices = leaves + internal
    Lbb = Ftree.TB_to_L_block(T, B, internal, internal)
    Lba = Ftree.TB_to_L_block(T, B, internal, leaves)
    L_schur = Ftree.TB_to_L_schur(T, B, leaves)
    # all eigenvectors are used but the eigenvalues are not
    _, leaf_part = scipy.linalg.eigh(L_schur)
    extension = np.dot(np.linalg.pinv(Lbb), Lba)
    internal_part = -np.dot(extension, leaf_part)
    stacked = np.vstack([leaf_part, internal_part])
    # make one vertex valuation per column
    return [
            dict(zip(ordered_vertices, stacked[:, col]))
            for col in range(len(leaves))]
def get_leaf_distn_schur(R, B):
    """
    This is a possibly equivalent formulation.
    It is based on removing all internal vertices except the root
    by Schur complement.
    @param R: the rooted tree, as accepted by Ftree.R_to_T and
    Ftree.R_to_root
    @param B: branch lengths
    @return: a map from each leaf to its normalized weight
    """
    T = Ftree.R_to_T(R)
    r = Ftree.R_to_root(R)
    leaves = Ftree.T_to_leaves(T)
    # Get the combinatorial Laplacian matrix
    # and Schur complement out all of the non-root internal vertices.
    # The root is last in the vertex order, after the leaves.
    L_schur = Ftree.TB_to_L_schur(T, B, leaves + [r])
    # Get the vector of negative weights between the root and the leaves.
    # This is the root row without its diagonal entry.
    w_unnormalized = L_schur[-1, :-1]
    # Get the normalized weight vector;
    # dividing by the sum cancels the common sign.
    w = w_unnormalized / w_unnormalized.sum()
    return dict((v, w[i]) for i, v in enumerate(leaves))
def __call__(self, X):
    """
    Evaluate the objective function at a parameter array.
    First few entries of X are logs of branch lengths.
    Next few entries are vr1 entries.
    Next few entries are vr2 entries.
    @param X: a 1D numpy array of floats
    @return: the squared frobenius norm of the error matrix
    plus the squared error of the two observed eigenvalues
    """
    # unpack the parameter array; only the branch lengths are used here
    B, _ = self.X_to_B_Vr(X)
    L, V = self.X_to_L_V(X)
    # compute the squared frobenius norm of the error matrix
    residual = np.dot(L, V) - self.C
    frob_norm_err = np.sum(residual * residual)
    # use a hack to make sure we are really using the first two eigenvalues
    L_schur = Ftree.TB_to_L_schur(self.T_test, B, self.leaves)
    w_error = scipy.linalg.eigvalsh(L_schur, eigvals=(1, 2)) - self.w
    eigenvalue_err = np.sum(w_error * w_error)
    # return the total error
    return frob_norm_err + eigenvalue_err
def get_response_content(fs):
    """
    Write an html page that shows the stages of a tree decomposition.
    Each stage is rendered as svg and then every supervertex with more
    than two vertices is split, using either the spectral strategy or
    the neighbor joining strategy, until no more splits are possible.
    @param fs: user input with newick, weight_delta_mu, the split strategy
    flags nj_split and spectral_split, and the visualization flags
    vis_star and vis_complete
    @return: the html text
    @raise ValueError: if a supervertex has fewer than two vertices
    """
    # read the user input
    weight_delta_mu = fs.weight_delta_mu
    T, B, N = FtreeIO.newick_to_TBN(fs.newick)
    # summarize the tree
    leaves = Ftree.T_to_leaves(T)
    nleaves = len(leaves)
    # init the tree reconstruction state
    v_to_name = {}
    for v in leaves:
        name = N.get(v, None)
        if name is None:
            # make up a name for an unnamed leaf
            name = 'P' + chr(ord('a') + v)
        v_to_name[v] = name
    v_to_svs = dict((v, set([0])) for v in leaves)
    sv_to_vs = {0: set(leaves)}
    # define edge weights (used only for spectral split strategy)
    # as the fully connected schur complement graph Laplacian matrix
    G = Ftree.TB_to_L_schur(T, B, leaves)
    # Add some random amount to each edge weight.
    # The rate uses float division so that an integer mean is not
    # truncated to a zero rate.
    rate = 1.0 / weight_delta_mu
    for i in range(nleaves):
        for j in range(i):
            x = random.expovariate(rate)
            # keep the matrix symmetric with zero row sums
            G[i, j] -= x
            G[j, i] -= x
            G[i, i] += x
            G[j, j] += x
    edge_to_weight = {}
    for index_pair in itertools.combinations(range(nleaves), 2):
        i, j = index_pair
        leaf_pair = (leaves[i], leaves[j])
        edge_to_weight[frozenset(leaf_pair)] = -G[index_pair]
    # define pairwise distances (used only for nj split strategy)
    D = Ftree.TB_to_D(T, B, leaves)
    edge_to_distance = {}
    for index_pair in itertools.combinations(range(nleaves), 2):
        i, j = index_pair
        leaf_pair = (leaves[i], leaves[j])
        edge_to_distance[frozenset(leaf_pair)] = D[index_pair]
    # the supervertices that may still be decomposed
    active_svs = set([0])
    # initialize the sources of unique vertex and supervertex identifiers
    v_gen = itertools.count(max(leaves) + 1)
    sv_gen = itertools.count(1)
    # write the output
    out = StringIO()
    print >> out, '<html>'
    print >> out, '<body>'
    for count_pos in itertools.count(1):
        # add the graph rendering before the decomposition at this stage
        if fs.nj_split:
            edge_to_branch_weight = {}
            for edge, distance in edge_to_distance.items():
                edge_to_branch_weight[edge] = 1.0 / distance
        elif fs.spectral_split:
            edge_to_branch_weight = edge_to_weight
        print >> out, '<div>'
        if fs.vis_star:
            print >> out, nhj.get_svg_star_components(
                    active_svs, sv_to_vs, v_to_name, v_to_svs,
                    edge_to_branch_weight)
        elif fs.vis_complete:
            print >> out, nhj.get_svg(
                    active_svs, sv_to_vs, v_to_name, v_to_svs,
                    edge_to_branch_weight)
        print >> out, '</div>'
        # update the splits
        next_active_svs = set()
        # svs can be decomposed independently in arbitrary order
        alpha_index_gen = itertools.count()
        for sv in active_svs:
            nstates = len(sv_to_vs[sv])
            if nstates > 2:
                v_new = next(v_gen)
                sv_new_a = next(sv_gen)
                sv_new_b = next(sv_gen)
                # name the new vertex by the stage and by a letter
                alpha_index = next(alpha_index_gen)
                alpha = chr(ord('a') + alpha_index)
                v_to_name[v_new] = 'R%s%s' % (count_pos, alpha)
                next_active_svs.add(sv_new_a)
                next_active_svs.add(sv_new_b)
                if fs.spectral_split:
                    if nstates == 3:
                        sv_new_c = next(sv_gen)
                        nhj.delta_wye_transform(
                                sv, v_to_svs, sv_to_vs, edge_to_weight,
                                v_new, sv_new_a, sv_new_b, sv_new_c)
                        next_active_svs.add(sv_new_c)
                    else:
                        nhj.harmonic_split_transform(
                                sv, v_to_svs, sv_to_vs, edge_to_weight,
                                v_new, sv_new_a, sv_new_b)
                elif fs.nj_split:
                    sv_new_big = next(sv_gen)
                    nhj.nj_split_transform(
                            sv, v_to_svs, sv_to_vs, edge_to_distance,
                            v_new, sv_new_big, sv_new_a, sv_new_b)
                    next_active_svs.add(sv_new_big)
            elif nstates == 2:
                next_active_svs.add(sv)
            else:
                raise ValueError('supervertex has too few vertices')
        # if the set of active svs has not changed then we are done
        if active_svs == next_active_svs:
            break
        else:
            active_svs = next_active_svs
    # close the elements in the reverse of the order they were opened
    print >> out, '</body>'
    print >> out, '</html>'
    return out.getvalue()
def get_response_content(fs):
    """
    Report one eigenvalue of the leaf Schur complement of a tree.
    @param fs: user input with tree
    @return: the string form of the eigenvalue at index 1
    of the eigenvalues returned by scipy.linalg.eigh
    """
    T, B, N = FtreeIO.newick_to_TBN(fs.tree)
    L_schur = Ftree.TB_to_L_schur(T, B, Ftree.T_to_leaves(T))
    spectrum = scipy.linalg.eigh(L_schur, eigvals_only=True)
    return str(spectrum[1])
def get_response_content(fs):
    """
    Report Schur complement matrices of a rooted tree.
    The report shows the rooted and the full Schur complements with their
    eigendecompositions, followed by an analysis of each block of leaves
    induced by the root and an analysis of each subtree.
    Note that this sets the numpy print options as a side effect.
    @param fs: user input with tree
    @return: the text of the report
    """
    # read the tree
    R, B, N = FtreeIO.newick_to_RBN(fs.tree)
    r = Ftree.R_to_root(R)
    T = Ftree.R_to_T(R)
    leaves = Ftree.T_to_leaves(T)
    # define the lists of leaves induced by the root
    vertex_partition = sorted(Ftree.R_to_vertex_partition(R))
    vertex_lists = [sorted(p) for p in vertex_partition]
    leaf_set = set(leaves)
    leaf_lists = [sorted(s & leaf_set) for s in vertex_partition]
    # order the list of leaves in a nice block form
    leaves = [v for lst in leaf_lists for v in lst]
    # remove internal vertices by Schur complementation
    L_schur_rooted = Ftree.TB_to_L_schur(T, B, leaves + [r])
    L_schur_full = Ftree.TB_to_L_schur(T, B, leaves)
    # show the matrix
    np.set_printoptions(linewidth=132)
    out = StringIO()
    # show the rooted schur complement
    w, v = scipy.linalg.eigh(L_schur_rooted)
    print >> out, 'rooted Schur complement:'
    print >> out, L_schur_rooted
    print >> out, 'Felsenstein weights at the root:'
    # the root row without its diagonal entry, scaled by the diagonal entry
    print >> out, -L_schur_rooted[-1][:-1] / L_schur_rooted[-1, -1]
    print >> out, 'rooted Schur complement eigendecomposition:'
    print >> out, w
    print >> out, v
    print >> out
    # show the full schur complement
    w, v = scipy.linalg.eigh(L_schur_full)
    print >> out, 'full Schur complement:'
    print >> out, L_schur_full
    print >> out, 'full Schur complement eigendecomposition:'
    print >> out, w
    print >> out, v
    print >> out
    # analyze perron components;
    # each component is a diagonal block of the rooted schur complement
    print >> out, 'perron components:'
    print >> out
    start = 0
    for lst in leaf_lists:
        n = len(lst)
        C = L_schur_rooted[start:start + n, start:start + n]
        print >> out, 'C:'
        print >> out, C
        w_eff = np.sum(C)
        b_eff = 1 / w_eff
        print >> out, 'effective conductance:'
        print >> out, w_eff
        print >> out, 'effective branch length (or resistance or variance):'
        print >> out, b_eff
        S = np.linalg.pinv(C)
        print >> out, 'C^-1 (rooted covariance-like):'
        print >> out, S
        w, v = scipy.linalg.eigh(S)
        print >> out, 'rooted covariance-like eigendecomposition:'
        print >> out, w
        print >> out, v
        # the last eigenvalue returned by eigh is the largest
        print >> out, 'perron value:'
        print >> out, w[-1]
        print >> out, 'reciprocal of perron value:'
        print >> out, 1 / w[-1]
        print >> out
        start += n
    print >> out
    # analyze subtrees using all vertices of each part plus the root
    print >> out, 'subtree Laplacian analysis:'
    print >> out
    for lst in vertex_lists:
        C = Ftree.TB_to_L_schur(T, B, lst + [r])
        w, v = scipy.linalg.eigh(C)
        print >> out, 'subtree Laplacian:'
        print >> out, C
        print >> out, 'eigendecomposition:'
        print >> out, w
        print >> out, v
        print >> out
    # analyze subtrees using only the leaves of each part plus the root
    # NOTE(review): the heading says full Schur complement but the matrix
    # is computed from the tree with the root kept -- confirm the intent
    print >> out, 'full Schur complement subtree analysis:'
    print >> out
    for lst in leaf_lists:
        C = Ftree.TB_to_L_schur(T, B, lst + [r])
        w, v = scipy.linalg.eigh(C)
        print >> out, 'full Schur complement in subtree:'
        print >> out, C
        print >> out, 'eigendecomposition:'
        print >> out, w
        print >> out, v
        print >> out
    return out.getvalue()
def get_algebraic_connectivity(T, B, leaves):
    """
    @param T: topology
    @param B: branch lengths
    @param leaves: the vertices kept in the Schur complement
    @return: the eigenvalue at index 1 of the eigenvalues
    of the Schur complement as returned by scipy.linalg.eigh
    """
    spectrum = scipy.linalg.eigh(
            Ftree.TB_to_L_schur(T, B, leaves), eigvals_only=True)
    return spectrum[1]
def get_response_content(fs):
    """
    Fit branch lengths of a test topology to the 2D MDS of a true tree.
    The search is restarted with a doubled evaluation budget until half
    of the time limit has elapsed, and the last search is reported.
    @param fs: user input with true_tree and test_tree
    @return: the text of the report
    @raise ValueError: if the two trees do not have the same vertices,
    the same leaves, and the same internal vertices
    """
    nseconds_limit = 5.0
    R_true, B_true = FtreeIO.newick_to_RB(fs.true_tree, int)
    R_test = FtreeIO.newick_to_R(fs.test_tree, int)
    # get the unrooted tree topology
    T_true = Ftree.R_to_T(R_true)
    T_test = Ftree.R_to_T(R_test)
    # check the trees for vertex compatibility
    compatibility_checks = (
            (Ftree.T_to_order, 'vertex sets are not equal'),
            (Ftree.T_to_leaves, 'leaf vertex sets are not equal'),
            (Ftree.T_to_internal_vertices,
                'internal vertex sets are not equal'),
            )
    for get_vertices, message in compatibility_checks:
        if set(get_vertices(T_true)) != set(get_vertices(T_test)):
            raise ValueError(message)
    # get the 2D MDS for the true tree
    leaves = Ftree.T_to_leaves(T_true)
    internal = Ftree.T_to_internal_vertices(T_true)
    vertices = leaves + internal
    L_schur = Ftree.TB_to_L_schur(T_true, B_true, leaves)
    w_all, Vp_all = scipy.linalg.eigh(L_schur)
    # use the eigenpairs with indices 1 and 2
    w = w_all[1:3]
    Vp = Vp_all[:, 1:3]
    # make the constant matrix for Frobenius norm comparison;
    # the rows of the internal vertices stay zero
    C = np.zeros((len(vertices), 2))
    C[:len(leaves)] = w * Vp
    # keep doing iterations until we run out of time,
    # doubling the evaluation budget each time
    mymax = 256
    t_initial = time.time()
    while time.time() - t_initial < nseconds_limit / 2:
        mymax *= 2
        f = Functor(T_test.copy(), Vp.copy(), C.copy(), w.copy())
        initial_guess = np.ones(len(T_test) + 2 * len(internal))
        xopt, fopt, itr, funcalls, warnflag = scipy.optimize.fmin(
                f, initial_guess,
                ftol=1e-8, xtol=1e-8, full_output=True,
                maxfun=mymax, maxiter=mymax)
    # look at the values from the longest running iteration
    B, Vr = f.X_to_B_Vr(xopt)
    L, V = f.X_to_L_V(xopt)
    Lrr = Ftree.TB_to_L_block(T_test, B, internal, internal)
    Lrp = Ftree.TB_to_L_block(T_test, B, internal, leaves)
    H_ext = -np.dot(np.linalg.pinv(Lrr), Lrp)
    # name each vertex by its own label
    N = {}
    for vertex in vertices:
        N[vertex] = str(vertex)
    # start writing the response
    out = StringIO()
    print >> out, 'xopt:', xopt
    print >> out, 'fopt:', fopt
    print >> out, 'number of iterations:', itr
    print >> out, 'number of function calls:', funcalls
    print >> out, 'warning flags:', warnflag
    print >> out, 'first four eigenvalues:', w_all[:4]
    print >> out, 'Vr:'
    print >> out, Vr
    print >> out, '-Lrr^-1 Lrp Vp:'
    print >> out, np.dot(H_ext, Vp)
    print >> out, C
    print >> out, np.dot(L, V)
    print >> out, FtreeIO.RBN_to_newick(R_test, B, N)
    return out.getvalue()