def get_harmonically_extended_MDS(self):
    """
    Compute 2D MDS points for the leaves and extend them harmonically.
    The leaf points are passed through self._reflected_to_reference
    before they are extended to the internal vertices.
    @return: the leaf point array stacked on the internal point array
    """
    T, B = self.T, self.B
    leaves, internal = self.leaves, self.internal
    # Get the pieces of the Laplacian matrix used by the extension.
    block_ii = Ftree.TB_to_L_block(T, B, internal, internal)
    block_il = Ftree.TB_to_L_block(T, B, internal, leaves)
    # Scale eigenvectors 1 and 2 of the Schur complement
    # by the reciprocal square roots of their eigenvalues.
    schur = Ftree.TB_to_L_schur(T, B, leaves)
    eigenvalues, eigenvectors = scipy.linalg.eigh(schur, eigvals=(1, 2))
    scale = np.reciprocal(np.sqrt(eigenvalues))
    leaf_points = self._reflected_to_reference(eigenvectors * scale)
    # Extend the leaf points to the internal vertices.
    extension_op = np.dot(np.linalg.pinv(block_ii), block_il)
    internal_points = -np.dot(extension_op, leaf_points)
    return np.vstack([leaf_points, internal_points])
def get_response_content(fs):
    """
    Draw the test tree using leaf MDS points computed from the true tree.
    The 2D MDS points of the leaves come from the Schur complement matrix
    of the true tree, and the points of the internal vertices
    are the harmonic extension of the leaf points on the test tree.
    The leaves of the two trees are matched to each other by name.
    @param fs: provides true_tree, test_tree, imageformat, and scale
    @return: the value returned by get_animation_frame
    @raise ValueError: if a leaf is unnamed, if leaf names are repeated
    within a tree, or if the two trees have different leaf name sets
    """
    # read the trees
    T_true, B_true, N_true = FtreeIO.newick_to_TBN(fs.true_tree)
    T_test, B_test, N_test = FtreeIO.newick_to_TBN(fs.test_tree)
    # we are concerned about the names of the leaves of the two trees
    true_leaves = Ftree.T_to_leaves(T_true)
    test_leaves = Ftree.T_to_leaves(T_test)
    # Only leaves that have an entry in the name map are included here,
    # so that the length checks below can report an unnamed leaf
    # instead of the name lookup failing with a KeyError.
    true_leaf_to_n = dict(
            (v, N_true[v]) for v in true_leaves if v in N_true)
    test_leaf_to_n = dict(
            (v, N_test[v]) for v in test_leaves if v in N_test)
    # check that all leaves are named
    if len(true_leaves) != len(true_leaf_to_n):
        raise ValueError(
                'all leaves in the leaf MDS tree should be named')
    if len(test_leaves) != len(test_leaf_to_n):
        raise ValueError(
                'all leaves in the harmonic extension tree should be named')
    # check that within each tree all leaves are uniquely named
    if len(set(true_leaf_to_n.values())) != len(true_leaves):
        raise ValueError(
                'all leaf names in the leaf MDS tree should be unique')
    if len(set(test_leaf_to_n.values())) != len(test_leaves):
        raise ValueError(
                'all leaf names in the harmonic extension tree '
                'should be unique')
    # check that the leaf name sets are the same
    if set(true_leaf_to_n.values()) != set(test_leaf_to_n.values()):
        raise ValueError(
                'the two trees should have corresponding leaf names')
    # invert the leaf name maps
    true_n_to_leaf = dict((n, v) for v, n in true_leaf_to_n.items())
    test_n_to_leaf = dict((n, v) for v, n in test_leaf_to_n.items())
    # get correspondingly ordered leaf sequences
    leaf_names = list(true_leaf_to_n.values())
    true_leaves_reordered = [true_n_to_leaf[n] for n in leaf_names]
    test_leaves_reordered = [test_n_to_leaf[n] for n in leaf_names]
    # get the Schur complement matrix for the leaves
    L_schur_true = Ftree.TB_to_L_schur(
            T_true, B_true, true_leaves_reordered)
    # get the MDS points
    w, V = scipy.linalg.eigh(L_schur_true, eigvals=(1, 2))
    X = np.dot(V, np.diag(np.reciprocal(np.sqrt(w))))
    # get the linear operator that defines the harmonic extension
    test_internal = Ftree.T_to_internal_vertices(T_test)
    L22 = Ftree.TB_to_L_block(
            T_test, B_test, test_internal, test_internal)
    L21 = Ftree.TB_to_L_block(
            T_test, B_test, test_internal, test_leaves_reordered)
    M = -np.dot(np.linalg.pinv(L22), L21)
    # get the harmonic extension
    X_extension = np.dot(M, X)
    X_extended = np.vstack([X, X_extension])
    # draw the image
    v_to_index = Ftree.invseq(test_leaves_reordered + test_internal)
    physical_size = (640, 480)
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    return get_animation_frame(ext, physical_size, fs.scale,
            v_to_index, T_test, X_extended)
def get_harmonically_extended_MDS(T, B, leaves, internal):
    """
    Use harmonically extended 2D MDS.
    @param T: topology
    @param B: branch lengths
    @param leaves: the ordered leaf vertices
    @param internal: the ordered internal vertices
    @return: the leaf point array stacked on the internal point array
    """
    # Get the pieces of the Laplacian matrix used by the extension.
    block_ii = Ftree.TB_to_L_block(T, B, internal, internal)
    block_il = Ftree.TB_to_L_block(T, B, internal, leaves)
    # Scale eigenvectors 1 and 2 of the Schur complement
    # by the reciprocal square roots of their eigenvalues.
    schur = Ftree.TB_to_L_schur(T, B, leaves)
    eigenvalues, eigenvectors = scipy.linalg.eigh(schur, eigvals=(1, 2))
    scale = np.reciprocal(np.sqrt(eigenvalues))
    leaf_points = eigenvectors * scale
    # Extend the leaf points to the internal vertices.
    extension_op = np.dot(np.linalg.pinv(block_ii), block_il)
    internal_points = -np.dot(extension_op, leaf_points)
    return np.vstack([leaf_points, internal_points])
def get_v_to_point(self):
    """
    Map each vertex to its harmonically extended MDS point.
    The leaf points are passed through self._reflected_to_reference
    before they are extended to the internal vertices.
    @return: a map from vertex to point
    """
    T, B = self.T, self.B
    leaves, internal = self.leaves, self.internal
    # Get the pieces of the Laplacian matrix used by the extension.
    block_ii = Ftree.TB_to_L_block(T, B, internal, internal)
    block_il = Ftree.TB_to_L_block(T, B, internal, leaves)
    # Scale eigenvectors 1 and 2 of the Schur complement
    # by the reciprocal square roots of their eigenvalues.
    schur = Ftree.TB_to_L_schur(T, B, leaves)
    eigenvalues, eigenvectors = scipy.linalg.eigh(schur, eigvals=(1, 2))
    scale = np.reciprocal(np.sqrt(eigenvalues))
    leaf_points = self._reflected_to_reference(eigenvectors * scale)
    # Extend the leaf points to the internal vertices.
    extension_op = np.dot(np.linalg.pinv(block_ii), block_il)
    internal_points = -np.dot(extension_op, leaf_points)
    all_points = np.vstack([leaf_points, internal_points])
    # The rows are paired with the leaves followed by the internal vertices.
    ordered_vertices = leaves + internal
    v_to_point = {}
    for vertex, point in zip(ordered_vertices, all_points):
        v_to_point[vertex] = tuple(point)
    return v_to_point
def get_response_content(fs):
    """
    Draw the test tree using leaf MDS points computed from a distance matrix.
    The leaf points come from MDS_v4 applied to the distance matrix,
    and the points of the internal vertices
    are the harmonic extension of the leaf points on the test tree.
    The rows of the distance matrix are matched to tree leaves by name.
    @param fs: provides names, D, test_tree, imageformat, and scale
    @return: the value returned by get_animation_frame
    @raise HandlingError: if the number of names differs from
    the number of rows of the distance matrix
    @raise ValueError: if a leaf is unnamed, if names are repeated,
    or if the tree and the distance matrix have different name sets
    """
    # read the ordered leaf names for the distance matrix
    D_names = Util.get_stripped_lines(fs.names.splitlines())
    # read the tree
    T_test, B_test, N_test = FtreeIO.newick_to_TBN(fs.test_tree)
    # we are concerned about the names of the leaves of the tree
    test_leaves = Ftree.T_to_leaves(T_test)
    # Only leaves that have an entry in the name map are included here,
    # so that the length check below can report an unnamed leaf
    # instead of the name lookup failing with a KeyError.
    test_leaf_to_n = dict(
            (v, N_test[v]) for v in test_leaves if v in N_test)
    # check that each row of the distance matrix has a name
    if len(D_names) != len(fs.D):
        raise HandlingError('the number of ordered leaf names '
                'should be the same as the number of rows '
                'in the distance matrix')
    # check that all leaves are named
    if len(test_leaves) != len(test_leaf_to_n):
        raise ValueError('all leaves in the harmonic extension tree '
                'should be named')
    # check that leaves are uniquely named
    if len(set(D_names)) != len(D_names):
        raise ValueError('all ordered leaf names in the distance matrix '
                'should be unique')
    # Repeated names on the tree would be silently merged
    # when the leaf name map is inverted below.
    if len(set(test_leaf_to_n.values())) != len(test_leaves):
        raise ValueError('all leaf names in the harmonic extension tree '
                'should be unique')
    # check that the leaf name sets are the same
    if set(D_names) != set(test_leaf_to_n.values()):
        raise ValueError('the set of leaf names on the tree '
                'should be the same as '
                'the set of leaf names for the distance matrix')
    # invert the leaf name map
    test_n_to_leaf = dict((n, v) for v, n in test_leaf_to_n.items())
    # get correspondingly ordered leaf sequences
    test_leaves_reordered = [test_n_to_leaf[n] for n in D_names]
    # get the MDS points
    X = MDS_v4(fs.D)
    # get the linear operator that defines the harmonic extension
    test_internal = Ftree.T_to_internal_vertices(T_test)
    L22 = Ftree.TB_to_L_block(
            T_test, B_test, test_internal, test_internal)
    L21 = Ftree.TB_to_L_block(
            T_test, B_test, test_internal, test_leaves_reordered)
    M = -np.dot(np.linalg.pinv(L22), L21)
    # get the harmonic extension
    X_extension = np.dot(M, X)
    X_extended = np.vstack([X, X_extension])
    # draw the image
    v_to_index = Ftree.invseq(test_leaves_reordered + test_internal)
    physical_size = (640, 480)
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    return get_animation_frame(ext, physical_size, fs.scale,
            v_to_index, T_test, X_extended)
def TB_to_harmonic_valuations(T, B):
    """
    Get harmonically extended eigenvectors as vertex valuations.
    Each eigenvector of the Schur complement matrix of the leaves
    is extended to the internal vertices,
    and each extended vector is returned as a map from vertex to value.
    @param T: topology
    @param B: branch lengths
    @return: a number of dictionaries equal to the number of leaves
    """
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    ordered_vertices = leaves + internal
    # Get the pieces of the Laplacian matrix used by the extension.
    block_ii = Ftree.TB_to_L_block(T, B, internal, internal)
    block_il = Ftree.TB_to_L_block(T, B, internal, leaves)
    # Only the eigenvectors are used; the eigenvalues are ignored.
    schur = Ftree.TB_to_L_schur(T, B, leaves)
    _eigenvalues, leaf_part = scipy.linalg.eigh(schur)
    extension_op = np.dot(np.linalg.pinv(block_ii), block_il)
    internal_part = -np.dot(extension_op, leaf_part)
    stacked = np.vstack([leaf_part, internal_part])
    # Make one valuation per column of the stacked matrix.
    return [
            dict((v, stacked[row, col])
                for row, v in enumerate(ordered_vertices))
            for col in range(len(leaves))]
def harmonically_interpolate(T, B, v_to_value):
    """
    Use the harmonic extension to augment the v_to_value map.
    The T and B data is not modified.
    The v_to_value map is updated in place and nothing is returned.
    @param T: topology
    @param B: branch lengths
    @param v_to_value: a map from some of the vertices to known values
    """
    all_vertices = Ftree.T_to_order(T)
    # Split the vertices according to whether or not the value is known.
    known = sorted(v_to_value)
    unknown = sorted(set(all_vertices) - set(v_to_value))
    # Interpolation is needed only if some value is unknown.
    if unknown:
        # Get pieces of the Laplacian matrix.
        block_uu = Ftree.TB_to_L_block(T, B, unknown, unknown)
        block_uk = Ftree.TB_to_L_block(T, B, unknown, known)
        # Extend the known values to the vertices with unknown values.
        known_values = np.array([v_to_value[v] for v in known])
        extension_op = np.dot(np.linalg.pinv(block_uu), block_uk)
        interpolated = -np.dot(extension_op, known_values)
        # Put the interpolated values into the dictionary.
        v_to_value.update(zip(unknown, interpolated))
def get_internal_vertex_to_leaf_distn(T, B):
    """
    Return a map from an internal vertex to a leaf distribution.
    The entry for an internal vertex and a leaf is the corresponding
    entry of the harmonic extension matrix.
    @param T: topology
    @param B: branch lengths
    @return: a dictionary that maps an internal vertex to a leaf distribution
    """
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    # Get pieces of the Laplacian matrix.
    block_ii = Ftree.TB_to_L_block(T, B, internal, internal)
    block_il = Ftree.TB_to_L_block(T, B, internal, leaves)
    # This has a row for each internal vertex and a column for each leaf.
    interpolator = -np.dot(np.linalg.pinv(block_ii), block_il)
    return dict(
            (v, dict(
                (leaf, interpolator[i, j])
                for j, leaf in enumerate(leaves)))
            for i, v in enumerate(internal))
def get_response_content(fs):
    """
    Report the result of a time limited optimization on the test tree.
    The true tree provides the eigenvalues and eigenvectors
    of the Schur complement matrix of its leaves.
    The optimization is restarted with a doubled evaluation budget
    until half of the time limit has elapsed,
    and the report describes the last optimization that was run.
    @param fs: provides true_tree and test_tree
    @return: the text of the report
    @raise ValueError: if the trees do not have the same vertex sets
    """
    nseconds_limit = 5.0
    R_true, B_true = FtreeIO.newick_to_RB(fs.true_tree, int)
    R_test = FtreeIO.newick_to_R(fs.test_tree, int)
    # get the unrooted tree topology
    T_true = Ftree.R_to_T(R_true)
    T_test = Ftree.R_to_T(R_test)
    # check the trees for vertex compatibility
    compatibility_checks = (
            (Ftree.T_to_order,
                'vertex sets are not equal'),
            (Ftree.T_to_leaves,
                'leaf vertex sets are not equal'),
            (Ftree.T_to_internal_vertices,
                'internal vertex sets are not equal'))
    for get_vertices, complaint in compatibility_checks:
        if set(get_vertices(T_true)) != set(get_vertices(T_test)):
            raise ValueError(complaint)
    # get the 2D MDS for the true tree
    leaves = Ftree.T_to_leaves(T_true)
    internal = Ftree.T_to_internal_vertices(T_true)
    vertices = leaves + internal
    L_schur = Ftree.TB_to_L_schur(T_true, B_true, leaves)
    w_all, Vp_all = scipy.linalg.eigh(L_schur)
    w = w_all[1:3]
    Vp = Vp_all[:, 1:3]
    # make the constant matrix for Frobenius norm comparison
    nleaves = len(leaves)
    C = np.zeros((len(vertices), 2))
    C[:nleaves] = w * Vp
    # keep doing iterations until we run out of time
    budget = 256
    t_initial = time.time()
    while time.time() - t_initial < nseconds_limit / 2:
        # each restart is allowed twice as many evaluations as the last one
        budget *= 2
        f = Functor(T_test.copy(), Vp.copy(), C.copy(), w.copy())
        initial_guess = np.ones(len(T_test) + 2 * len(internal))
        xopt, fopt, itr, funcalls, warnflag = scipy.optimize.fmin(
                f, initial_guess, ftol=1e-8, xtol=1e-8,
                full_output=True, maxfun=budget, maxiter=budget)
    # look at the values from the longest running iteration
    B, Vr = f.X_to_B_Vr(xopt)
    L, V = f.X_to_L_V(xopt)
    Lrr = Ftree.TB_to_L_block(T_test, B, internal, internal)
    Lrp = Ftree.TB_to_L_block(T_test, B, internal, leaves)
    H_ext = -np.dot(np.linalg.pinv(Lrr), Lrp)
    N = dict((v, str(v)) for v in vertices)
    # start writing the response
    out = StringIO()
    labeled_values = (
            ('xopt:', xopt),
            ('fopt:', fopt),
            ('number of iterations:', itr),
            ('number of function calls:', funcalls),
            ('warning flags:', warnflag),
            ('first four eigenvalues:', w_all[:4]))
    for label, value in labeled_values:
        print >> out, label, value
    print >> out, 'Vr:'
    print >> out, Vr
    print >> out, '-Lrr^-1 Lrp Vp:'
    print >> out, np.dot(H_ext, Vp)
    print >> out, C
    print >> out, np.dot(L, V)
    print >> out, FtreeIO.RBN_to_newick(R_test, B, N)
    return out.getvalue()