def get_leaf_distn_acl(R, B):
    """
    Compute a leaf distribution from a root-decentered covariance matrix.

    This is a possibly equivalent formulation based on Felsenstein weights.
    The matrix M that is built here is meant to be the rooted covariance
    matrix from the appendix of Weights for Data Related by a Tree
    by Altschul, Carroll, and Lipman, 1989.
    @param R: the rooted tree, as accepted by Ftree.R_to_T and Ftree.R_to_root
    @param B: branch length information passed to Ftree.TB_to_L_principal
    @return: a dictionary that maps each leaf to its weight
    """
    # Order the vertices as leaves, then non-root internal vertices,
    # then the root, so that the root owns the last row and column.
    T = Ftree.R_to_T(R)
    root = Ftree.R_to_root(R)
    leaves = Ftree.T_to_leaves(T)
    inner = [v for v in Ftree.T_to_internal_vertices(T) if v != root]
    ordered = leaves + inner + [root]
    # The pseudoinverse of the Laplacian
    # is also the doubly centered covariance matrix.
    laplacian = Ftree.TB_to_L_principal(T, B, ordered)
    centered_cov = np.linalg.pinv(laplacian)
    # Decenter the covariance matrix using the root row
    # to get the rooted covariance matrix.
    root_row = centered_cov[-1]
    ones_vec = np.ones_like(root_row)
    ones_mat = np.ones_like(centered_cov)
    rooted_cov = (
        centered_cov
        - np.outer(ones_vec, root_row)
        - np.outer(root_row, ones_vec)
        + centered_cov[-1, -1] * ones_mat)
    # Restrict to the leaf block, which comes first in the vertex order.
    nleaves = len(leaves)
    leaf_cov = rooted_cov[:nleaves, :nleaves]
    leaf_cov_pinv = np.linalg.pinv(leaf_cov)
    # The normalized column sums of the pseudoinverse are the leaf weights.
    weights = leaf_cov_pinv.sum(axis=0) / leaf_cov_pinv.sum()
    return dict(zip(leaves, weights))
def X_to_L_V(self, X):
    """
    Unpack X into a Laplacian matrix and an augmented valuation array.

    This relies on state initialized elsewhere on the instance:
    self.X_to_B_Vr, self.T_test, self.vertices, and self.Vp.
    @param X: the packed values, as accepted by self.X_to_B_Vr
    @return: the pair (L, V) where L comes from Ftree.TB_to_L_principal
    and V is self.Vp stacked vertically on top of the unpacked Vr
    """
    branch_info, unpacked_rows = self.X_to_B_Vr(X)
    # The Laplacian is built first, then the augmented array,
    # in the same order as the returned pair.
    return (
        Ftree.TB_to_L_principal(self.T_test, branch_info, self.vertices),
        np.vstack([self.Vp, unpacked_rows]))
def _get_v_to_point(self):
    """
    Map each vertex of the tree to a three dimensional point.

    The coordinates are the entries of the eigenvectors of the full tree
    Laplacian at positions 1, 2, and 3 when ordered by increasing
    eigenvalue; the eigenvector at position 0 is not used.
    An IndexError is raised if there are fewer than four eigenvectors.
    @return: a dictionary that maps a vertex to a numpy array of length 3
    """
    ordered = Ftree.T_to_order(self.T)
    laplacian = Ftree.TB_to_L_principal(self.T, self.B, ordered)
    # eigh gives the eigenvalues in ascending order,
    # with the matching eigenvectors as columns.
    _eigenvalues, eigenvectors = scipy.linalg.eigh(laplacian)
    # Each of these columns supplies one coordinate per vertex.
    xs = eigenvectors[:, 1]
    ys = eigenvectors[:, 2]
    zs = eigenvectors[:, 3]
    v_to_point = {}
    for v, coords in zip(ordered, zip(xs, ys, zs)):
        v_to_point[v] = np.array(coords)
    return v_to_point
def get_internal_vertex_to_leaf_distn_cov(T, B):
    """
    Map each internal vertex to a leaf distribution using covariances.

    This is a possibly equivalent formulation.
    It is based on Schur complementation in the unrooted covariance matrix,
    using the conditional distribution formula from the
    multivariate normal distribution wikipedia page.
    Caveat recorded by the original author: this covariance based
    interpolator seems like it should work but it does not,
    whereas the Laplacian based interpolator -pinv(Lbb) * Lba works,
    where Lbb is the internal-by-internal block of the Laplacian
    and Lba is its internal-by-leaf block.
    @param T: the tree, as accepted by Ftree.T_to_leaves
    @param B: branch length information passed to Ftree.TB_to_L_principal
    @return: a dictionary that maps an internal vertex to a leaf distribution
    """
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    # Leaves come first so that the leaf block is the upper left corner.
    ordered = leaves + internal
    # The pseudoinverse of the full tree Laplacian
    # is the unrooted covariance matrix.
    laplacian = Ftree.TB_to_L_principal(T, B, ordered)
    unrooted_cov = np.linalg.pinv(laplacian)
    nleaves = len(leaves)
    leaf_by_leaf = unrooted_cov[:nleaves, :nleaves]
    internal_by_leaf = unrooted_cov[nleaves:, :nleaves]
    # Row i of the interpolator holds the weights for internal vertex i.
    interpolator = ndot(internal_by_leaf, np.linalg.pinv(leaf_by_leaf))
    return dict(
        (v, dict(
            (leaf, interpolator[i, j])
            for j, leaf in enumerate(leaves)))
        for i, v in enumerate(internal))
def get_response_content(fs):
    """
    Report a quadratic form value at sample points along every branch.

    For each directed edge of the arbitrarily rooted tree,
    an extra vertex is placed at 9 evenly spaced proportions
    strictly between the endpoints (presumably by add_vertex; confirm),
    the pseudoinverse of the resulting Laplacian is computed,
    and quadratic_form is evaluated on it with a vector that is
    0 at internal vertices, 1 at the extra vertex, and -1/nleaves at leaves.
    The report lists the sample points sorted by increasing value.
    @param fs: an object whose tree attribute is read by FtreeIO.newick_to_TBN
    @return: the text of the report
    """
    # read the tree
    T, B, N = FtreeIO.newick_to_TBN(fs.tree)
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    # the choice of root is arbitrary
    R = Ftree.T_to_R_canonical(T)
    # number of sample points per branch
    npillars = 9
    nleaves = len(leaves)
    r = get_new_vertex(T)
    # The entries of combo follow this vertex order.
    vertices = internal + [r] + leaves
    combo = np.array(
        [0] * len(internal) + [1] + [-1.0 / nleaves] * nleaves)
    # proportions of the distance along a branch, excluding both endpoints
    proportions = [(i + 1) / float(npillars + 1) for i in range(npillars)]
    # Map (source, sink, distance from source) to the quadratic form value.
    qform = {}
    for d_edge in R:
        source, sink = d_edge
        branch_length = B[frozenset(d_edge)]
        for t in proportions:
            T_new, B_new = add_vertex(T, B, d_edge, r, t)
            # the centered covariance matrix of the augmented tree
            laplacian = Ftree.TB_to_L_principal(T_new, B_new, vertices)
            centered_cov = np.linalg.pinv(laplacian)
            position = (source, sink, t * branch_length)
            qform[position] = quadratic_form(centered_cov, combo)
    # Put the value first so that sorting orders by increasing value.
    ranked = sorted((val, va, vb, d) for (va, vb, d), val in qform.items())
    # write the report
    out = StringIO()
    for val, va, vb, d in ranked:
        print >> out, N[va], '--[', d, ']-->', N[vb], ':', val
    # NOTE(review): the blank line is assumed to follow the whole list
    # rather than every record; confirm against the original layout.
    print >> out
    return out.getvalue()