def equal_arc_layout(T, B):
    """
    Compute the equal arc layout of a tree.
    @param T: tree topology
    @param B: branch lengths
    @return: a map from vertex to location
    """
    # The rooting is arbitrary; it only serves to define subtrees.
    R = Ftree.T_to_R_canonical(T)
    root = Ftree.R_to_root(R)
    v_to_sinks = Ftree.R_to_v_to_sinks(R)
    # Count the tips under each vertex.
    # Postorder guarantees that the counts of the sinks are known
    # before the count of their source is requested.
    tip_counts = {}
    for vertex in Ftree.R_to_postorder(R):
        children = v_to_sinks.get(vertex, [])
        if not children:
            # a vertex without sinks is a tip
            tip_counts[vertex] = 1
            continue
        tip_counts[vertex] = sum(tip_counts[child] for child in children)
    # Fill in the angles, starting with the full circle at the root.
    angles = {}
    _force_equal_arcs(
            v_to_sinks, tip_counts, angles,
            root, -math.pi, math.pi)
    # Turn the angles into coordinates, with the root at the origin.
    v_to_source = Ftree.R_to_v_to_source(R)
    locations = {}
    _update_locations(
            R, B,
            v_to_source, v_to_sinks, angles, locations,
            root, (0, 0), 0)
    return locations
def R_to_newick(R):
    """
    Write a directed topology as a newick string.
    @param R: a directed topology
    @return: a newick string
    """
    root = Ftree.R_to_root(R)
    v_to_sinks = Ftree.R_to_v_to_sinks(R)
    # the recursive helper writes everything except the terminator
    unterminated = _v_to_newick(v_to_sinks, root)
    return unterminated + ';'
def test_leaf_distn_a(self):
    """
    Check the leaf distribution at the root of a small example tree.
    """
    # Read the example tree.
    newick = '(a:2, (b:1, c:1, d:1, e:1)x:1)y;'
    R, B, N = FtreeIO.newick_to_RBN(newick)
    T = Ftree.R_to_T(R)
    root = Ftree.R_to_root(R)
    # Look up the observed leaf weights at the root, keyed by leaf name.
    distn_at_root = get_internal_vertex_to_leaf_distn(T, B)[root]
    leaves = Ftree.T_to_leaves(T)
    observed = dict((N[v], distn_at_root[v]) for v in leaves)
    # Build the expected weights, keyed by leaf name.
    # Leaf 'a' gets one value and the other four leaves share another.
    n = 5.0
    denominator = 3*n - 2
    expected = {'a': n / denominator}
    for name in 'bcde':
        expected[name] = 2 / denominator
    # Compare leaf by leaf.
    for v in leaves:
        name = N[v]
        self.assertTrue(np.allclose(expected[name], observed[name]))
def equal_daylight_layout(T, B, iteration_count):
    """
    Compute a layout by repeatedly equalizing an equal arc layout.
    @param T: topology
    @param B: branch lengths
    @param iteration_count: number of passes over the vertices
    @return: a map from vertex to an (x, y) pair
    """
    R = Ftree.T_to_R_canonical(T)
    root = Ftree.R_to_root(R)
    # Start from the equal arc layout.
    initial_locations = equal_arc_layout(T, B)
    # Mirror the tree into the dtree object;
    # v_to_dtree_id is filled in by the builder.
    v_to_sinks = Ftree.R_to_v_to_sinks(R)
    v_to_dtree_id = {}
    dtree = day.Day()
    _build_dtree(
            dtree, root, v_to_sinks, initial_locations, v_to_dtree_id, 0)
    # Repeatedly reroot and equalize
    # at each vertex that has more than two neighbors.
    v_to_neighbors = Ftree.T_to_v_to_neighbors(T)
    for _ in range(iteration_count):
        for vertex in Ftree.T_to_inside_out(T):
            if len(v_to_neighbors[vertex]) <= 2:
                continue
            dtree.select_node(v_to_dtree_id[vertex])
            dtree.reroot()
            dtree.equalize()
    # Read the final coordinates back out of the dtree.
    final_locations = {}
    for vertex, dtree_id in v_to_dtree_id.items():
        dtree.select_node(dtree_id)
        final_locations[vertex] = (dtree.get_x(), dtree.get_y())
    return final_locations
def get_leaf_distn_acl(R, B):
    """
    This is a possibly equivalent formulation.
    It is based on Felsenstein weights.
    @param R: a directed topology
    @param B: branch lengths
    @return: a map from leaf vertex to weight
    """
    # Order the vertices as leaves, then non-root internal, then the root.
    T = Ftree.R_to_T(R)
    root = Ftree.R_to_root(R)
    leaves = Ftree.T_to_leaves(T)
    inner = [v for v in Ftree.T_to_internal_vertices(T) if v != root]
    ordering = leaves + inner + [root]
    # The pseudoinverse of the Laplacian
    # is also the doubly centered covariance matrix.
    laplacian = Ftree.TB_to_L_principal(T, B, ordering)
    centered = np.linalg.pinv(laplacian)
    # Decenter the covariance matrix using the root, which is last.
    # This should give the rooted covariance matrix
    # which is M in the appendix of Weights for Data Related by a Tree
    # by Altschul, Carroll, and Lipman, 1989.
    root_row = centered[-1]
    ones_vector = np.ones_like(root_row)
    ones_matrix = np.ones_like(centered)
    M = (
            centered
            - np.outer(ones_vector, root_row)
            - np.outer(root_row, ones_vector)
            + centered[-1, -1]*ones_matrix)
    # Keep only the leaf block, which comes first in the ordering.
    nleaves = len(leaves)
    leaf_block_pinv = np.linalg.pinv(M[:nleaves, :nleaves])
    # Normalized column sums give the leaf distribution.
    column_sums = leaf_block_pinv.sum(axis=0)
    weights = column_sums / leaf_block_pinv.sum()
    return dict((leaf, weights[i]) for i, leaf in enumerate(leaves))
def RB_to_newick(R, B):
    """
    Write a directed topology with branch lengths as a newick string.
    @param R: a directed topology
    @param B: branch lengths
    @return: a newick string
    """
    root = Ftree.R_to_root(R)
    # the recursive helper writes everything except the terminator
    unterminated = _Bv_to_newick(
            Ftree.R_to_v_to_source(R),
            Ftree.R_to_v_to_sinks(R),
            B, root)
    return unterminated + ';'
def RBN_to_newick(R, B, N):
    """
    Write a named directed topology with branch lengths as a newick string.
    @param R: a directed topology
    @param B: branch lengths
    @param N: map from vertices to names
    @return: a newick string
    """
    root = Ftree.R_to_root(R)
    # the recursive helper writes everything except the terminator
    unterminated = _BNv_to_newick(
            Ftree.R_to_v_to_source(R),
            Ftree.R_to_v_to_sinks(R),
            B, N, root)
    return unterminated + ';'
def sample_brownian_motion(R, B):
    """
    Sample brownian motion on a tree.
    The root sample is fixed at zero.
    Every other vertex is drawn from a normal distribution
    whose mean is the sample at its source vertex
    and whose variance is the length of the connecting branch.
    @param R: directed tree
    @param B: branch lengths, keyed by frozenset vertex pairs
    @return: map from vertex to sample
    """
    r = Ftree.R_to_root(R)
    v_to_sample = {r: 0}
    v_to_sinks = Ftree.R_to_v_to_sinks(R)
    # Preorder visits a source before its sinks,
    # so the sample at v is available when its sinks are drawn.
    for v in Ftree.R_to_preorder(R):
        # Use .get() like equal_arc_layout does,
        # so that a vertex with no entry in the map (a leaf)
        # is treated as having no sinks instead of raising KeyError.
        for sink in v_to_sinks.get(v, ()):
            u_edge = frozenset((v, sink))
            mu = v_to_sample[v]
            var = B[u_edge]
            v_to_sample[sink] = random.gauss(mu, math.sqrt(var))
    return v_to_sample
def test_leaf_distn_schur(self):
    """
    Compare the Schur complement leaf weights to the expected weights.
    """
    # Read the example tree.
    R, B, N = FtreeIO.newick_to_RBN(LeafWeights.g_acl_tree)
    T = Ftree.R_to_T(R)
    # The root is looked up as in the sibling test but is not used below.
    root = Ftree.R_to_root(R)
    # Key the observed leaf weights by leaf name.
    leaf_distn = get_leaf_distn_schur(R, B)
    name_to_weight = dict(
            (N[v], leaf_distn[v]) for v in Ftree.T_to_leaves(T))
    # The expected weights are stored as strings,
    # so the observed weights are formatted to three decimals to compare.
    for name in LeafWeights.g_acl_ordered_names:
        expected_text = LeafWeights.g_acl_expected_weights[name]
        observed_text = '%.3f' % name_to_weight[name]
        self.assertEqual(expected_text, observed_text)
def get_leaf_distn_schur(R, B):
    """
    This is a possibly equivalent formulation.
    It is based on removing all internal vertices except the root
    by Schur complement.
    @param R: a directed topology
    @param B: branch lengths
    @return: a map from leaf vertex to normalized weight
    """
    T = Ftree.R_to_T(R)
    r = Ftree.R_to_root(R)
    leaves = Ftree.T_to_leaves(T)
    # Get the combinatorial Laplacian matrix
    # and Schur complement out all of the non-root internal vertices.
    # The kept vertices are the leaves followed by the root,
    # so the root is the last row and column.
    L_schur = Ftree.TB_to_L_schur(T, B, leaves + [r])
    # Get the vector of negative weights between the root and the leaves.
    w_unnormalized = L_schur[-1, :-1]
    # Normalizing by the sum cancels the sign and gives the distribution.
    w = w_unnormalized / w_unnormalized.sum()
    return dict((v, w[i]) for i, v in enumerate(leaves))
def get_response_content(fs):
    """
    Build the text of a BEAST xml file from a freshly sampled data set.
    A tree is sampled, rates are sampled on its branches,
    an alignment is sampled on the rate-scaled tree,
    and the pieces are written into the xml text in a fixed order.
    This is Python 2 code (print chevron syntax, string.uppercase).
    @param fs: user input; fs.nleaves and fs.nsites are read
    @return: the xml text
    """
    # init the response and get the user variables
    out = StringIO()
    nleaves = fs.nleaves
    nvertices = nleaves * 2 - 1
    # nbranches is only referenced by the disabled optimization code below
    nbranches = nvertices - 1
    nsites = fs.nsites
    # sample the coalescent tree with timelike branch lengths
    R, B = kingman.sample(fs.nleaves)
    r = Ftree.R_to_root(R)
    # get the leaf vertex names
    # (vertices 0..nleaves-1 are named with consecutive uppercase letters)
    N = dict(zip(range(nleaves), string.uppercase[:nleaves]))
    # keep a copy that has only the leaf names, before N is extended
    N_leaves = dict(N)
    # get the internal vertex names
    # (a vertex with more than one leaf is named
    # by concatenating the sorted names of its leaves)
    v_to_leaves = R_to_v_to_leaves(R)
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            N[v] = ''.join(sorted(N[leaf] for leaf in leaves))
    # get vertex ages
    v_to_age = kingman.RB_to_v_to_age(R, B)
    # sample the rates on the branches
    b_to_rate = sample_b_to_rate(R)
    xycorr = get_correlation(R, b_to_rate)
    # define B_subs in terms of substitutions instead of time
    B_subs = dict((p, t * b_to_rate[p]) for p, t in B.items())
    # sample the alignment
    v_to_seq = sample_v_to_seq(R, B_subs, nsites)
    # get the log likelihood; this is kind of horrible
    # (the tree is round-tripped through a newick string
    # to get the object that the likelihood function is given)
    pairs = [(N[v], ''.join(v_to_seq[v])) for v in range(nleaves)]
    headers, sequences = zip(*pairs)
    alignment = Fasta.create_alignment(headers, sequences)
    newick_string = FtreeIO.RBN_to_newick(R, B_subs, N_leaves)
    tree = Newick.parse(newick_string, Newick.NewickTree)
    dictionary_rate_matrix = RateMatrix.get_jukes_cantor_rate_matrix()
    ordered_states = list('ACGT')
    row_major_rate_matrix = MatrixUtil.dict_to_row_major(
            dictionary_rate_matrix, ordered_states, ordered_states)
    rate_matrix_object = RateMatrix.RateMatrix(
            row_major_rate_matrix, ordered_states)
    ll = PhyLikelihood.get_log_likelihood(
            tree, alignment, rate_matrix_object)
    # get ll when rates are all 1.0
    # (same alignment, but the tree uses the unscaled branch lengths B)
    newick_string = FtreeIO.RBN_to_newick(R, B, N_leaves)
    tree = Newick.parse(newick_string, Newick.NewickTree)
    ll_unity = PhyLikelihood.get_log_likelihood(
            tree, alignment, rate_matrix_object)
    # get ll when rates are numerically optimized
    # TODO incorporate the result into the xml file
    # TODO speed up the likelihood evaluation (beagle? C module?)
    #f = Opt(R, B, N_leaves, alignment)
    #X_logs = [0.0] * nbranches
    #result = scipy.optimize.fmin(f, X_logs, full_output=True)
    #print result
    #
    # write the xml header and the simulation summary as xml comments
    print >> out, '<?xml version="1.0"?>'
    print >> out, '<beast>'
    print >> out
    print >> out, '<!-- actual rate autocorrelation', xycorr, '-->'
    print >> out, '<!-- actual root height', v_to_age[r], '-->'
    print >> out, '<!-- actual log likelihood', ll, '-->'
    print >> out, '<!-- ll if rates were unity', ll_unity, '-->'
    print >> out
    print >> out, '<!--'
    print >> out, 'predefine the taxa as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Introduction_to_XML_format'
    print >> out, '-->'
    print >> out, get_leaf_taxon_defn(list(string.uppercase[:nleaves]))
    print >> out
    print >> out, '<!--'
    print >> out, 'define the alignment as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Introduction_to_XML_format'
    print >> out, '-->'
    # NOTE(review): the only binding of `leaves` in this function
    # is the loop variable of the internal vertex naming loop above,
    # so this passes whatever the last iteration of that loop left behind
    # (the leaves of the last entry of sorted(v_to_leaves.items())).
    # Presumably that entry is the root so this is all of the leaves,
    # but this depends on the vertex numbering -- confirm.
    print >> out, get_alignment_defn(leaves, N, v_to_seq)
    print >> out
    print >> out, '<!--'
    print >> out, 'specify the starting tree as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_4'
    print >> out, '-->'
    print >> out, get_starting_tree_defn(R, B, N_leaves)
    print >> out
    print >> out, '<!--'
    print >> out, 'connect the tree model as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_4'
    print >> out, '-->'
    print >> out, g_tree_model_defn
    print >> out
    print >> out, g_uncorrelated_relaxed_clock_info
    print >> out
    # The string literal below is disabled code, not output;
    # it is evaluated as an expression statement and discarded.
    """
    print >> out, '<!--'
    print >> out, 'create a list of taxa for which to constrain the mrca as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            print >> out, get_mrca_subset_defn(N, v, leaves)
    print >> out
    print >> out, '<!--'
    print >> out, 'create a tmrcaStatistic that will record the height as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            print >> out, get_mrca_stat_defn(N[v])
    """
    print >> out
    print >> out, g_likelihood_info
    print >> out
    print >> out, '<!--'
    print >> out, 'run the mcmc'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    print >> out, get_mcmc_defn(v_to_leaves, v_to_age, N)
    print >> out
    print >> out, '</beast>'
    # return the response
    return out.getvalue()
def get_response_content(fs):
    """
    Report Schur complement analyses of a tree as text.
    The report shows the rooted and full Schur complement matrices
    with their eigendecompositions,
    followed by an analysis of each part of the vertex partition
    induced by the root.
    This is Python 2 code (print chevron syntax).
    @param fs: user input; the newick string fs.tree is read
    @return: the report text
    """
    # read the tree
    R, B, N = FtreeIO.newick_to_RBN(fs.tree)
    r = Ftree.R_to_root(R)
    T = Ftree.R_to_T(R)
    leaves = Ftree.T_to_leaves(T)
    # define the lists of leaves induced by the root
    vertex_partition = sorted(Ftree.R_to_vertex_partition(R))
    vertex_lists = [sorted(p) for p in vertex_partition]
    leaf_set = set(leaves)
    leaf_lists = [sorted(s & leaf_set) for s in vertex_partition]
    # order the list of leaves in a nice block form
    # (the leaves of each part of the partition are contiguous)
    leaves = [v for lst in leaf_lists for v in lst]
    # remove internal vertices by Schur complementation
    # (the rooted version keeps the root as the last row and column)
    L_schur_rooted = Ftree.TB_to_L_schur(T, B, leaves + [r])
    L_schur_full = Ftree.TB_to_L_schur(T, B, leaves)
    # show the matrix
    np.set_printoptions(linewidth=132)
    out = StringIO()
    # show the rooted schur complement
    w, v = scipy.linalg.eigh(L_schur_rooted)
    print >> out, 'rooted Schur complement:'
    print >> out, L_schur_rooted
    print >> out, 'Felsenstein weights at the root:'
    print >> out, -L_schur_rooted[-1][:-1] / L_schur_rooted[-1, -1]
    print >> out, 'rooted Schur complement eigendecomposition:'
    print >> out, w
    print >> out, v
    print >> out
    # show the full schur complement
    w, v = scipy.linalg.eigh(L_schur_full)
    print >> out, 'full Schur complement:'
    print >> out, L_schur_full
    print >> out, 'full Schur complement eigendecomposition:'
    print >> out, w
    print >> out, v
    print >> out
    # analyze perron components
    # (each diagonal block of the rooted Schur complement
    # corresponds to the leaves of one part of the partition,
    # because the leaves were reordered block by block above)
    print >> out, 'perron components:'
    print >> out
    start = 0
    for lst in leaf_lists:
        n = len(lst)
        C = L_schur_rooted[start:start + n, start:start + n]
        print >> out, 'C:'
        print >> out, C
        w_eff = np.sum(C)
        b_eff = 1 / w_eff
        print >> out, 'effective conductance:'
        print >> out, w_eff
        print >> out, 'effective branch length (or resistance or variance):'
        print >> out, b_eff
        S = np.linalg.pinv(C)
        print >> out, 'C^-1 (rooted covariance-like):'
        print >> out, S
        w, v = scipy.linalg.eigh(S)
        print >> out, 'rooted covariance-like eigendecomposition:'
        print >> out, w
        print >> out, v
        # the last eigenvalue is reported as the perron value
        print >> out, 'perron value:'
        print >> out, w[-1]
        print >> out, 'reciprocal of perron value:'
        print >> out, 1 / w[-1]
        print >> out
        start += n
    print >> out
    # analyze subtrees
    # (keep all vertices of each part of the partition, plus the root)
    print >> out, 'subtree Laplacian analysis:'
    print >> out
    for lst in vertex_lists:
        C = Ftree.TB_to_L_schur(T, B, lst + [r])
        w, v = scipy.linalg.eigh(C)
        print >> out, 'subtree Laplacian:'
        print >> out, C
        print >> out, 'eigendecomposition:'
        print >> out, w
        print >> out, v
        print >> out
    # analyze subtrees
    # (keep only the leaves of each part of the partition, plus the root)
    print >> out, 'full Schur complement subtree analysis:'
    print >> out
    for lst in leaf_lists:
        C = Ftree.TB_to_L_schur(T, B, lst + [r])
        w, v = scipy.linalg.eigh(C)
        print >> out, 'full Schur complement in subtree:'
        print >> out, C
        print >> out, 'eigendecomposition:'
        print >> out, w
        print >> out, v
        print >> out
    return out.getvalue()
def finish(self):
    """
    Check that the root has no entry in self.v_to_hanging_length.
    @raise FtreeIOError: if the root has a hanging branch
    """
    root = Ftree.R_to_root(self.R)
    root_is_hanging = root in self.v_to_hanging_length
    if root_is_hanging:
        raise FtreeIOError('the root should not have a hanging branch')