Exemple #1
0
def equal_arc_layout(T, B):
    """
    Compute an equal arc layout of a tree.
    The tree is given a canonical root, every vertex is assigned
    the number of tips in its subtree, the equal arcs helper fills in
    an angle per vertex using -pi and pi as the angular bounds,
    and the locations helper converts the angles into locations.
    @param T: tree topology
    @param B: branch lengths
    @return: a map from vertex to location
    """
    # pick a root so that the topology becomes directed
    R = Ftree.T_to_R_canonical(T)
    root = Ftree.R_to_root(R)
    v_to_sinks = Ftree.R_to_v_to_sinks(R)
    # count the tips below each vertex; a vertex without sinks is one tip
    tip_counts = {}
    for vertex in Ftree.R_to_postorder(R):
        children = v_to_sinks.get(vertex, [])
        tip_counts[vertex] = (
                sum(tip_counts[c] for c in children) if children else 1)
    # the helper fills in the angle map in place
    angles = {}
    _force_equal_arcs(
            v_to_sinks, tip_counts, angles,
            root, -math.pi, math.pi)
    # the helper fills in the location map in place, starting at the root
    locations = {}
    _update_locations(
            R, B,
            Ftree.R_to_v_to_source(R), v_to_sinks, angles, locations,
            root, (0, 0), 0)
    return locations
Exemple #2
0
def R_to_newick(R):
    """
    Write a directed topology as a newick string.
    The string built from the root is terminated with a semicolon.
    @param R: a directed topology
    @return: a newick string
    """
    root = Ftree.R_to_root(R)
    v_to_sinks = Ftree.R_to_v_to_sinks(R)
    body = _v_to_newick(v_to_sinks, root)
    return body + ';'
Exemple #3
0
 def test_leaf_distn_a(self):
     # Parse the example tree and find its root.
     R, B, N = FtreeIO.newick_to_RBN('(a:2, (b:1, c:1, d:1, e:1)x:1)y;')
     T = Ftree.R_to_T(R)
     root = Ftree.R_to_root(R)
     # Look up the leaf distribution computed for the root.
     root_distn = get_internal_vertex_to_leaf_distn(T, B)[root]
     leaves = Ftree.T_to_leaves(T)
     observed = dict((N[v], root_distn[v]) for v in leaves)
     # Leaf 'a' expects one weight and the other leaves share another.
     n = 5.0
     denominator = 3*n - 2
     expected = {'a': n / denominator}
     for name in 'bcde':
         expected[name] = 2 / denominator
     # Compare the observed weight of each leaf to its expected weight.
     for v in leaves:
         name = N[v]
         self.assertTrue(np.allclose(expected[name], observed[name]))
Exemple #4
0
def equal_daylight_layout(T, B, iteration_count):
    """
    Compute a tree layout by repeated rerooting and equalization.
    The layout starts from the equal arc layout and is refined
    through a day.Day object that mirrors the rooted tree.
    @param T: topology
    @param B: branch lengths
    @param iteration_count: number of passes over the vertices
    @return: a map from vertex to an (x, y) pair
    """
    R = Ftree.T_to_R_canonical(T)
    root = Ftree.R_to_root(R)
    # the equal arc layout provides the starting locations
    initial_locations = equal_arc_layout(T, B)
    # mirror the rooted tree inside the dtree object;
    # the helper records the dtree id of each vertex
    v_to_sinks = Ftree.R_to_v_to_sinks(R)
    v_to_dtree_id = {}
    dtree = day.Day()
    _build_dtree(
            dtree, root, v_to_sinks, initial_locations, v_to_dtree_id, 0)
    # on each pass, reroot at and equalize every vertex
    # that has more than two neighbors
    v_to_neighbors = Ftree.T_to_v_to_neighbors(T)
    for _ in range(iteration_count):
        for v in Ftree.T_to_inside_out(T):
            if len(v_to_neighbors[v]) <= 2:
                continue
            dtree.select_node(v_to_dtree_id[v])
            dtree.reroot()
            dtree.equalize()
    # read the final coordinates back out of the dtree
    final_locations = {}
    for v, dtree_id in v_to_dtree_id.items():
        dtree.select_node(dtree_id)
        final_locations[v] = (dtree.get_x(), dtree.get_y())
    return final_locations
Exemple #5
0
def get_leaf_distn_acl(R, B):
    """
    Compute a distribution over the leaves of a rooted tree.
    According to the original author this is a possibly equivalent
    formulation of the leaf distribution, based on Felsenstein weights.
    @param R: a directed topology
    @param B: branch lengths
    @return: a map from each leaf to its weight
    """
    # Order the vertices as leaves, then non-root internal vertices,
    # then the root, so that the root is last.
    T = Ftree.R_to_T(R)
    root = Ftree.R_to_root(R)
    leaves = Ftree.T_to_leaves(T)
    internal = [v for v in Ftree.T_to_internal_vertices(T) if v != root]
    ordered_vertices = leaves + internal + [root]
    # Pseudoinvert the Laplacian.
    # The original author notes that this is also
    # the doubly centered covariance matrix.
    laplacian = Ftree.TB_to_L_principal(T, B, ordered_vertices)
    HSH = np.linalg.pinv(laplacian)
    # Decenter using the last row, which corresponds to the root.
    # The original author notes that this should give the rooted covariance
    # matrix which is M in the appendix of
    # Weights for Data Related by a Tree
    # by Altschul, Carroll, and Lipman, 1989.
    root_row = HSH[-1]
    ones_vector = np.ones_like(root_row)
    ones_matrix = np.ones_like(HSH)
    M = HSH - np.outer(ones_vector, root_row)
    M = M - np.outer(root_row, ones_vector)
    M = M + HSH[-1, -1] * ones_matrix
    # Keep only the leading block, which corresponds to the leaves.
    nleaves = len(leaves)
    S_pinv = np.linalg.pinv(M[:nleaves, :nleaves])
    # Normalize the column sums so that the weights sum to one.
    column_sums = S_pinv.sum(axis=0)
    w = column_sums / S_pinv.sum()
    return dict((leaf, w[i]) for i, leaf in enumerate(leaves))
Exemple #6
0
def RB_to_newick(R, B):
    """
    Write a directed topology with branch lengths as a newick string.
    The string built from the root is terminated with a semicolon.
    @param R: a directed topology
    @param B: branch lengths
    @return: a newick string
    """
    root = Ftree.R_to_root(R)
    source_map = Ftree.R_to_v_to_source(R)
    sinks_map = Ftree.R_to_v_to_sinks(R)
    body = _Bv_to_newick(source_map, sinks_map, B, root)
    return body + ';'
Exemple #7
0
def RBN_to_newick(R, B, N):
    """
    Write a named directed topology with branch lengths as a newick string.
    The string built from the root is terminated with a semicolon.
    @param R: a directed topology
    @param B: branch lengths
    @param N: map from vertices to names
    @return: a newick string
    """
    root = Ftree.R_to_root(R)
    source_map = Ftree.R_to_v_to_source(R)
    sinks_map = Ftree.R_to_v_to_sinks(R)
    body = _BNv_to_newick(source_map, sinks_map, B, N, root)
    return body + ';'
Exemple #8
0
def sample_brownian_motion(R, B):
    """
    Draw one sample of brownian motion on a rooted tree.
    The root is fixed at zero and each sink is drawn from a normal
    distribution whose mean is the sample at its source and
    whose variance is the length of the connecting branch.
    @param R: directed tree
    @param B: branch lengths
    @return: map from vertex to sample
    """
    root = Ftree.R_to_root(R)
    samples = {root: 0}
    v_to_sinks = Ftree.R_to_v_to_sinks(R)
    # visit in preorder; presumably this guarantees that each source
    # has been sampled before its sinks
    for parent in Ftree.R_to_preorder(R):
        for child in v_to_sinks[parent]:
            # branch lengths are keyed by undirected edges
            branch = frozenset((parent, child))
            mean = samples[parent]
            variance = B[branch]
            samples[child] = random.gauss(mean, math.sqrt(variance))
    return samples
Exemple #9
0
 def test_leaf_distn_schur(self):
     # Parse the reference tree provided by the LeafWeights module.
     R, B, N = FtreeIO.newick_to_RBN(LeafWeights.g_acl_tree)
     T = Ftree.R_to_T(R)
     root = Ftree.R_to_root(R)
     # Compute the leaf distribution and key it by leaf name.
     leaf_distn = get_leaf_distn_schur(R, B)
     name_to_weight = dict(
             (N[v], leaf_distn[v]) for v in Ftree.T_to_leaves(T))
     # The expected weights are compared as strings
     # after formatting the observed weights to three decimal places.
     for name in LeafWeights.g_acl_ordered_names:
         expected = LeafWeights.g_acl_expected_weights[name]
         observed = '%.3f' % name_to_weight[name]
         self.assertEqual(expected, observed)
Exemple #10
0
def get_leaf_distn_schur(R, B):
    """
    Compute a distribution over the leaves of a rooted tree.
    According to the original author this is a possibly equivalent
    formulation of the leaf distribution.
    It is based on removing all internal vertices except the root
    by Schur complement.
    @param R: a directed topology
    @param B: branch lengths
    @return: a map from each leaf to its normalized weight
    """
    T = Ftree.R_to_T(R)
    r = Ftree.R_to_root(R)
    leaves = Ftree.T_to_leaves(T)
    # Get the combinatorial Laplacian matrix
    # and Schur complement out all of the non-root internal vertices.
    # The retained vertices are ordered as the leaves followed by the root,
    # so the root corresponds to the last row and column.
    L_schur = Ftree.TB_to_L_schur(T, B, leaves + [r])
    # Get the vector of negative weights between the root and the leaves.
    # This is the root row without its diagonal entry.
    w_unnormalized = L_schur[-1, :-1]
    # Normalize so that the weights sum to one.
    w = w_unnormalized / w_unnormalized.sum()
    return dict((v, w[i]) for i, v in enumerate(leaves))
Exemple #11
0
def get_response_content(fs):
    """
    Build the text of a BEAST xml file from a simulated tree and alignment.
    A tree is sampled through the kingman module, rates are sampled
    for its branches, an alignment is sampled along the rescaled branches,
    and the xml text is assembled together with xml comments that record
    the actual rate autocorrelation, root height, and log likelihoods.
    @param fs: an object providing the nleaves and nsites attributes
    @return: the xml text
    """
    # init the response and get the user variables
    out = StringIO()
    nleaves = fs.nleaves
    # NOTE(review): nvertices and nbranches are used only
    # by the commented-out optimization code further down
    nvertices = nleaves * 2 - 1
    nbranches = nvertices - 1
    nsites = fs.nsites
    # sample the coalescent tree with timelike branch lengths
    R, B = kingman.sample(fs.nleaves)
    r = Ftree.R_to_root(R)
    # get the leaf vertex names
    # NOTE(review): string.uppercase is a Python 2 name;
    # presumably nleaves does not exceed the number of letters -- confirm
    N = dict(zip(range(nleaves), string.uppercase[:nleaves]))
    # keep a copy that has names for the leaves only
    N_leaves = dict(N)
    # get the internal vertex names
    # each one is the concatenation of the sorted names of the leaves
    # mapped to that vertex
    v_to_leaves = R_to_v_to_leaves(R)
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            N[v] = ''.join(sorted(N[leaf] for leaf in leaves))
    # get vertex ages
    v_to_age = kingman.RB_to_v_to_age(R, B)
    # sample the rates on the branches
    b_to_rate = sample_b_to_rate(R)
    xycorr = get_correlation(R, b_to_rate)
    # define B_subs in terms of substitutions instead of time
    B_subs = dict((p, t * b_to_rate[p]) for p, t in B.items())
    # sample the alignment
    v_to_seq = sample_v_to_seq(R, B_subs, nsites)
    # get the log likelihood; this is kind of horrible
    # the tree is round-tripped through a newick string
    # to obtain the tree object passed to the likelihood function
    pairs = [(N[v], ''.join(v_to_seq[v])) for v in range(nleaves)]
    headers, sequences = zip(*pairs)
    alignment = Fasta.create_alignment(headers, sequences)
    newick_string = FtreeIO.RBN_to_newick(R, B_subs, N_leaves)
    tree = Newick.parse(newick_string, Newick.NewickTree)
    dictionary_rate_matrix = RateMatrix.get_jukes_cantor_rate_matrix() 
    ordered_states = list('ACGT') 
    row_major_rate_matrix = MatrixUtil.dict_to_row_major(
            dictionary_rate_matrix, ordered_states, ordered_states)
    rate_matrix_object = RateMatrix.RateMatrix(
            row_major_rate_matrix, ordered_states) 
    ll = PhyLikelihood.get_log_likelihood(
            tree, alignment, rate_matrix_object)
    # get ll when rates are all 1.0
    # this uses the timelike branch lengths B instead of B_subs
    newick_string = FtreeIO.RBN_to_newick(R, B, N_leaves)
    tree = Newick.parse(newick_string, Newick.NewickTree)
    ll_unity = PhyLikelihood.get_log_likelihood(
            tree, alignment, rate_matrix_object)
    # get ll when rates are numerically optimized
    # TODO incorporate the result into the xml file
    # TODO speed up the likelihood evaluation (beagle? C module?)
    #f = Opt(R, B, N_leaves, alignment)
    #X_logs = [0.0] * nbranches
    #result = scipy.optimize.fmin(f, X_logs, full_output=True)
    #print result
    #
    # write the xml header and the comments that record the actual values
    print >> out, '<?xml version="1.0"?>'
    print >> out, '<beast>'
    print >> out
    print >> out, '<!-- actual rate autocorrelation', xycorr, '-->'
    print >> out, '<!-- actual root height', v_to_age[r], '-->'
    print >> out, '<!-- actual log likelihood', ll, '-->'
    print >> out, '<!-- ll if rates were unity', ll_unity, '-->'
    print >> out
    print >> out, '<!--'
    print >> out, 'predefine the taxa as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Introduction_to_XML_format'
    print >> out, '-->'
    print >> out, get_leaf_taxon_defn(list(string.uppercase[:nleaves]))
    print >> out
    print >> out, '<!--'
    print >> out, 'define the alignment as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Introduction_to_XML_format'
    print >> out, '-->'
    # NOTE(review): 'leaves' here is whatever the loop over v_to_leaves
    # above left bound on its last iteration -- confirm this is intended
    print >> out, get_alignment_defn(leaves, N, v_to_seq)
    print >> out
    print >> out, '<!--'
    print >> out, 'specify the starting tree as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_4'
    print >> out, '-->'
    print >> out, get_starting_tree_defn(R, B, N_leaves)
    print >> out
    print >> out, '<!--'
    print >> out, 'connect the tree model as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_4'
    print >> out, '-->'
    print >> out, g_tree_model_defn
    print >> out
    print >> out, g_uncorrelated_relaxed_clock_info
    print >> out
    # the string literal below is an expression statement with no effect;
    # it keeps the mrca output disabled
    """
    print >> out, '<!--'
    print >> out, 'create a list of taxa for which to constrain the mrca as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            print >> out, get_mrca_subset_defn(N, v, leaves)
    print >> out
    print >> out, '<!--'
    print >> out, 'create a tmrcaStatistic that will record the height as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    for v, leaves in sorted(v_to_leaves.items()):
        if len(leaves) > 1:
            print >> out, get_mrca_stat_defn(N[v])
    """
    print >> out
    print >> out, g_likelihood_info
    print >> out
    print >> out, '<!--'
    print >> out, 'run the mcmc'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    print >> out, get_mcmc_defn(v_to_leaves, v_to_age, N)
    print >> out
    print >> out, '</beast>'
    # return the response
    return out.getvalue()
Exemple #12
0
def get_response_content(fs):
    """
    Report Schur complement analyses of a rooted tree as text.
    The report shows the Schur complements onto the leaves
    with and without the root, their eigendecompositions,
    and analyses of the blocks induced by the vertex partition at the root.
    @param fs: an object whose tree attribute is passed to the newick parser
    @return: the text of the report
    """
    # read the tree
    R, B, N = FtreeIO.newick_to_RBN(fs.tree)
    r = Ftree.R_to_root(R)
    T = Ftree.R_to_T(R)
    leaves = Ftree.T_to_leaves(T)
    # define the lists of leaves induced by the root
    vertex_partition = sorted(Ftree.R_to_vertex_partition(R))
    vertex_lists = [sorted(p) for p in vertex_partition]
    leaf_set = set(leaves)
    leaf_lists = [sorted(s & leaf_set) for s in vertex_partition]
    # order the list of leaves in a nice block form
    leaves = [v for lst in leaf_lists for v in lst]
    # remove internal vertices by Schur complementation
    # the rooted version keeps the root as the last vertex
    L_schur_rooted = Ftree.TB_to_L_schur(T, B, leaves + [r])
    L_schur_full = Ftree.TB_to_L_schur(T, B, leaves)
    # show the matrix
    np.set_printoptions(linewidth=132)
    out = StringIO()
    # show the rooted schur complement
    w, v = scipy.linalg.eigh(L_schur_rooted)
    print >> out, 'rooted Schur complement:'
    print >> out, L_schur_rooted
    print >> out, 'Felsenstein weights at the root:'
    # negate the root row without its diagonal entry
    # and divide by the diagonal entry
    print >> out, -L_schur_rooted[-1][:-1] / L_schur_rooted[-1, -1]
    print >> out, 'rooted Schur complement eigendecomposition:'
    print >> out, w
    print >> out, v
    print >> out
    # show the full schur complement
    w, v = scipy.linalg.eigh(L_schur_full)
    print >> out, 'full Schur complement:'
    print >> out, L_schur_full
    print >> out, 'full Schur complement eigendecomposition:'
    print >> out, w
    print >> out, v
    print >> out
    # analyze perron components
    print >> out, 'perron components:'
    print >> out
    # start is the offset of the current block of leaves
    # within the block ordering used for the rooted Schur complement
    start = 0
    for lst in leaf_lists:
        n = len(lst)
        C = L_schur_rooted[start:start + n, start:start + n]
        print >> out, 'C:'
        print >> out, C
        w_eff = np.sum(C)
        b_eff = 1 / w_eff
        print >> out, 'effective conductance:'
        print >> out, w_eff
        print >> out, 'effective branch length (or resistance or variance):'
        print >> out, b_eff
        S = np.linalg.pinv(C)
        print >> out, 'C^-1 (rooted covariance-like):'
        print >> out, S
        w, v = scipy.linalg.eigh(S)
        print >> out, 'rooted covariance-like eigendecomposition:'
        print >> out, w
        print >> out, v
        # the last eigenvalue is reported as the perron value
        print >> out, 'perron value:'
        print >> out, w[-1]
        print >> out, 'reciprocal of perron value:'
        print >> out, 1 / w[-1]
        print >> out
        start += n
    print >> out
    # analyze subtrees using all vertices of each part plus the root
    print >> out, 'subtree Laplacian analysis:'
    print >> out
    for lst in vertex_lists:
        C = Ftree.TB_to_L_schur(T, B, lst + [r])
        w, v = scipy.linalg.eigh(C)
        print >> out, 'subtree Laplacian:'
        print >> out, C
        print >> out, 'eigendecomposition:'
        print >> out, w
        print >> out, v
        print >> out
    # analyze subtrees using only the leaves of each part plus the root
    print >> out, 'full Schur complement subtree analysis:'
    print >> out
    for lst in leaf_lists:
        C = Ftree.TB_to_L_schur(T, B, lst + [r])
        w, v = scipy.linalg.eigh(C)
        print >> out, 'full Schur complement in subtree:'
        print >> out, C
        print >> out, 'eigendecomposition:'
        print >> out, w
        print >> out, v
        print >> out
    return out.getvalue()
Exemple #13
0
 def finish(self):
     """
     Check that the root does not have a hanging branch.
     @raise FtreeIOError: if the root of self.R is a key
     of self.v_to_hanging_length
     """
     r = Ftree.R_to_root(self.R)
     if r in self.v_to_hanging_length:
         raise FtreeIOError('the root should not have a hanging branch')