Ejemplo n.º 1
0
 def __call__(self, X_logs):
     """
     Evaluate the objective function at a vector of log branch rates.
     The entry of X_logs at index v is the log of the rate
     of the branch above vertex v.
     Each branch length in self.B is multiplied by its exponentiated rate
     before the log likelihood of self.alignment is computed
     using a Jukes-Cantor rate matrix.
     @param X_logs: vector of branch rate logs
     @return: negative log likelihood (the quantity to be minimized)
     """
     rates = [math.exp(x_log) for x_log in X_logs]
     # scale each branch length by the rate of the branch above the child
     scaled_lengths = {}
     for v_parent, v_child in self.R:
         branch = frozenset((v_parent, v_child))
         scaled_lengths[branch] = rates[v_child] * self.B[branch]
     # define the rate matrix object over the nucleotide states
     states = list('ACGT')
     jc_dict = RateMatrix.get_jukes_cantor_rate_matrix()
     jc_row_major = MatrixUtil.dict_to_row_major(jc_dict, states, states)
     jc_object = RateMatrix.RateMatrix(jc_row_major, states)
     # round trip through newick to get a tree the likelihood code accepts
     newick_text = FtreeIO.RBN_to_newick(
             self.R, scaled_lengths, self.N_leaves)
     scaled_tree = Newick.parse(newick_text, Newick.NewickTree)
     # negate because the caller minimizes
     return -PhyLikelihood.get_log_likelihood(
             scaled_tree, self.alignment, jc_object)
Ejemplo n.º 2
0
def get_starting_tree_defn(R, B, N_leaves):
    """
    Return the newick element that defines the starting tree.
    Every leaf name is prefixed with 'taxon_' before the tree is serialized
    by FtreeIO.RBN_to_newick.
    @param R: forwarded to FtreeIO.RBN_to_newick as its first argument
    @param B: forwarded to FtreeIO.RBN_to_newick as its second argument
    @param N_leaves: map from leaf vertex to leaf name
    @return: the opening tag, the newick string, and the closing tag,
    separated by newlines and without a trailing newline
    """
    prefixed_names = {}
    for vertex, leaf_name in N_leaves.items():
        prefixed_names[vertex] = 'taxon_' + leaf_name
    lines = [
            '<newick id="startingTree">',
            FtreeIO.RBN_to_newick(R, B, prefixed_names),
            '</newick>',
            ]
    return ('\n'.join(lines) + '\n').rstrip()
Ejemplo n.º 3
0
def get_response_content(fs):
    """
    Reroot the tree at its harmonic Fiedler point and attach a new leaf.
    The new root is placed on the single edge whose endpoints have
    harmonically extended Fiedler valuations of opposite sign,
    at the position where linear interpolation of the two valuations is zero.
    A new leaf is then attached to the new root, and its branch length
    is the minimizer returned by scipy.optimize.golden applied to get_gap.
    @param fs: object with attributes tree, root_name, and leaf_name
    @return: newick string of the new tree rooted at the new vertex
    @raise ValueError: if some valuation has magnitude below 1e-8,
    or if the sign change does not occur on exactly one edge
    """
    # read the tree
    T, B, N = FtreeIO.newick_to_TBN(fs.tree)
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    # get the valuations with harmonic extensions
    w, V = Ftree.TB_to_harmonic_extension(T, B, leaves, internal)
    # get the Fiedler valuations with harmonic extensions
    h = V[:, 0]
    # reject valuations that are too close to zero to have a reliable sign
    eps = 1e-8
    if any(abs(x) < eps for x in h):
        raise ValueError('the tree has no clear harmonic Fiedler point')
    # find the edge containing the harmonic Fiedler point;
    # the rows of h are ordered as the leaves followed by the internal vertices
    v_to_val = dict((v, h[i]) for i, v in enumerate(leaves + internal))
    d_edges = [(a, b) for a, b in T if v_to_val[a] * v_to_val[b] < 0]
    if len(d_edges) != 1:
        raise ValueError('expected the point to fall clearly on a single edge')
    d_edge = d_edges[0]
    a, b = d_edge
    # find the proportion along the directed edge;
    # the denominator is nonzero because the two valuations differ in sign
    t = v_to_val[a] / (v_to_val[a] - v_to_val[b])
    # find the distance from the new root to each endpoint vertex
    u_edge = frozenset(d_edge)
    d = B[u_edge]
    da = t * d
    db = (1 - t) * d
    # create the new tree by splitting the edge at the new root vertex
    r = max(Ftree.T_to_order(T)) + 1
    N[r] = fs.root_name
    T.remove(u_edge)
    del B[u_edge]
    ea = frozenset((r, a))
    eb = frozenset((r, b))
    T.add(ea)
    T.add(eb)
    B[ea] = da
    B[eb] = db
    # add a new leaf with arbitrary branch length
    leaf = r + 1
    N[leaf] = fs.leaf_name
    u_edge = frozenset((r, leaf))
    T.add(u_edge)
    B[u_edge] = 1.0
    # get the best branch length to cause eigenvalue multiplicity
    blen = scipy.optimize.golden(get_gap, (T, B, u_edge),
                                 full_output=False,
                                 tol=1e-12)
    B[u_edge] = blen
    # return the string representation of the new tree
    R = Ftree.T_to_R_specific(T, r)
    return FtreeIO.RBN_to_newick(R, B, N)
Ejemplo n.º 4
0
def get_response_content(fs):
    """
    Reroot the tree at its harmonic Fiedler point.
    The new root is placed on the single edge whose endpoints have
    harmonically extended Fiedler valuations of opposite sign,
    at the position where linear interpolation of the two valuations is zero.
    The new root vertex is not given an entry in the name map.
    @param fs: object with a tree attribute holding a newick string
    @return: newick string of the tree rooted at the new vertex
    @raise ValueError: if some valuation has magnitude below 1e-8,
    or if the sign change does not occur on exactly one edge
    """
    # read the tree
    T, B, N = FtreeIO.newick_to_TBN(fs.tree)
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    # get the valuations with harmonic extensions
    w, V = Ftree.TB_to_harmonic_extension(T, B, leaves, internal)
    # get the Fiedler valuations with harmonic extensions
    h = V[:, 0]
    # reject valuations that are too close to zero to have a reliable sign
    eps = 1e-8
    if any(abs(x) < eps for x in h):
        raise ValueError('the tree has no clear harmonic Fiedler point')
    # find the edge containing the harmonic Fiedler point;
    # the rows of h are ordered as the leaves followed by the internal vertices
    v_to_val = dict((v, h[i]) for i, v in enumerate(leaves + internal))
    d_edges = [(a, b) for a, b in T if v_to_val[a] * v_to_val[b] < 0]
    if len(d_edges) != 1:
        raise ValueError('expected the point to fall clearly on a single edge')
    d_edge = d_edges[0]
    a, b = d_edge
    # find the proportion along the directed edge;
    # the denominator is nonzero because the two valuations differ in sign
    t = v_to_val[a] / (v_to_val[a] - v_to_val[b])
    # find the distance from the new root to each endpoint vertex
    u_edge = frozenset(d_edge)
    d = B[u_edge]
    da = t * d
    db = (1 - t) * d
    # create the new tree by splitting the edge at the new root vertex
    r = max(Ftree.T_to_order(T)) + 1
    T.remove(u_edge)
    del B[u_edge]
    ea = frozenset((r, a))
    eb = frozenset((r, b))
    T.add(ea)
    T.add(eb)
    B[ea] = da
    B[eb] = db
    R = Ftree.T_to_R_specific(T, r)
    # return the string representation of the new tree
    return FtreeIO.RBN_to_newick(R, B, N)
Ejemplo n.º 5
0
def get_response_content(fs):
    """
    Simulate sequence data on a sampled tree and write a BEAST xml document.
    A tree is sampled with kingman.sample, a rate is sampled for each branch,
    and an alignment is sampled on the rate-scaled tree.
    The log likelihood under a Jukes-Cantor rate matrix is computed
    both for the rate-scaled tree and for the unscaled tree,
    and both values are written into the xml as comments.
    @param fs: object with attributes nleaves and nsites
    @return: the xml document as a string
    """
    # init the response and get the user variables
    out = StringIO()
    nleaves = fs.nleaves
    nsites = fs.nsites
    # sample the coalescent tree with timelike branch lengths
    R, B = kingman.sample(nleaves)
    r = Ftree.R_to_root(R)
    # get the leaf vertex names;
    # vertices 0..nleaves-1 are treated as the leaves throughout this function
    leaves = list(range(nleaves))
    leaf_names = list(string.uppercase[:nleaves])
    N = dict(zip(leaves, leaf_names))
    N_leaves = dict(N)
    # get the internal vertex names
    v_to_leaves = R_to_v_to_leaves(R)
    for v, v_leaves in sorted(v_to_leaves.items()):
        if len(v_leaves) > 1:
            N[v] = ''.join(sorted(N[leaf] for leaf in v_leaves))
    # get vertex ages
    v_to_age = kingman.RB_to_v_to_age(R, B)
    # sample the rates on the branches
    b_to_rate = sample_b_to_rate(R)
    xycorr = get_correlation(R, b_to_rate)
    # define B_subs in terms of substitutions instead of time
    B_subs = dict((p, t * b_to_rate[p]) for p, t in B.items())
    # sample the alignment
    v_to_seq = sample_v_to_seq(R, B_subs, nsites)
    # get the log likelihood; this is kind of horrible
    pairs = [(N[v], ''.join(v_to_seq[v])) for v in leaves]
    headers, sequences = zip(*pairs)
    alignment = Fasta.create_alignment(headers, sequences)
    newick_string = FtreeIO.RBN_to_newick(R, B_subs, N_leaves)
    tree = Newick.parse(newick_string, Newick.NewickTree)
    dictionary_rate_matrix = RateMatrix.get_jukes_cantor_rate_matrix()
    ordered_states = list('ACGT')
    row_major_rate_matrix = MatrixUtil.dict_to_row_major(
            dictionary_rate_matrix, ordered_states, ordered_states)
    rate_matrix_object = RateMatrix.RateMatrix(
            row_major_rate_matrix, ordered_states)
    ll = PhyLikelihood.get_log_likelihood(
            tree, alignment, rate_matrix_object)
    # get ll when rates are all 1.0
    newick_string = FtreeIO.RBN_to_newick(R, B, N_leaves)
    tree = Newick.parse(newick_string, Newick.NewickTree)
    ll_unity = PhyLikelihood.get_log_likelihood(
            tree, alignment, rate_matrix_object)
    # get ll when rates are numerically optimized
    # TODO incorporate the result into the xml file
    # TODO speed up the likelihood evaluation (beagle? C module?)
    #nbranches = 2 * nleaves - 2
    #f = Opt(R, B, N_leaves, alignment)
    #X_logs = [0.0] * nbranches
    #result = scipy.optimize.fmin(f, X_logs, full_output=True)
    #print result
    #
    print >> out, '<?xml version="1.0"?>'
    print >> out, '<beast>'
    print >> out
    print >> out, '<!-- actual rate autocorrelation', xycorr, '-->'
    print >> out, '<!-- actual root height', v_to_age[r], '-->'
    print >> out, '<!-- actual log likelihood', ll, '-->'
    print >> out, '<!-- ll if rates were unity', ll_unity, '-->'
    print >> out
    print >> out, '<!--'
    print >> out, 'predefine the taxa as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Introduction_to_XML_format'
    print >> out, '-->'
    print >> out, get_leaf_taxon_defn(leaf_names)
    print >> out
    print >> out, '<!--'
    print >> out, 'define the alignment as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Introduction_to_XML_format'
    print >> out, '-->'
    # pass the leaf vertices explicitly instead of relying on
    # whatever value a loop variable happened to hold last
    print >> out, get_alignment_defn(leaves, N, v_to_seq)
    print >> out
    print >> out, '<!--'
    print >> out, 'specify the starting tree as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_4'
    print >> out, '-->'
    print >> out, get_starting_tree_defn(R, B, N_leaves)
    print >> out
    print >> out, '<!--'
    print >> out, 'connect the tree model as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_4'
    print >> out, '-->'
    print >> out, g_tree_model_defn
    print >> out
    print >> out, g_uncorrelated_relaxed_clock_info
    print >> out
    # NOTE: the mrca taxon subsets and tmrca statistics are disabled
    #print >> out, '<!--'
    #print >> out, 'create a list of taxa for which to constrain the mrca as in'
    #print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    #print >> out, '-->'
    #for v, v_leaves in sorted(v_to_leaves.items()):
    #    if len(v_leaves) > 1:
    #        print >> out, get_mrca_subset_defn(N, v, v_leaves)
    #print >> out
    #print >> out, '<!--'
    #print >> out, 'create a tmrcaStatistic that will record the height as in'
    #print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    #print >> out, '-->'
    #for v, v_leaves in sorted(v_to_leaves.items()):
    #    if len(v_leaves) > 1:
    #        print >> out, get_mrca_stat_defn(N[v])
    print >> out
    print >> out, g_likelihood_info
    print >> out
    print >> out, '<!--'
    print >> out, 'run the mcmc'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    print >> out, get_mcmc_defn(v_to_leaves, v_to_age, N)
    print >> out
    print >> out, '</beast>'
    # return the response
    return out.getvalue()
Ejemplo n.º 6
0
def get_response_content(fs):
    """
    Report a time-limited numerical search on the test tree topology.
    The second and third eigenpairs of the Schur complement Laplacian
    of the true tree define the target matrix.
    A Functor objective built from the test tree is then minimized
    with scipy.optimize.fmin, restarting from the same initial guess
    with a doubled evaluation cap on each pass,
    until half of a five second limit has elapsed.
    Only the results of the last pass are reported.
    @param fs: object with attributes true_tree and test_tree
    @return: a multi-line text report
    @raise ValueError: if the two trees do not have the same vertices,
    the same leaves, and the same internal vertices
    """
    half_limit = 5.0 / 2
    R_true, B_true = FtreeIO.newick_to_RB(fs.true_tree, int)
    R_test = FtreeIO.newick_to_R(fs.test_tree, int)
    # get the unrooted tree topology
    T_true = Ftree.R_to_T(R_true)
    T_test = Ftree.R_to_T(R_test)
    # check the trees for vertex compatibility
    compatibility_checks = (
            (Ftree.T_to_order,
                'vertex sets are not equal'),
            (Ftree.T_to_leaves,
                'leaf vertex sets are not equal'),
            (Ftree.T_to_internal_vertices,
                'internal vertex sets are not equal'),
            )
    for extract, message in compatibility_checks:
        if set(extract(T_true)) != set(extract(T_test)):
            raise ValueError(message)
    # get the 2D MDS for the true tree
    leaves = Ftree.T_to_leaves(T_true)
    internal = Ftree.T_to_internal_vertices(T_true)
    vertices = leaves + internal
    L_schur = Ftree.TB_to_L_schur(T_true, B_true, leaves)
    w_all, Vp_all = scipy.linalg.eigh(L_schur)
    w = w_all[1:3]
    Vp = Vp_all[:, 1:3]
    # make the constant matrix for Frobenius norm comparison;
    # only the leaf rows are nonzero
    C = np.zeros((len(vertices), 2))
    C[:len(leaves)] = w * Vp
    # keep doing passes until we run out of time
    nparams = len(T_test) + 2 * len(internal)
    eval_cap = 256
    t_start = time.time()
    while time.time() - t_start < half_limit:
        eval_cap *= 2
        f = Functor(T_test.copy(), Vp.copy(), C.copy(), w.copy())
        xopt, fopt, itr, funcalls, warnflag = scipy.optimize.fmin(
                f, np.ones(nparams),
                ftol=1e-8, xtol=1e-8,
                full_output=True,
                maxfun=eval_cap, maxiter=eval_cap)
    # look at the values from the longest running pass
    B, Vr = f.X_to_B_Vr(xopt)
    L, V = f.X_to_L_V(xopt)
    Lrr = Ftree.TB_to_L_block(T_test, B, internal, internal)
    Lrp = Ftree.TB_to_L_block(T_test, B, internal, leaves)
    H_ext = -np.dot(np.linalg.pinv(Lrr), Lrp)
    N = dict((v, str(v)) for v in vertices)
    # start writing the response
    out = StringIO()
    labeled_values = (
            ('xopt:', xopt),
            ('fopt:', fopt),
            ('number of iterations:', itr),
            ('number of function calls:', funcalls),
            ('warning flags:', warnflag),
            ('first four eigenvalues:', w_all[:4]),
            )
    for label, value in labeled_values:
        print >> out, label, value
    print >> out, 'Vr:'
    print >> out, Vr
    print >> out, '-Lrr^-1 Lrp Vp:'
    print >> out, np.dot(H_ext, Vp)
    print >> out, C
    print >> out, np.dot(L, V)
    print >> out, FtreeIO.RBN_to_newick(R_test, B, N)
    return out.getvalue()