def __call__(self, X_logs):
    """
    Evaluate the objective function at a vector of log branch rates.
    The vth entry of X_logs is the log of the rate on the branch above v.
    Each branch length in self.B is multiplied by the rate of its
    child vertex before the likelihood is evaluated.
    @param X_logs: vector of branch rate logs
    @return: negative log likelihood (the quantity to be minimized)
    """
    # undo the log transform to recover the branch rates
    rates = [math.exp(x_log) for x_log in X_logs]
    # scale each branch length by the rate indexed by its child vertex
    scaled_lengths = {}
    for v_parent, v_child in self.R:
        edge = frozenset([v_parent, v_child])
        scaled_lengths[edge] = rates[v_child] * self.B[edge]
    # round trip through newick to get the tree object used by the likelihood
    tree = Newick.parse(
        FtreeIO.RBN_to_newick(self.R, scaled_lengths, self.N_leaves),
        Newick.NewickTree)
    # define the Jukes-Cantor rate matrix object over the states A, C, G, T
    states = list('ACGT')
    jc_dict = RateMatrix.get_jukes_cantor_rate_matrix()
    jc_row_major = MatrixUtil.dict_to_row_major(jc_dict, states, states)
    jc_object = RateMatrix.RateMatrix(jc_row_major, states)
    # negate the log likelihood so that a minimizer maximizes the likelihood
    log_likelihood = PhyLikelihood.get_log_likelihood(
        tree, self.alignment, jc_object)
    return -log_likelihood
def get_starting_tree_defn(R, B, N_leaves):
    """
    Get the xml newick element that defines the starting tree.
    Every leaf name is prefixed with 'taxon_' before the tree is
    converted to a newick string by FtreeIO.RBN_to_newick.
    @param R: first argument forwarded to FtreeIO.RBN_to_newick
    @param B: second argument forwarded to FtreeIO.RBN_to_newick
    @param N_leaves: map from leaf vertex to leaf name
    @return: a multi-line xml string without trailing whitespace
    """
    # prefix the leaf names
    N_aug = {}
    for v, name in N_leaves.items():
        N_aug[v] = 'taxon_' + name
    newick = FtreeIO.RBN_to_newick(R, B, N_aug)
    # the closing tag is the last thing in the string so nothing trails it
    return '<newick id="startingTree">\n%s\n</newick>' % (newick,)
def get_response_content(fs):
    """
    Root the tree at its harmonic Fiedler point and hang a new leaf there.
    The edge on which the harmonically extended valuation changes sign
    is split by a new root vertex, a new leaf is attached to that root,
    and the length of the new pendent branch is chosen by a golden
    section search that minimizes get_gap.
    @param fs: user input with attributes tree, root_name and leaf_name
    @return: newick string of the modified tree rooted at the new vertex
    @raise ValueError: if a vertex valuation is too close to zero
    or if the sign change does not occur on exactly one edge
    """
    # read the tree
    T, B, N = FtreeIO.newick_to_TBN(fs.tree)
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    # get the valuations with harmonic extensions
    _, V = Ftree.TB_to_harmonic_extension(T, B, leaves, internal)
    # get the Fiedler valuations with harmonic extensions
    h = V[:, 0]
    # check for vertices with small valuations;
    # the magnitude must be compared to eps because abs(x) < x never holds
    eps = 1e-8
    if any(abs(x) < eps for x in h):
        raise ValueError('the tree has no clear harmonic Fiedler point')
    # find the edge containing the harmonic Fiedler point;
    # the rows of V are ordered as leaves followed by internal vertices
    v_to_val = dict((v, h[i]) for i, v in enumerate(leaves + internal))
    d_edges = [(a, b) for a, b in T if v_to_val[a] * v_to_val[b] < 0]
    if len(d_edges) != 1:
        raise ValueError('expected the point to fall clearly on a single edge')
    d_edge = d_edges[0]
    a, b = d_edge
    # find the proportion along the directed edge at which the
    # linearly interpolated valuation crosses zero, measured from a
    t = v_to_val[a] / (v_to_val[a] - v_to_val[b])
    # find the distance from the new root to each endpoint vertex
    u_edge = frozenset(d_edge)
    d = B[u_edge]
    da = t * d
    db = (1 - t) * d
    # create the new tree by splitting the edge at a new root vertex
    r = max(Ftree.T_to_order(T)) + 1
    N[r] = fs.root_name
    T.remove(u_edge)
    del B[u_edge]
    ea = frozenset((r, a))
    eb = frozenset((r, b))
    T.add(ea)
    T.add(eb)
    B[ea] = da
    B[eb] = db
    # add a new leaf with arbitrary branch length
    leaf = r + 1
    N[leaf] = fs.leaf_name
    u_edge = frozenset((r, leaf))
    T.add(u_edge)
    B[u_edge] = 1.0
    # get the best branch length to cause eigenvalue multiplicity
    blen = scipy.optimize.golden(
        get_gap, args=(T, B, u_edge), full_output=False, tol=1e-12)
    B[u_edge] = blen
    # return the string representation of the new tree
    R = Ftree.T_to_R_specific(T, r)
    return FtreeIO.RBN_to_newick(R, B, N)
def get_response_content(fs):
    """
    Root the tree at its harmonic Fiedler point.
    The edge on which the harmonically extended valuation changes sign
    is split by a new root vertex placed where the linearly
    interpolated valuation is zero.
    @param fs: user input with a tree attribute holding a newick string
    @return: newick string of the tree rooted at the new vertex
    @raise ValueError: if a vertex valuation is too close to zero
    or if the sign change does not occur on exactly one edge
    """
    # read the tree
    T, B, N = FtreeIO.newick_to_TBN(fs.tree)
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    # get the valuations with harmonic extensions
    _, V = Ftree.TB_to_harmonic_extension(T, B, leaves, internal)
    # get the Fiedler valuations with harmonic extensions
    h = V[:, 0]
    # check for vertices with small valuations;
    # the magnitude must be compared to eps because abs(x) < x never holds
    eps = 1e-8
    if any(abs(x) < eps for x in h):
        raise ValueError('the tree has no clear harmonic Fiedler point')
    # find the edge containing the harmonic Fiedler point;
    # the rows of V are ordered as leaves followed by internal vertices
    v_to_val = dict((v, h[i]) for i, v in enumerate(leaves + internal))
    d_edges = [(a, b) for a, b in T if v_to_val[a] * v_to_val[b] < 0]
    if len(d_edges) != 1:
        raise ValueError('expected the point to fall clearly on a single edge')
    d_edge = d_edges[0]
    a, b = d_edge
    # find the proportion along the directed edge at which the
    # linearly interpolated valuation crosses zero, measured from a
    t = v_to_val[a] / (v_to_val[a] - v_to_val[b])
    # find the distance from the new root to each endpoint vertex
    u_edge = frozenset(d_edge)
    d = B[u_edge]
    da = t * d
    db = (1 - t) * d
    # create the new tree by splitting the edge at a new root vertex;
    # the new root gets no entry in the vertex name map N
    r = max(Ftree.T_to_order(T)) + 1
    T.remove(u_edge)
    del B[u_edge]
    ea = frozenset((r, a))
    eb = frozenset((r, b))
    T.add(ea)
    T.add(eb)
    B[ea] = da
    B[eb] = db
    R = Ftree.T_to_R_specific(T, r)
    # return the string representation of the new tree
    return FtreeIO.RBN_to_newick(R, B, N)
def get_response_content(fs):
    """
    Simulate a data set and write it as a BEAST xml document.
    A coalescent tree is sampled, rates are sampled on its branches,
    an alignment is sampled on the rate-scaled tree, and the result
    is written with the true summary values embedded as xml comments.
    @param fs: user input with attributes nleaves and nsites
    @return: the BEAST xml document as a string
    @raise ValueError: if more leaves are requested than there are
    single uppercase ascii letters available to name them
    """
    # init the response and get the user variables
    out = StringIO()
    nleaves = fs.nleaves
    nsites = fs.nsites
    # Leaves are named by single letters.
    # Use the ascii constant because string.uppercase depends on the locale.
    if nleaves > len(string.ascii_uppercase):
        raise ValueError(
                'at most %d leaves can be given single letter names' % (
                    len(string.ascii_uppercase),))
    leaf_names = list(string.ascii_uppercase[:nleaves])
    # sample the coalescent tree with timelike branch lengths
    R, B = kingman.sample(nleaves)
    r = Ftree.R_to_root(R)
    # get the leaf vertex names
    N = dict(zip(range(nleaves), leaf_names))
    N_leaves = dict(N)
    # get the internal vertex names;
    # each is the sorted concatenation of the names of the leaves below it
    v_to_leaves = R_to_v_to_leaves(R)
    for v, v_leaves in sorted(v_to_leaves.items()):
        if len(v_leaves) > 1:
            N[v] = ''.join(sorted(N[leaf] for leaf in v_leaves))
    # Name the leaf set of the whole tree explicitly instead of relying
    # on whatever the naming loop above happened to visit last.
    # NOTE(review): assumes v_to_leaves[r] holds every leaf -- confirm
    # against R_to_v_to_leaves.
    root_leaves = v_to_leaves[r]
    # get vertex ages
    v_to_age = kingman.RB_to_v_to_age(R, B)
    # sample the rates on the branches
    b_to_rate = sample_b_to_rate(R)
    xycorr = get_correlation(R, b_to_rate)
    # define B_subs in terms of substitutions instead of time
    B_subs = dict((p, t * b_to_rate[p]) for p, t in B.items())
    # sample the alignment
    v_to_seq = sample_v_to_seq(R, B_subs, nsites)
    # get the log likelihood; this is kind of horrible
    pairs = [(N[v], ''.join(v_to_seq[v])) for v in range(nleaves)]
    headers, sequences = zip(*pairs)
    alignment = Fasta.create_alignment(headers, sequences)
    newick_string = FtreeIO.RBN_to_newick(R, B_subs, N_leaves)
    tree = Newick.parse(newick_string, Newick.NewickTree)
    dictionary_rate_matrix = RateMatrix.get_jukes_cantor_rate_matrix()
    ordered_states = list('ACGT')
    row_major_rate_matrix = MatrixUtil.dict_to_row_major(
            dictionary_rate_matrix, ordered_states, ordered_states)
    rate_matrix_object = RateMatrix.RateMatrix(
            row_major_rate_matrix, ordered_states)
    ll = PhyLikelihood.get_log_likelihood(
            tree, alignment, rate_matrix_object)
    # get ll when rates are all 1.0
    newick_string = FtreeIO.RBN_to_newick(R, B, N_leaves)
    tree = Newick.parse(newick_string, Newick.NewickTree)
    ll_unity = PhyLikelihood.get_log_likelihood(
            tree, alignment, rate_matrix_object)
    # get ll when rates are numerically optimized
    # TODO incorporate the result into the xml file
    # TODO speed up the likelihood evaluation (beagle? C module?)
    # Disabled sketch of the optimization;
    # the number of branches is 2 * nleaves - 2.
    #f = Opt(R, B, N_leaves, alignment)
    #X_logs = [0.0] * (2 * nleaves - 2)
    #result = scipy.optimize.fmin(f, X_logs, full_output=True)
    #print result
    #
    print >> out, '<?xml version="1.0"?>'
    print >> out, '<beast>'
    print >> out
    print >> out, '<!-- actual rate autocorrelation', xycorr, '-->'
    print >> out, '<!-- actual root height', v_to_age[r], '-->'
    print >> out, '<!-- actual log likelihood', ll, '-->'
    print >> out, '<!-- ll if rates were unity', ll_unity, '-->'
    print >> out
    print >> out, '<!--'
    print >> out, 'predefine the taxa as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Introduction_to_XML_format'
    print >> out, '-->'
    print >> out, get_leaf_taxon_defn(leaf_names)
    print >> out
    print >> out, '<!--'
    print >> out, 'define the alignment as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Introduction_to_XML_format'
    print >> out, '-->'
    print >> out, get_alignment_defn(root_leaves, N, v_to_seq)
    print >> out
    print >> out, '<!--'
    print >> out, 'specify the starting tree as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_4'
    print >> out, '-->'
    print >> out, get_starting_tree_defn(R, B, N_leaves)
    print >> out
    print >> out, '<!--'
    print >> out, 'connect the tree model as in'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_4'
    print >> out, '-->'
    print >> out, g_tree_model_defn
    print >> out
    print >> out, g_uncorrelated_relaxed_clock_info
    print >> out
    # The mrca constraint and tmrca statistic sections are disabled.
    #print >> out, '<!--'
    #print >> out, 'create a list of taxa for which to constrain the mrca as in'
    #print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    #print >> out, '-->'
    #for v, leaves in sorted(v_to_leaves.items()):
    #    if len(leaves) > 1:
    #        print >> out, get_mrca_subset_defn(N, v, leaves)
    #print >> out
    #print >> out, '<!--'
    #print >> out, 'create a tmrcaStatistic that will record the height as in'
    #print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    #print >> out, '-->'
    #for v, leaves in sorted(v_to_leaves.items()):
    #    if len(leaves) > 1:
    #        print >> out, get_mrca_stat_defn(N[v])
    print >> out
    print >> out, g_likelihood_info
    print >> out
    print >> out, '<!--'
    print >> out, 'run the mcmc'
    print >> out, 'http://beast.bio.ed.ac.uk/Tutorial_3.1'
    print >> out, '-->'
    print >> out, get_mcmc_defn(v_to_leaves, v_to_age, N)
    print >> out
    print >> out, '</beast>'
    # return the response
    return out.getvalue()
def get_response_content(fs):
    """
    Fit branch lengths and internal vertex points on the test topology.
    The target is built from two eigenpairs of the Schur complement
    Laplacian of the true tree.  The fit is restarted with a doubled
    evaluation budget until half of the time limit has elapsed,
    and the result of the last restart is reported.
    @param fs: user input with attributes true_tree and test_tree
    @return: a multi-line plain text report
    @raise ValueError: if the two trees do not share the same vertices,
    the same leaves, and the same internal vertices
    """
    nseconds_limit = 5.0
    R_true, B_true = FtreeIO.newick_to_RB(fs.true_tree, int)
    R_test = FtreeIO.newick_to_R(fs.test_tree, int)
    # get the unrooted tree topology
    T_true = Ftree.R_to_T(R_true)
    T_test = Ftree.R_to_T(R_test)
    # check the trees for vertex compatibility
    compatibility_checks = (
        (Ftree.T_to_order, 'vertex sets are not equal'),
        (Ftree.T_to_leaves, 'leaf vertex sets are not equal'),
        (Ftree.T_to_internal_vertices, 'internal vertex sets are not equal'),
    )
    for get_vertices, message in compatibility_checks:
        if set(get_vertices(T_true)) != set(get_vertices(T_test)):
            raise ValueError(message)
    # get the 2D MDS for the true tree
    leaves = Ftree.T_to_leaves(T_true)
    internal = Ftree.T_to_internal_vertices(T_true)
    vertices = leaves + internal
    L_schur = Ftree.TB_to_L_schur(T_true, B_true, leaves)
    w_all, Vp_all = scipy.linalg.eigh(L_schur)
    # keep the eigenpairs at indices 1 and 2
    w = w_all[1:3]
    Vp = Vp_all[:, 1:3]
    # make the constant matrix for Frobenius norm comparison;
    # only the leaf rows are nonzero
    C = np.zeros((len(vertices), 2))
    C[:len(leaves)] = w * Vp
    # keep doing iterations until we run out of time
    budget = 256
    half_limit = nseconds_limit / 2
    t_initial = time.time()
    while time.time() - t_initial < half_limit:
        budget *= 2
        f = Functor(T_test.copy(), Vp.copy(), C.copy(), w.copy())
        start = np.ones(len(T_test) + 2 * len(internal))
        xopt, fopt, itr, funcalls, warnflag = scipy.optimize.fmin(
            f, start, ftol=1e-8, xtol=1e-8,
            full_output=True, maxfun=budget, maxiter=budget)
    # look at the values from the longest running iteration
    B, Vr = f.X_to_B_Vr(xopt)
    L, V = f.X_to_L_V(xopt)
    Lrr = Ftree.TB_to_L_block(T_test, B, internal, internal)
    Lrp = Ftree.TB_to_L_block(T_test, B, internal, leaves)
    H_ext = -np.dot(np.linalg.pinv(Lrr), Lrp)
    N = dict((v, str(v)) for v in vertices)
    # write the response one line group at a time;
    # each value is wrapped in a tuple so that arrays format as one item
    report = [
        'xopt: %s' % (xopt,),
        'fopt: %s' % (fopt,),
        'number of iterations: %s' % (itr,),
        'number of function calls: %s' % (funcalls,),
        'warning flags: %s' % (warnflag,),
        'first four eigenvalues: %s' % (w_all[:4],),
        'Vr:',
        '%s' % (Vr,),
        '-Lrr^-1 Lrp Vp:',
        '%s' % (np.dot(H_ext, Vp),),
        '%s' % (C,),
        '%s' % (np.dot(L, V),),
        '%s' % (FtreeIO.RBN_to_newick(R_test, B, N),),
    ]
    return '\n'.join(report) + '\n'