def NNI(t):
    """
    Randomly select an internal node and do one nearest-neighbour
    interchange (NNI) around it.  Alters the tree <t> in place.

    A parent is drawn from t.internal_nodes(), then two distinct children of
    it: <target> and <sibling>.  <target> must itself be internal; draws with
    a leaf <target> are rejected and repeated.  One child of <target> is then
    exchanged with <sibling>.

    Returns: t, (new) order
    Raises:  ValueError if <t> offers no NNI move at all, i.e. no internal
             node has at least two children of which one is internal.
             (Without this check the rejection loop below never terminates
             on such a tree.)
    """
    # The tree is not modified while we are sampling, so ask for the
    # internal nodes once instead of on every rejected draw.
    candidates = t.internal_nodes()

    def offers_move(node):
        kids = node.child_nodes()
        return len(kids) >= 2 and not all(k.is_leaf() for k in kids)

    if not any(offers_move(node) for node in candidates):
        raise ValueError(
            "NNI: tree has no internal node with an internal child")

    while True:
        parent = random.choice(candidates)
        # choose one of the kids as target and another as sibling
        target, sibling = random.sample(parent.child_nodes(), 2)
        if not target.is_leaf():
            # select one child of target to swap w/ sibling
            child = random.choice(target.child_nodes())
            break

    # Same text the old `print >> sys.stderr, ...` produced, but written in a
    # form that is valid under both Python 2 and Python 3.
    sys.stderr.write(
        "NNI: parent {0}, target {1}, sibling {2}, child {3}".format(
            parent.label, target.label, sibling.label, child.label) + "\n")

    # Swap child & sibling in the tree.  Branch lengths are adjusted so that
    # the distance from <parent> to each moved node is unchanged:
    #   child moves up   : picks up target's branch
    #   sibling moves down: gives up target's branch
    # NOTE(review): new_sibling_branch is negative whenever sibling's branch
    # is shorter than target's -- confirm that callers tolerate / repair this.
    new_child_branch = child.edge_length + target.edge_length
    new_sibling_branch = sibling.edge_length - target.edge_length
    parent.remove_child(sibling)
    target.remove_child(child)
    parent.add_child(child, new_child_branch)
    target.add_child(sibling, new_sibling_branch)

    # obtain new order via postorder traversal (should be fast enough)
    order = Tree.postorder_assign_then_traverse(t, None, do_assign=False)
    return t, order
def optimize_branch_fast(tlobj, tprime, children_index_list):
    """
    Quick optimization of subset of branches (indicated by
    <children_index_list>) in tprime while using tlobj for parameters

    children_index_list --- list of (i, j) indicating that we want to
        iteratively refine the branch of node label i --- node label j
        Most likely will be the 3 local branches at the new insertion
        point of a subtree.

    Every listed branch is optimized on its own with L-BFGS-B (numerical
    gradient, branch length bounded to [1e-3, 10], pgtol 1e-2).  The whole
    list is swept again as long as some branch moved by more than 0.1 in
    the last sweep (the relaxed tolerance that makes this "fast").

    Returns: (fx, tprime) where fx is the objective value
                 -(L_single.sum() + L_paired.sum())
             reported for the last branch that was optimized (the
             "(positive) log likelihood" of tprime).  If
             <children_index_list> is empty, fx is that same quantity
             evaluated on tprime as given.
             (None, None) as soon as the optimizer reports a non-zero
             warnflag for any branch.

    NOTE: this only changes branch lengths in tprime. tlobj not affected!!
          (tlobj.S / tlobj.P are copied before use.)

    NOTE: reversible_subtree_func currently cheats by only recalc-ing the
          entries of parent and up, to ensure this works, I think making
          sure copy_{S|P} are correct is important and therefore the two
          g() calls.
    """
    g = MyMat.calc_likelihood
    meat = scipy.optimize.fmin_l_bfgs_b

    def reversible_subtree_func(copy_S, copy_P, parent, child, t_a):
        # t_a is the length-1 parameter vector handed over by the optimizer.
        # Evaluating the objective has the side effect of writing the trial
        # branch length into the tree.
        child.edge_length = t_a[0]
        order = Tree.postorder_cheat_traverse(parent)
        L_single = g(tlobj.single_model.gtr.R, copy_S, tlobj.log_freq_single,
                     order, range(tlobj.ncol),
                     tlobj.nnode, tlobj.ncol, tlobj.nbase)
        L_paired = g(tlobj.paired_model.gtr.R, copy_P, tlobj.log_freq_paired,
                     order, range(tlobj.ncol_p),
                     tlobj.nnode_p, tlobj.ncol_p, tlobj.nbase_p)
        return -(L_single.sum() + L_paired.sum())

    # TODO: make this more efficient!
    copy_S = tlobj.S.copy()
    copy_P = tlobj.P.copy()
    order = Tree.postorder_assign_then_traverse(tprime, None, False)
    # Full pass over tprime so that copy_S / copy_P are valid everywhere
    # before the partial ("cheat") recalculations start.
    L_single = g(tlobj.single_model.gtr.R, copy_S, tlobj.log_freq_single,
                 order, range(tlobj.ncol),
                 tlobj.nnode, tlobj.ncol, tlobj.nbase)
    L_paired = g(tlobj.paired_model.gtr.R, copy_P, tlobj.log_freq_paired,
                 order, range(tlobj.ncol_p),
                 tlobj.nnode_p, tlobj.ncol_p, tlobj.nbase_p)
    # Seed the result with the objective of the unmodified tprime.  It is
    # overwritten by every branch optimization; it only survives when there
    # is nothing to optimize, where `fx` used to be unbound at the return.
    fx = -(L_single.sum() + L_paired.sum())

    changed = True
    while changed:
        changed = False
        for i, j in children_index_list:
            parent = tprime.find_node_with_label(i)
            child = tprime.find_node_with_label(j)
            old_t_a = child.edge_length

            def func(x, parent=parent, child=child):
                return reversible_subtree_func(copy_S, copy_P,
                                               parent, child, x)

            # NOTE(review): the branch length left on `child` afterwards is
            # whatever point the optimizer evaluated last, which need not be
            # exactly x[0] when the gradient is approximated -- confirm this
            # is acceptable.
            x, fx, d = meat(func, [old_t_a], approx_grad=True,
                            bounds=[(1e-3, 10)], pgtol=1e-2)
            if d['warnflag'] != 0:
                return None, None  # handle this appropriately!
            if abs(x[0] - old_t_a) > 0.1:
                changed = True
    return fx, tprime
def __init__(self, msa, tree, single_model, paired_model, treat_gap_as_missing):
    """
    Set up the likelihood state for an alignment/tree pair.

    Input:
    msa  --- MSA object (the alignment)
    tree --- initially is the starting tree (dendropy.Tree)
    single/paired model -- EvoModel.{single|paired}model objects
    treat_gap_as_missing -- stored as given

    Attributes created here:
    log_freq_single / log_freq_paired --- log() of each model's Frequency
    single_cols --- list of unpaired positions, from msa.single_cols()
    paired_cols --- sorted list of paired positions (i, j), from msa.BP
    order --- postorder traversal (often changes! beware!)
    like  --- positive log likelihood (often changes! beware!), starts None
    S, P  --- likelihood matrices for single / paired positions, start None
    nnode, ncol, nbase (and the *_p twins) for single/paired parameters

    NOTE: ENFORCES tree TO BE BINARY
    """
    # inputs, kept as given
    self.msa, self.tree = msa, tree
    self.single_model, self.paired_model = single_model, paired_model
    self.treat_gap_as_missing = treat_gap_as_missing

    self.log_freq_single = log(single_model.Frequency)
    self.log_freq_paired = log(paired_model.Frequency)

    # column bookkeeping
    unpaired = msa.single_cols()
    paired = sorted(msa.BP.items())
    self.single_cols, self.paired_cols = unpaired, paired

    # dimensions handed to the likelihood routines; single and paired
    # share the node count
    node_count = 2 * msa.nseq + 1
    self.nnode = self.nnode_p = node_count
    self.ncol, self.ncol_p = len(unpaired), len(paired)
    # NOTE(review): presumably 4 nucleotides + gap, and 5*5 for pairs -- confirm
    self.nbase, self.nbase_p = 5, 25

    # binarization must precede the traversal that fills self.order!
    Tree.make_tree_binary(tree)
    self.order = Tree.postorder_assign_then_traverse(tree, list(msa.ids))

    # filled in later
    self.like = None  # should be the positive log likelihood
    self.S = None     # likelihood matrix for single positions
    self.P = None     # likelihood matrix for paired positions
def update_order(self):
    """
    Recompute self.order from the current self.tree via
    Tree.postorder_assign_then_traverse (called with None and False as the
    second and third arguments).
    """
    refreshed = Tree.postorder_assign_then_traverse(self.tree, None, False)
    self.order = refreshed