def remove_kink(node, curroot):
    """
    Smooth the kink created by pruning, to prevent creating orphaned tips
    after pruning twice at the same node.

    node    -- the node left behind by a prune (expected to have exactly
               one child unless it is the root)
    curroot -- the current root of the tree

    Two cases are handled:

    * node is the root: the root is moved to an adjacent non-tip child via
      phylo3.reroot. If the root has a single child, that child is promoted
      to root first. `node` itself is returned unchanged in this case.
    * otherwise: node is spliced out. Its only child is attached directly
      to node's parent and inherits the sum of both branch lengths.

    Returns the tuple (node, curroot): the surviving node and the possibly
    replaced root.

    Raises AssertionError if the root does not have exactly two children
    once a single-child root has been stepped over.
    """
    if node == curroot:
        # Single-argument parenthesised form behaves the same under the
        # Python 2 print statement.
        print("fix bifurcating root by moving the root away to an adjacent none-tip")
        if curroot.nchildren == 1:
            # step over a single-child root and detach the new root
            curroot = curroot.children[0]
            curroot.parent = None
        # The message used to call the misspelled `newick3.tosting`, so a
        # failed check raised AttributeError instead of showing the tree.
        # `to_string` is the spelling the rooting script in this file uses.
        assert curroot.nchildren == 2, \
            "check tree root format: " + newick3.to_string(curroot) + ";"
        if curroot.children[0].istip:  # the other child is not tip
            curroot = phylo3.reroot(curroot, curroot.children[1])
        else:
            curroot = phylo3.reroot(curroot, curroot.children[0])
    else:
        # ---node---<  all nodes should have one child only now
        length = node.length + (node.children[0]).length
        par = node.parent
        kink = node
        node = node.children[0]
        # parent--kink---node<
        par.remove_child(kink)
        par.add_child(node)
        node.length = length
    return node, curroot
def remove_kink(node, curroot):
    """
    Smooth the kink created by pruning, to prevent creating orphaned tips
    after pruning twice at the same node.

    node    -- the node left behind by a prune (expected to have exactly
               one child unless it is the root)
    curroot -- the current root of the tree

    When node is the root, the root is moved onto an adjacent non-tip child
    with phylo3.reroot (a single-child root is first replaced by its child)
    and `node` is returned as given. For any other node, the node is removed
    from its parent, its only child takes its place, and the child's branch
    length becomes the sum of the two branch lengths.

    Returns (node, curroot).

    Raises AssertionError if the root does not have exactly two children
    after a single-child root has been stepped over.
    """
    if node == curroot:
        # Parenthesised single-argument print works identically with the
        # Python 2 print statement.
        print("fix bifurcating root by moving the root away to an adjacent none-tip")
        if curroot.nchildren == 1:
            # promote the only child to root and cut its link to the old root
            curroot = curroot.children[0]
            curroot.parent = None
        # Fixed the misspelled serializer name (`tosting`): evaluating the
        # message would fail with AttributeError and hide the assertion.
        # `to_string` matches the call made by the rooting script in this file.
        assert curroot.nchildren == 2, \
            "check tree root format: " + newick3.to_string(curroot) + ";"
        if curroot.children[0].istip:  # the other child is not tip
            curroot = phylo3.reroot(curroot, curroot.children[1])
        else:
            curroot = phylo3.reroot(curroot, curroot.children[0])
    else:
        # ---node---<  all nodes should have one child only now
        length = node.length + (node.children[0]).length
        par = node.parent
        kink = node
        node = node.children[0]
        # parent--kink---node<
        par.remove_child(kink)
        par.add_child(node)
        node.length = length
    return node, curroot
def remove_kink(node, curroot):
    """
    Splice a single-child node out of the tree.

    If node is the root and the root has two children, the tree is first
    rerooted (phylo3.reroot) on a child of the root: the second child when
    the first one is a tip, otherwise the first child. After that, node's
    only child is attached to node's parent in node's place, with a branch
    length equal to the sum of both branch lengths.

    Returns (child, curroot): the child that replaced node, and the current
    root (which may be the new root produced by rerooting).
    """
    if node == curroot and curroot.nchildren == 2:
        # move the root away to an adjacent non-tip child
        pivot = 1 if curroot.children[0].istip else 0
        curroot = phylo3.reroot(curroot, curroot.children[pivot])

    # ---node---<  every node reaching this point should have one child only
    merged_length = node.length + node.children[0].length
    parent = node.parent
    child = node.children[0]
    # parent--kink---child<  becomes  parent---child<
    parent.remove_child(node)
    parent.add_child(child)
    child.length = merged_length
    return child, curroot
def extract_rooted_ingroup_clades(root, ingroups, outgroups, min_ingroup_taxa):
    """
    input a tree with ingroups and at least 1 outgroups
    output a list of rooted ingroup clades

    Each round scores both sides of every node: the number of distinct
    ingroup names on that side, or -1 if any outgroup name is present.
    The best-scoring side is cut off as a clade while its score is at
    least min_ingroup_taxa; extraction stops when no side qualifies or
    the remaining tree has 3 or fewer leaves.

    Exits through sys.exit if a name is in neither ingroups nor outgroups.
    """
    def side_score(names):
        # -1 as soon as an outgroup shows up, else count of distinct ingroups
        tally = 0
        for taxon in set(names):
            if taxon in outgroups:
                return -1
            if taxon in ingroups:
                tally += 1
            else:
                sys.exit("Check taxonID " + taxon)
        return tally

    clades = []
    while True:
        best_score, best_side, best_node = 0, "", None
        for candidate in root.iternodes():
            front_score = side_score(get_front_names(candidate))
            back_score = side_score(get_back_names(candidate, root))
            if front_score > best_score:
                best_score, best_side, best_node = front_score, "front", candidate
            if back_score > best_score:
                best_score, best_side, best_node = back_score, "back", candidate

        if best_score < min_ingroup_taxa:
            break

        if best_side == "front":
            # the clade hangs below best_node: keep it and prune it off
            clades.append(best_node)
            kink = best_node.prune()
            if len(root.leaves()) <= 3:
                break
            _, root = remove_kink(kink, root)
        elif best_side == "back":
            # the clade is everything except best_node's subtree:
            # detach that subtree, then flip direction by rerooting on
            # its former parent
            parent = best_node.parent
            parent.remove_child(best_node)
            best_node.prune()
            clades.append(phylo3.reroot(root, parent))
            if len(best_node.leaves()) <= 3:
                break
            # the detached subtree becomes the tree searched next round
            best_node, root = remove_kink(best_node, best_node)
    return clades
def remove_kink(node, curroot):
    """
    Smooth out the kink that pruning leaves behind, so that pruning twice
    at the same node does not create orphaned tips.

    A two-child root passed as node is first moved off the root position by
    rerooting (phylo3.reroot) on one of its children, preferring the child
    that is not a tip. The kink is then removed: node's only child is
    re-attached to node's parent, and its branch length becomes the sum of
    node's and the child's branch lengths.

    Returns (child, curroot).
    """
    is_forked_root = node == curroot and curroot.nchildren == 2
    if is_forked_root:
        # move the root away to an adjacent non-tip
        if curroot.children[0].istip:
            target = curroot.children[1]  # the other child is not tip
        else:
            target = curroot.children[0]
        curroot = phylo3.reroot(curroot, target)

    # ---node---<  all nodes should have one child only now
    total = node.length + node.children[0].length
    grandparent = node.parent
    survivor = node.children[0]
    # parent--kink---node<
    grandparent.remove_child(node)
    grandparent.add_child(survivor)
    survivor.length = total
    return survivor, curroot
def cut_long_branches(curroot, cutoff):
    """
    Cut every branch longer than cutoff and return the resulting subtrees.

    A two-child root is first rerooted (phylo3.reroot) on an adjacent
    non-tip child. The tree is then scanned repeatedly: the first non-root
    node whose branch length exceeds cutoff is recorded and pruned. While
    the remaining tree still has at least 4 leaves, the kink left by the
    prune is removed and the scan starts over.

    Returns a list holding each pruned subtree followed by what is left of
    the original tree.
    """
    pieces = []  # every subtree produced by cutting

    if curroot.nchildren == 2:  # fix the root
        # the tree has >=4 leaves, so if the first child is a tip the
        # other one cannot be
        pivot = 1 if curroot.children[0].istip else 0
        curroot = phylo3.reroot(curroot, curroot.children[pivot])

    while True:
        rescan = False  # only scan again if the last cut left >= 4 leaves
        for candidate in curroot.iternodes():
            if not (candidate != curroot and candidate.length > cutoff):
                continue
            pieces.append(candidate)
            stub = candidate.prune()
            if len(curroot.leaves()) >= 4:
                _, curroot = remove_kink(stub, curroot)
                rescan = True
            # NOTE(review): the source was flattened onto one line; `break`
            # is placed so the walk stops after every prune (the tree was
            # just modified) -- confirm against the original layout.
            break
        if not rescan:
            break

    pieces.append(curroot)  # the residue after cutting
    return pieces
def cut_long_branches(curroot, cutoff):
    """
    Break a tree apart at branches longer than cutoff.

    The root is fixed first: a root with two children is rerooted
    (phylo3.reroot) on whichever child is not a tip. Then, round by round,
    the first non-root node with a branch length above cutoff is added to
    the result and pruned from the tree. If at least 4 leaves remain, the
    leftover kink is smoothed with remove_kink and another round is run.

    Returns the list of pruned subtrees, with the remainder of the tree
    appended last.
    """
    subtrees = []  # store all subtrees after cutting

    if curroot.nchildren == 2:  # fix the root
        # move the root away to an adjacent none-tip internal node
        first_child = curroot.children[0]
        if first_child.istip:  # the other child is not tip
            curroot = phylo3.reroot(curroot, curroot.children[1])
        else:  # tree has >=4 leaves so the other node cannot be tip
            curroot = phylo3.reroot(curroot, curroot.children[0])

    keep_going = True
    while keep_going:
        # only keep going if a long branch was cut during this round and
        # enough leaves are left
        keep_going = False
        for current in curroot.iternodes():  # walk through nodes
            too_long = current != curroot and current.length > cutoff
            if too_long:
                subtrees.append(current)
                leftover = current.prune()
                if len(curroot.leaves()) >= 4:
                    leftover, curroot = remove_kink(leftover, curroot)
                    keep_going = True
                # NOTE(review): source indentation was lost; `break` is
                # assumed to follow every prune, not only the >= 4 leaves
                # case -- confirm against the original layout.
                break

    subtrees.append(curroot)  # write out the residue after cutting
    return subtrees
def reroot_with_monophyletic_outgroups(root):
    """
    Reroot the tree so that the outgroups sit on one side of the root.

    Outgroup tips are the leaves whose get_name(label) is in OUTGROUPS.

    * Exactly one outgroup tip: the tree is rerooted on that tip's parent,
      since it cannot be rerooted on a tip itself.
    * Otherwise: every non-root node is checked for a clean split, with
      only outgroup names on one side and only ingroup names on the other.
      The tree is rerooted on the first such node (outgroups in front) or
      on its parent (outgroups at the back).

    Returns the result of phylo3.reroot, or None when no node separates
    the outgroups from the ingroups.
    """
    def tally(names):
        # (ingroup count, outgroup count) for one side of a node
        n_in, n_out = 0, 0
        for name in names:
            if name in OUTGROUPS:
                n_out += 1
            else:
                n_in += 1
        return n_in, n_out

    # Only the leaves are needed: a label->leaf dict plus a parallel label
    # list was used before just to fetch the single outgroup tip.
    outgroup_leaves = [leaf for leaf in root.leaves()
                       if get_name(leaf.label) in OUTGROUPS]

    if len(outgroup_leaves) == 1:  # one single outgroup
        # cannot reroot on a tip so have to go one more node into the ingroup
        return phylo3.reroot(root, outgroup_leaves[0].parent)

    # has multiple outgroups (or none): check monophyly and reroot
    newroot = None
    for node in root.iternodes():
        if node == root:
            continue  # skip the root
        front_in, front_out = tally(get_front_names(node))
        back_in, back_out = tally(get_back_names(node, root))
        if front_in == 0 and front_out > 0 and back_in > 0 and back_out == 0:
            newroot = node  # ingroup at back, outgroup in front
            break
        if front_in > 0 and front_out == 0 and back_in == 0 and back_out > 0:
            newroot = node.parent  # ingroup in front, outgroup at back
            break

    # identity test: `!= None` would go through any custom comparison
    # defined on the node class
    if newroot is not None:
        return phylo3.reroot(root, newroot)
    return None
def extract_rooted_ingroup_clades(root, ingroups, outgroups, min_ingroup_taxa):
    """
    input a tree with ingroups and at least 1 outgroups
    output a list of rooted ingroup clades

    The tree is searched repeatedly for the node side ("front" or "back")
    holding the most distinct ingroup names and no outgroup name. While
    that count is at least min_ingroup_taxa the side is cut out and added
    to the result; the search ends when nothing qualifies or when 3 or
    fewer leaves are left in the tree being searched.

    Calls sys.exit when a name belongs to neither ingroups nor outgroups.
    """
    found = []
    searching = True
    while searching:
        top_score, top_side, top_node = 0, "", None
        for current in root.iternodes():
            for side in ("front", "back"):
                if side == "front":
                    names = set(get_front_names(current))
                else:
                    names = set(get_back_names(current, root))
                # number of distinct ingroups, or -1 once an outgroup is seen
                score = 0
                for name in names:
                    if name in outgroups:
                        score = -1
                        break
                    elif name in ingroups:
                        score += 1
                    else:
                        sys.exit("Check taxonID " + name)
                if score > top_score:
                    top_score, top_side, top_node = score, side, current

        if top_score >= min_ingroup_taxa:
            if top_side == "front":
                found.append(top_node)
                kink = top_node.prune()
                if len(root.leaves()) > 3:
                    root = remove_kink(kink, root)[1]
                else:
                    searching = False
            elif top_side == "back":
                above = top_node.parent
                above.remove_child(top_node)
                top_node.prune()
                # flip direction: the clade is the tree rerooted on the
                # former parent of the detached subtree
                found.append(phylo3.reroot(root, above))
                if len(top_node.leaves()) > 3:
                    # continue the search inside the detached subtree
                    top_node, root = remove_kink(top_node, top_node)
                else:
                    searching = False
        else:
            searching = False
    return found
import sys

import newick3
import phylo3

# Root every tree in <treesfile> on the MRCA of the taxa listed in
# <outgroupsfile> (one name per line) and write the rooted trees, one per
# line, to a file named after the input with ".tre" replaced by
# ".rooted.tre".

if len(sys.argv) < 3:
    # single-argument parenthesised print behaves the same on Python 2
    print(__doc__)
    print("usage: roottrees <treesfile> <outgroupsfile>")
    sys.exit(0)

treesfname = sys.argv[1]
outgroupsfname = sys.argv[2]

# `with` closes each file even if parsing or rerooting raises; the handles
# were previously left open for the rest of the run.
with open(outgroupsfname, "r") as outgroupsfile:
    # blank lines would otherwise become empty outgroup names
    outgroup_names = [line.strip() for line in outgroupsfile if line.strip()]

rooted_trees = []
with open(treesfname, "r") as treesfile:
    for line in treesfile:
        if not line.strip():
            continue  # skip blank lines, e.g. a trailing empty line
        tree = newick3.parse(line)
        outgroup = phylo3.getMRCA(tree, outgroup_names)
        rooted_tree = phylo3.reroot(tree, outgroup)
        rooted_trees.append(rooted_tree)

# The output is only created once every tree has been rooted successfully.
with open(treesfname.rsplit(".tre", 1)[0] + ".rooted.tre", "w") as outfile:
    for tree in rooted_trees:
        outfile.write(newick3.to_string(tree) + ";\n")