def cut_long_internal_branches(curroot, cutoff, min_taxa=4):
    """Cut long internal branches and collect the resulting subtrees.

    The tree is scanned repeatedly; each scan stops at the first change and
    a new scan starts, until a scan changes nothing.

    * Tips and the root itself are skipped.
    * An internal node with a single child is passed to ``remove_kink`` and
      the scan restarts.
    * An internal node whose ``length`` exceeds ``cutoff`` is cut off the
      tree with ``node.prune()``.  If its first two children are both
      internal and their combined length also exceeds ``cutoff``, the two
      children are recorded separately (each only when it has at least
      ``min_taxa`` taxa); otherwise the node itself is recorded without a
      size check.

    Args:
        curroot: root node of the tree to cut; the tree is modified in place.
        cutoff: branch length above which an internal branch is cut.
        min_taxa: minimum ``count_taxa`` value for a child subtree or the
            leftover tree to be recorded (defaults to 4, the value that was
            previously hard-coded).

    Returns:
        list of subtree root nodes, with what is left of the original tree
        appended last when it still has at least ``min_taxa`` taxa.
    """
    going = True
    subtrees = []  # store all subtrees after cutting
    while going:
        going = False  # only keep going if the last round changed the tree
        for node in curroot.iternodes():  # walk through nodes
            if node.istip or node == curroot:
                continue
            if node.nchildren == 1:
                node, curroot = remove_kink(node, curroot)
                going = True
                break
            child0, child1 = node.children[0], node.children[1]
            if node.length > cutoff:
                # Single-argument print(...) gives the same output under
                # Python 2 and Python 3.
                print(node.length)
                if (not child0.istip and not child1.istip
                        and child0.length + child1.length > cutoff):
                    print(child0.length + child1.length)
                    if count_taxa(child0) >= min_taxa:
                        subtrees.append(child0)
                    if count_taxa(child1) >= min_taxa:
                        subtrees.append(child1)
                else:
                    subtrees.append(node)
                node = node.prune()
                if len(curroot.leaves()) > 2:  # no kink if only two left
                    node, curroot = remove_kink(node, curroot)
                    going = True
                break
    if count_taxa(curroot) >= min_taxa:
        subtrees.append(curroot)  # write out the residue after cutting
    return subtrees
def prune(score_tuple, node, root, pp_trees):
    """Prune on one side of ``node`` and record the removed part.

    When ``score_tuple[0] > score_tuple[1]`` the "front" is pruned: ``node``
    is appended to ``pp_trees`` and detached from the tree with
    ``node.prune()``.  Otherwise the "back" is pruned: ``node`` is detached
    from its parent, the old ``root`` is appended to ``pp_trees`` and
    ``node`` becomes the basis of the new root.

    Args:
        score_tuple: indexable pair; element 0 is compared with element 1.
            NOTE(review): presumably (front score, back score) - confirm
            against the caller.
        node: the node at which to prune.
        root: current root of the tree containing ``node``.
        pp_trees: list that receives the pruned-off piece (mutated in place).

    Returns:
        ``(root, done)`` tuple.  For a front prune ``done`` is
        ``node == root``; for a back prune it is always ``False``.
    """
    if score_tuple[0] > score_tuple[1]:  # prune front
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("prune front")
        pp_trees.append(node)
        par = node.prune()
        if par is not None and len(root.leaves()) >= 3:
            par, root = tree_utils.remove_kink(par, root)
        return root, node == root

    if node != root:  # prune back
        par = node.parent  # par--node<
        par.remove_child(node)
        if par.parent is not None:
            par, root = tree_utils.remove_kink(par, root)
    node.prune()
    print("prune back")
    pp_trees.append(root)
    if len(node.leaves()) >= 3:
        node, newroot = tree_utils.remove_kink(node, node)
    else:
        newroot = node
    return newroot, False  # original root was cut off, not done yet
def main(inDIR, file_ending, branch_len_cutoff, min_taxa, outDIR):
    """Cut long branches in every matching tree file and write the subtrees.

    For each file in ``inDIR`` whose name ends with ``file_ending`` the first
    line is parsed as a newick tree.  Trees with fewer than ``min_taxa`` taxa
    are reported and skipped.  Otherwise the tree is passed to
    ``cut_long_internal_branches`` and every returned subtree with at least
    ``min_taxa`` taxa is written to ``outDIR`` as ``<prefix>_<n>.subtree``,
    where ``<prefix>`` is the file name up to its first ``.`` and ``<n>``
    counts the subtrees written for that file.

    NOTE: copying an uncut tree unchanged into ``outDIR`` is currently
    disabled; only ``.subtree`` files are produced.

    Args:
        inDIR: input directory (a trailing ``/`` is added when missing).
        file_ending: suffix selecting the tree files to process.
        branch_len_cutoff: branch length cutoff, converted with ``float``.
        min_taxa: minimum number of taxa, converted with ``int``.
        outDIR: output directory (a trailing ``/`` is added when missing).

    Raises:
        AssertionError: if no file in ``inDIR`` ends with ``file_ending``.
    """
    if inDIR[-1] != "/":
        inDIR += "/"
    if outDIR[-1] != "/":
        outDIR += "/"
    min_taxa = int(min_taxa)
    filecount = 0
    cutoff = float(branch_len_cutoff)
    # Messages are built as one string so the output is the same under the
    # Python 2 print statement semantics and the Python 3 print function.
    print("cutting branches longer than " + str(cutoff))
    for fname in os.listdir(inDIR):
        if not fname.endswith(file_ending):
            continue
        print(fname)
        filecount += 1
        with open(inDIR + fname, "r") as infile:  # only 1 tree in each file
            intree = newick3.parse(infile.readline())
        try:
            # the original .tre
            raw_path = inDIR + fname[:fname.find(".tre")] + ".tre"
            with open(raw_path, "r") as infile:
                raw_tree_size = len(
                    get_front_labels(newick3.parse(infile.readline())))
        except Exception:
            # Did not refine this round. Use the .tre.tt.mm tree.  Kept as a
            # best-effort fallback, but no longer swallows KeyboardInterrupt
            # or SystemExit the way a bare ``except:`` does.
            raw_tree_size = len(get_front_labels(intree))
        num_taxa = count_taxa(intree)
        if num_taxa < min_taxa:
            print("Tree has {0} less than {1} taxa".format(num_taxa, min_taxa))
            continue
        print(".tre: " + str(raw_tree_size) + " tips; " + file_ending + ": "
              + str(len(get_front_labels(intree))) + " tips")
        subtrees = cut_long_internal_branches(intree, cutoff)
        if not subtrees:
            print("No tree with at least {0} taxa".format(min_taxa))
            continue
        count = 0
        sizes = []
        for subtree in subtrees:
            if count_taxa(subtree) < min_taxa:
                continue
            if subtree.nchildren == 2:  # fix bifurcating roots from cutting
                temp, subtree = remove_kink(subtree, subtree)
            count += 1
            outname = outDIR + fname.split(".")[0] + "_" + str(count) + ".subtree"
            with open(outname, "w") as outfile:
                outfile.write(newick3.tostring(subtree) + ";\n")
            sizes.append(len(subtree.leaves()))
        # Every size is followed by ", ", including the last one.
        outsizes = "".join(str(size) + ", " for size in sizes)
        print("{0} tree(s) wirtten. Sizes: {1}".format(count, outsizes))
    assert filecount > 0, "No file end with " + file_ending + " in " + inDIR
def mask_monophyletic_tips(curroot, ignore=None):
    """Remove tips whose sister tip carries the same taxon name.

    The taxon name of a tip is the second ``_``-separated field of
    ``get_name(tip.label)``.  Whenever a tip has a sister that is also a tip
    with the same name, that sister is pruned.  Passes over the tree are
    repeated while a pass removed something, the root is not ``None`` and
    the tree still has at least 4 leaves.

    Args:
        curroot: root node of the tree (may be ``None``); modified in place.
        ignore: unused; kept so existing callers keep working.  The default
            is ``None`` rather than a shared mutable list.

    Returns:
        the root of the masked tree, which may differ from the node passed
        in because ``remove_kink`` can return a new root.
    """
    going = True
    while going and curroot is not None and len(curroot.leaves()) >= 4:
        going = False
        for node in curroot.iternodes():  # walk through nodes
            if not node.istip:
                continue  # only look at tips
            name = get_name(node.label).split("_")[1]
            for sister in node.get_sisters():
                if (sister.istip
                        and name == get_name(sister.label).split("_")[1]):
                    # mask: drop the sister; prune() hands back the node
                    # that is then checked for a kink
                    node = sister.prune()
                    if len(curroot.leaves()) >= 4:
                        if ((node == curroot and node.nchildren == 2)
                                or (node != curroot and node.nchildren == 1)):
                            node, curroot = remove_kink(node, curroot)
                    going = True
                    break  # stop comparing against further sisters
    return curroot
def mask_paraphyletic_tips(curroot, ignore=None):
    """Remove tips that share a taxon name with a tip one level up.

    The taxon name of a tip is the second ``_``-separated field of
    ``get_name(tip.label)``.  For every tip that is neither the root nor a
    direct child of the root, the sisters of its parent are inspected; a
    sister that is a tip with the same name is pruned.  Passes over the tree
    are repeated while a pass removed something, the root is not ``None``
    and the tree still has at least 4 leaves.

    Args:
        curroot: root node of the tree (may be ``None``); modified in place.
        ignore: unused; kept so existing callers keep working.  The default
            is ``None`` rather than a shared mutable list.

    Returns:
        the root of the masked tree, which may differ from the node passed
        in because ``remove_kink`` can return a new root.
    """
    going = True
    while going and curroot is not None and len(curroot.leaves()) >= 4:
        going = False
        for node in curroot.iternodes():  # walk through nodes
            if not node.istip:
                continue  # only look at tips
            name = get_name(node.label).split("_")[1]
            parent = node.parent
            if node == curroot or parent == curroot or parent is None:
                continue  # no paraphyletic tips for the root
            for para in parent.get_sisters():
                if (para.istip
                        and name == get_name(para.label).split("_")[1]):
                    # mask: drop the matching tip; prune() hands back the
                    # node that is then checked for a kink
                    node = para.prune()
                    if len(curroot.leaves()) >= 4:
                        if ((node == curroot and node.nchildren == 2)
                                or (node != curroot and node.nchildren == 1)):
                            node, curroot = remove_kink(node, curroot)
                    going = True
                    break  # stop comparing against further candidates
    return curroot
def main(inDIR, file_ending, branch_len_cutoff, min_taxa, outDIR, log):
    """Cut long branches in every matching tree file and write the subtrees.

    For each file in ``inDIR`` whose name ends with ``file_ending`` the first
    line is read with ``tree_reader.read_tree_string``.  Trees with fewer
    than ``min_taxa`` leaves are reported and skipped.  Otherwise the tree is
    passed to ``cut_long_internal_branches`` and every returned subtree with
    at least ``min_taxa`` leaves is written to
    ``<outDIR>/<prefix>_<n>.subtree``, where ``<prefix>`` is the file name up
    to its first ``.`` and ``<n>`` counts the subtrees written for that file.

    Args:
        inDIR: input directory (a trailing ``/`` is added when missing).
        file_ending: suffix selecting the tree files to process.
        branch_len_cutoff: branch length cutoff, converted with ``float``.
        min_taxa: minimum number of leaves, converted with ``int``.
        outDIR: output directory.
        log: not used by this function; kept for interface compatibility.
    """
    if inDIR[-1] != "/":
        inDIR += "/"
    min_taxa = int(min_taxa)
    cutoff = float(branch_len_cutoff)
    print("cutting branches longer than", cutoff)
    for fname in os.listdir(inDIR):
        if not fname.endswith(file_ending):
            continue
        with open(inDIR + fname, "r") as infile:  # only 1 tree in each file
            intree = tree_reader.read_tree_string(infile.readline())
        num_taxa = len(intree.leaves())
        if num_taxa < min_taxa:
            print("Tree has", num_taxa, "less than", min_taxa, "taxa")
            continue
        subtrees = cut_long_internal_branches(intree, cutoff, min_taxa)
        if not subtrees:
            print("No tree with at least", min_taxa, "taxa")
            continue
        count = 0
        sizes = []
        for subtree in subtrees:
            if len(subtree.leaves()) < min_taxa:
                continue
            if len(subtree.children) == 2:  # fix bifurcating roots from cutting
                temp, subtree = remove_kink(subtree, subtree)
            count += 1
            outname = outDIR + "/" + fname.split(".")[0] + "_" + str(count) + ".subtree"
            print(outname)
            with open(outname, "w") as outfile:
                outfile.write(subtree.get_newick_repr(True) + ";\n")
            sizes.append(len(subtree.leaves()))
        # Every size is followed by ", ", including the last one.
        outsizes = "".join(str(size) + ", " for size in sizes)
        print(count, "tree(s) written. Sizes:", outsizes)
def mask_monophyletic_tips(curroot, unamb_chrDICT):
    """Mask sister tips that share a name, keeping the better-scored one.

    Two sister tips match when ``get_name`` gives the same result for both
    labels.  Of a matching pair, the sister is removed when the tip's value
    in ``unamb_chrDICT`` is strictly larger; otherwise the tip itself is
    removed.  Passes are repeated while a pass removed something and the
    tree has at least 4 leaves.

    Args:
        curroot: root node of the tree; modified in place.
        unamb_chrDICT: mapping from tip label to a comparable value.
            NOTE(review): presumably the number of unambiguous characters
            per sequence - confirm against the caller.

    Returns:
        the root of the masked tree (``remove_kink`` may replace it).
    """
    changed = True
    while changed and len(curroot.leaves()) >= 4:
        changed = False
        for tip in curroot.iternodes():
            # only tips are candidates
            if not tip.istip:
                continue
            for sis in tip.get_sisters():
                if not sis.istip:
                    continue
                same_name = get_name(tip.label) == get_name(sis.label)
                if not same_name:
                    continue
                # drop whichever of the two does not win the comparison
                if unamb_chrDICT[tip.label] > unamb_chrDICT[sis.label]:
                    loser = sis
                else:
                    loser = tip
                hub = loser.prune()
                if len(curroot.leaves()) >= 4 and (
                        (hub == curroot and hub.nchildren == 2)
                        or (hub != curroot and hub.nchildren == 1)):
                    hub, curroot = remove_kink(hub, curroot)
                changed = True
                break
    return curroot
def mask_paraphyletic_tips(curroot, unamb_chrDICT):
    """Mask same-named tips one level apart, keeping the better-scored one.

    For every tip that is neither the root nor a direct child of the root,
    the sisters of its parent are inspected.  A sister that is a tip and for
    which ``get_name`` gives the same result as for the tip forms a matching
    pair: that sister is removed when the tip's value in ``unamb_chrDICT``
    is strictly larger; otherwise the tip itself is removed.  Passes are
    repeated while a pass removed something and the tree has at least 4
    leaves.

    Args:
        curroot: root node of the tree; modified in place.
        unamb_chrDICT: mapping from tip label to a comparable value.
            NOTE(review): presumably the number of unambiguous characters
            per sequence - confirm against the caller.

    Returns:
        the root of the masked tree (``remove_kink`` may replace it).
    """
    changed = True
    while changed and len(curroot.leaves()) >= 4:
        changed = False
        for tip in curroot.iternodes():
            # only tips are candidates
            if not tip.istip:
                continue
            parent = tip.parent
            # no paraphyletic tips for the root
            if tip == curroot or parent == curroot:
                continue
            for aunt in parent.get_sisters():
                if not aunt.istip:
                    continue
                same_name = get_name(tip.label) == get_name(aunt.label)
                if not same_name:
                    continue
                # drop whichever of the two does not win the comparison
                if unamb_chrDICT[tip.label] > unamb_chrDICT[aunt.label]:
                    loser = aunt
                else:
                    loser = tip
                hub = loser.prune()
                if len(curroot.leaves()) >= 4 and (
                        (hub == curroot and hub.nchildren == 2)
                        or (hub != curroot and hub.nchildren == 1)):
                    hub, curroot = remove_kink(hub, curroot)
                changed = True
                break
    return curroot