def report_taxa(tree_file, scheme='newick', listing=True, counting=True):
    """Print the leaf taxa of a tree file and/or how many there are.

    Args:
        tree_file: Path of the tree file handed to ``Tree.read_from_path``.
        scheme: Format name passed through to ``Tree.read_from_path``.
        listing: When true, print the taxon label of every leaf node,
            one per line.
        counting: When true, print ``'Taxa #: '`` followed by the number
            of leaf nodes.

    Returns:
        None. All results go to standard output.
    """
    a_tree = Tree()
    a_tree.read_from_path(tree_file, scheme)

    if not (listing or counting):
        return

    # Collect the leaves once; both reports below use the same list.
    leaves = a_tree.leaf_nodes()
    if listing:
        for leaf in leaves:
            print(leaf.taxon.label)
    if counting:
        print('Taxa #: ' + str(len(leaves)))
def is_valid_newick(path, source_sequence_names=None):
    """Check whether the file at *path* can be read as a Newick tree.

    The file is considered valid when constructing a ``Tree`` and calling
    ``read_from_path(path, "newick")`` on it raises no exception.

    Args:
        path: Location of the candidate tree file.
        source_sequence_names: Accepted for callers that pass a list of
            expected sequence names; the current implementation does not
            consult it.

    Returns:
        A tuple ``(is_valid, error_message)``. ``error_message`` is the
        string form of the exception raised while reading, or ``""`` when
        reading succeeded.
    """
    try:
        candidate = Tree()
        candidate.read_from_path(path, "newick")
    except Exception as err:
        # Any failure while reading is reported to the caller, not raised.
        return (False, str(err))
    return (True, "")
def get_bls(tree_path):
    """Return the branch lengths found in the Newick tree at *tree_path*.

    Edges are visited with ``Tree.level_order_edge_iter()``. An edge whose
    ``length`` is ``None`` is skipped, so the result holds only the lengths
    that are actually set. The tree itself is not modified.

    Args:
        tree_path: Path of a Newick-formatted tree file.

    Returns:
        A list of edge lengths in the order the iterator yields them.
    """
    t = Tree()
    t.read_from_path(tree_path, "newick")

    bls = []
    # A plain for-loop drives the iterator on both Python 2 and Python 3;
    # the previous explicit ``.next()`` call only existed on Python 2.
    for edge in t.level_order_edge_iter():
        branch_length = edge.length
        if branch_length is not None:
            bls.append(branch_length)
    return bls
def pretty_print_trees():
    """Write an annotated copy of each species' RAxML result tree.

    For every species key in ``species_trna_seq`` (in sorted order) whose
    file ``RAXMLDIR/RAxML_result.<species>`` exists, the tree is read as
    Newick, converted to a string, and each taxon label in that string is
    replaced by::

        <label>(<n>)[<count>]<mark>

    * ``(<n>)`` appears only when the label is a key of
      ``species_trna_dups[species]``; ``n`` is the length of that entry
      plus one (presumably the number of collapsed identical sequences --
      confirm against the code that fills ``species_trna_dups``).
    * ``<count>`` is ``count_trna_types(species)[anticodon]``, where the
      anticodon is looked up in ``species_trna_mtrip[species][label]``.
    * ``<mark>`` is ``***`` when the label is listed in
      ``species_switchedtrnas[species]``, otherwise empty.

    The result is written to ``TREEDIR/<species>.tree``. Species without a
    RAxML result file are skipped silently.

    Returns:
        None.
    """
    print("\n. OK, I'm reformatting the RAxML results for nice printing...")

    for species in sorted(species_trna_seq.keys()):
        treepath = RAXMLDIR + "/RAxML_result." + species
        if not os.path.exists(treepath):
            continue
        newtreepath = TREEDIR + "/" + species + ".tree"

        t = Tree()
        t.read_from_path(treepath, "newick")
        print("%s %s" % (" -->", treepath))

        trna_count = count_trna_types(species)
        newts = t.__str__()

        for taxon in t.taxon_set:
            label = taxon.label
            thisac = species_trna_mtrip[species][label]
            count_this_type = trna_count[thisac]

            count_dups = 0
            if label in species_trna_dups[species]:
                count_dups = len(species_trna_dups[species][label]) + 1
            dup_tag = "(" + str(count_dups) + ")" if count_dups > 1 else ""

            mark = ""
            if species in species_switchedtrnas:
                print("%s %s" % ("534:", species_switchedtrnas[species]))
                if label in species_switchedtrnas[species]:
                    mark = "***"

            annotated = label + dup_tag + "[" + str(count_this_type) + "]" + mark
            # Literal replacement: labels are plain text, not regex patterns,
            # so metacharacters in a label must not be interpreted.
            # NOTE(review): a label that is a substring of another label is
            # still replaced inside the longer one -- confirm labels are
            # never prefixes of each other.
            newts = newts.replace(label, annotated)

        # The context manager closes the file even if the write fails.
        with open(newtreepath, "w") as fout:
            fout.write(newts + "\n")
import os
import sys

from dendropy import Tree

# Command-line script: compare two Newick trees.
#   argv[1] -- path of the first tree
#   argv[2] -- path of the second tree
# Prints the symmetric difference between the trees, then each tree's length.
t1path = sys.argv[1]
t2path = sys.argv[2]

t1 = Tree()
t1.read_from_path(t1path, "newick")
t2 = Tree()
t2.read_from_path(t2path, "newick")

# Only this result was ever printed; the earlier t1-vs-t2 call was
# overwritten immediately, so it is no longer computed.
s = t2.symmetric_difference(t1)

# Single-argument print(...) gives the same output on Python 2 and 3.
print("%s %s" % ("symmetric diff. = ", s))
print(t1.length())
print(t2.length())
def read_from_path(filename, schema="newick", taxon_set=None):
    """Read a tree file and return it wrapped in a ``PhylogeneticTree``.

    Args:
        filename: Path of the tree file to read.
        schema: Format name passed to ``Tree.read_from_path``.
        taxon_set: Forwarded to the ``Tree`` constructor as ``taxon_set``.

    Returns:
        ``PhylogeneticTree`` constructed from the freshly read ``Tree``.
    """
    loaded = Tree(taxon_set=taxon_set)
    loaded.read_from_path(filename, schema)
    wrapped = PhylogeneticTree(loaded)
    return wrapped
def find_anticodon_switches():
    """Scan each species' RAxML tree for putative anticodon switches.

    For every species key in ``species_trna_seq`` (sorted) whose tree file
    ``RAXMLDIR/RAxML_result.<species>`` exists, this function:

    * resets that species' entries in ``species_nscount``,
      ``species_scount``, ``species_ac_nscount`` and ``species_ac_scount``;
    * reads the tree, builds a ``treecalc.PatristicDistanceMatrix`` and
      calls ``asses_monophyly(t, species)``;
    * groups taxon labels by the anticodon found in
      ``species_trna_mtrip[species][label]``;
    * for each taxon, finds the smallest matrix distance to a taxon with
      the same anticodon (``min2same``) and to a taxon with a different
      anticodon (``min2diff``), and reports a switch when the
      same-anticodon distance exceeds the different-anticodon distance by
      the configured thresholds.

    Detected switches are recorded in ``species_switchedtrnas``, written
    to ``SUMMARYDIR/<species>.acswitches.txt`` and appended as
    tab-separated rows to ``DATADIR/all.acswitches.txt``.

    Returns nothing; results are delivered through the module-level
    dictionaries, the output files and standard output.

    NOTE(review): neither output file is closed if an exception is raised
    part-way through.
    """
    print "\n. OK, I'm searching for switched anticodons. . ."
    species_list = species_trna_seq.keys()
    species_list.sort()
    #print "504:", species_list
    # One combined table for all species; the header row is written first.
    allpath = DATADIR + "/all.acswitches.txt"
    allout = open(allpath, "w")
    allout.write("Species\tKingdom\tswitch type\tfrom\tto\td_diff\td_same\n")
    #
    # FOR EACH SPECIES. . .
    #
    for species in species_list:
        # Species without a known kingdom are reported as "???".
        if species in species_kingdom:
            this_kingdom = species_kingdom[species]
        else:
            this_kingdom = "???"
        print species
        rpath = SUMMARYDIR + "/" + species + ".acswitches.txt"
        treepath = RAXMLDIR + "/RAxML_result." + species
        if os.path.exists(treepath):
            # Reset the per-species counters (ns = nonsynonymous, s = synonymous).
            species_nscount[species] = 0
            species_scount[species] = 0
            species_ac_nscount[species] = {}
            species_ac_scount[species] = {}
            fout = open(rpath, "w") # a summary of found ac switches will be written here.
            t = Tree()
            t.read_from_path(treepath, "newick")
            print "\n. Calculating all pairwise distances between sequences on tree:", treepath
            pdm = treecalc.PatristicDistanceMatrix(t) # matrix of pairwise distances between taxa
            asses_monophyly(t, species)
            # First, group the taxon labels by their anticodon.
            ac_labels = {} # key = anticodon, value = list of taxon labels
            for i, t1 in enumerate(t.taxon_set):
                #thisac = get_ac_from_name( t1.label )
                thisac = species_trna_mtrip[species][t1.label]
                if thisac not in ac_labels:
                    ac_labels[ thisac ] = []
                ac_labels[ thisac ].append( t1.label )
            #
            # FOR EACH tRNA SEQUENCE. . .
            # Goal: for each tRNA find the min. distance to another tRNA with the same
            # anticodon, then find the min. distance to another tRNA that is of a different
            # anticodon type.
            print "\."
            for i, t1 in enumerate(t.taxon_set):
                min2same = None
                min2diff = None
                closest_diff = None # taxon label of the closest different-anticodon tRNA to t1.
                closest_same = None # taxon label of the closest same-anticodon tRNA to t1.
                #myac = get_ac_from_name(t1.label)
                myac = species_trna_mtrip[species][t1.label]
                #print t1.label
                if ac_labels[myac].__len__() <= 1:
                    continue # skip tRNAs for which they are the only representative of their AC.
                # NOTE(review): this shrinks ac_labels while the outer loop is still
                # running, so later iterations (and the length test in the inner loop)
                # see shorter lists -- confirm this is intended.
                ac_labels[myac].remove( t1.label )
                myaa = get_aa_from_name(t1.label)
                # Taxa whose amino acid is "Met" are never evaluated as switch targets.
                if myaa == "Met":
                    continue
                for t2 in t.taxon_set:
                    if t1 == t2:
                        continue
                    #thisac = get_ac_from_name(t2.label)
                    thisac = species_trna_mtrip[species][t2.label]
                    d = pdm(t1, t2)
                    if myac == thisac:
                        # Track the nearest taxon sharing t1's anticodon.
                        if min2same == None:
                            min2same = d
                            closest_same = t2.label
                        elif min2same > d:
                            min2same = d
                            closest_same = t2.label
                    elif myac != thisac and ac_labels[thisac].__len__() > 1:
                        # Track the nearest taxon with another anticodon, considering only
                        # anticodons that still have more than one label in ac_labels.
                        if min2diff == None:
                            min2diff = d
                            closest_diff = t2.label
                        elif min2diff > d:
                            min2diff = d
                            closest_diff = t2.label
                if min2same == None:
                    min2same = 0.0 # in the event of singletons
                if min2diff == None:
                    min2diff = 0.0 # in the event of sparse genomes with few tRNAs.
                # Without a different-anticodon neighbour there is nothing to compare.
                if closest_diff == None:
                    continue
                # NOTE(review): min2diff can no longer be None here (it was defaulted to
                # 0.0 above), so the "min2diff != None" term is always true.
                if min2same > min2diff and min2same-min2diff > SWITCH_DIFF_THRESHOLD and min2diff != None and min2same > SWITCH_DISTANCE_THRESHOLD:
                    # . . . then we've identified an anticodon shift:
                    if species not in species_switchedtrnas:
                        species_switchedtrnas[species] = []
                    if t1.label not in species_switchedtrnas[species]:
                        species_switchedtrnas[species].append( t1.label )
                    thataa = get_aa_from_name(closest_diff)
                    # NOTE(review): the combined table formats min2diff with %.4f but
                    # min2same with %.3f -- confirm the differing precision is wanted.
                    if thataa == myaa: # synonymous shift
                        species_scount[species] += 1
                        fout.write("Synonymous" + " " + closest_diff + " -> " + t1.label + "\t" + min2diff.__str__() + "\t" + min2same.__str__() + "\n")
                        allout.write(species + "\t" + this_kingdom + "\tSY\t" + closest_diff + "\t" + t1.label + "\t%.4f"%min2diff + "\t%.3f"%min2same + "\n")
                        print " . Syn." + " " + closest_diff + " -> " + t1.label + "\t" + min2diff.__str__() + "\t" + min2same.__str__()
                        # Per-anticodon tally of synonymous switches.
                        if myac not in species_ac_scount[species]:
                            species_ac_scount[species][myac] = 1
                        else:
                            species_ac_scount[species][myac] += 1
                    elif thataa != myaa and thataa != "Met": # nonsynonymous shift
                        species_nscount[species] += 1
                        fout.write("Nonsynonymous" + " " + closest_diff + " -> " + t1.label + "\t" + min2diff.__str__() + "\t" + min2same.__str__() + "\n")
                        allout.write(species + "\t" + this_kingdom + "\tNS\t" + closest_diff + "\t" + t1.label + "\t%.4f"%min2diff + "\t%.3f"%min2same + "\n")
                        print " . Nonsyn." + " " + closest_diff + " -> " + t1.label + "\t" + min2diff.__str__() + "\t" + min2same.__str__()
                        # Per-anticodon tally of nonsynonymous switches.
                        if myac not in species_ac_nscount[species]:
                            species_ac_nscount[species][myac] = 1
                        else:
                            species_ac_nscount[species][myac] += 1
            # Leave an explicit note in the summary when nothing was detected.
            if species_nscount[species] == 0 and species_scount[species] == 0:
                fout.write("No detected switched anitcodons for " + species + "\n")
            fout.close()
            print ".", species, "has", species_nscount[species], "putative nonsynonymously switched anticodons."
            print ".", species, "has", species_scount[species], "putative synonymously switched anticodons."
        else:
            print ". I skipped species", species, "because I can't find the ML tree."
    allout.close()
def get_tree_length(path):
    """Return the length of the Newick tree stored at *path*.

    The file is read with ``Tree.read_from_path(path, "newick")`` and the
    value of the tree's ``length()`` method is returned unchanged.
    """
    tree = Tree()
    tree.read_from_path(path, "newick")
    total_length = tree.length()
    return total_length