Exemple #1
0
def report_taxa(tree_file, scheme='newick', listing=True, counting=True):
    """Print the leaf taxon labels and/or the number of leaves of a tree.

    Args:
        tree_file: Path of the tree file; handed to ``Tree.read_from_path``.
        scheme: Format name handed through to ``Tree.read_from_path``.
        listing: When true, print the taxon label of every leaf node, one
            per line.
        counting: When true, print ``Taxa #: `` followed by the number of
            leaf nodes.
    """
    tree = Tree()
    tree.read_from_path(tree_file, scheme)

    if listing:
        for node in tree.leaf_nodes():
            print(node.taxon.label)

    if counting:
        leaf_total = len(tree.leaf_nodes())
        print('Taxa #: ' + str(leaf_total))
Exemple #2
0
def is_valid_newick(path, source_sequence_names = None):
    """Check whether the file at 'path' can be read as a newick tree.

    Returns the tuple (flag, message): (True, "") when the tree was read
    without an exception, otherwise (False, <text of the exception>).

    NOTE: 'source_sequence_names' is accepted but never consulted; no check
    is made that the tree's taxa come from that list of names.
    """
    try:
        candidate = Tree()
        candidate.read_from_path(path, "newick")
    except Exception as err:
        # Any failure means "not valid"; its text is handed back to the caller.
        return (False, str(err))
    return (True, "")
def get_bls(tree_path):
    """Return the branch lengths found in a newick tree file.

    The tree at ``tree_path`` is read and its edges are visited with
    ``level_order_edge_iter()``.  Every edge whose ``length`` is not ``None``
    contributes that length to the result, in iteration order.  Only the
    ``length`` attribute is read; nothing else on the edges is collected.

    Args:
        tree_path: Path to a newick-formatted tree file.

    Returns:
        A list with the non-``None`` edge lengths.
    """
    t = Tree()
    t.read_from_path(tree_path, "newick")

    bls = []
    # A plain for-loop drives the iterator on both Python 2 and Python 3;
    # calling ``.next()`` by hand only works on Python 2 iterators.
    for edge in t.level_order_edge_iter():
        branch_length = edge.length
        if branch_length is not None:
            bls.append(branch_length)
    return bls
Exemple #4
0
def pretty_print_trees():
    """Write a relabelled copy of each species' RAxML tree.

    For every species in ``species_trna_seq`` (visited in sorted order) whose
    tree file ``RAXMLDIR/RAxML_result.<species>`` exists, the string form of
    the tree is rewritten so that each taxon label becomes::

        <label><(N)>[<count>]<mark>

    * ``(N)`` is present only when the label has an entry in
      ``species_trna_dups[species]``; N is the length of that entry plus one.
    * ``<count>`` is ``count_trna_types(species)`` looked up by the value
      stored for the label in ``species_trna_mtrip[species]``.
    * ``<mark>`` is ``***`` when the label is listed in
      ``species_switchedtrnas[species]``, otherwise empty.

    The result is written to ``TREEDIR/<species>.tree``.  Species without a
    RAxML result file are skipped silently.

    NOTE(review): labels are replaced as plain substrings, so a label that is
    a prefix of another label is also rewritten inside the longer one --
    confirm that labels are unambiguous.
    """
    print("\n. OK, I'm reformatting the RAxML results for nice printing...")
    # sorted() works on dict keys under both Python 2 and Python 3.
    for species in sorted(species_trna_seq):
        treepath = RAXMLDIR + "/RAxML_result." + species
        if not os.path.exists(treepath):
            continue
        newtreepath = TREEDIR + "/" + species + ".tree"
        t = Tree()
        t.read_from_path(treepath, "newick")
        print(" --> %s" % (treepath,))
        trna_count = count_trna_types(species)
        newts = str(t)
        for taxon in t.taxon_set:
            label = taxon.label
            thisac = species_trna_mtrip[species][label]
            count_this_type = trna_count[thisac]

            count_dups = 0
            if label in species_trna_dups[species]:
                count_dups = len(species_trna_dups[species][label]) + 1
            if count_dups <= 1:
                dup_tag = ""
            else:
                dup_tag = "(" + str(count_dups) + ")"

            mark = ""
            if species in species_switchedtrnas:
                print("534: %s" % (species_switchedtrnas[species],))
                if label in species_switchedtrnas[species]:
                    mark = "***"

            new_label = label + dup_tag + "[" + str(count_this_type) + "]" + mark
            # Literal replacement: the label is data, not a regular
            # expression, so characters such as "." or "(" must match only
            # themselves.
            newts = newts.replace(label, new_label)
        # The context manager closes the file even if the write fails.
        with open(newtreepath, "w") as fout:
            fout.write(newts + "\n")
import os
import sys
from dendropy import Tree

# Compare two newick trees named on the command line: print their symmetric
# difference, then the length() of each tree.
if len(sys.argv) < 3:
    # Fail with a usage message instead of a bare IndexError.
    sys.exit("usage: %s TREE1 TREE2" % sys.argv[0])

t1path = sys.argv[1]
t2path = sys.argv[2]

t1 = Tree()
t1.read_from_path(t1path, "newick")
t2 = Tree()
t2.read_from_path(t2path, "newick")

# A symmetric difference does not depend on operand order, so one call is
# enough.
s = t2.symmetric_difference(t1)
# Formatted so the output is the same under Python 2 and Python 3.
print("symmetric diff. =  %s" % (s,))

print(t1.length())
print(t2.length())
Exemple #6
0
	def read_from_path(filename, schema="newick", taxon_set=None):
		"""Read a tree file and wrap the result in a PhylogeneticTree.

		A ``Tree`` is created with the given ``taxon_set``, populated via
		``Tree.read_from_path(filename, schema)``, and passed to the
		``PhylogeneticTree`` constructor.

		Args:
			filename: Path of the tree file to read.
			schema: Format name handed to ``Tree.read_from_path``.
			taxon_set: Forwarded to the ``Tree`` constructor; ``None`` by default.

		Returns:
			A ``PhylogeneticTree`` built from the tree that was read.
		"""
		# NOTE(review): there is no self/cls parameter; presumably a
		# @staticmethod decorator sits just above this definition -- confirm.
		t = Tree(taxon_set=taxon_set)
		t.read_from_path(filename, schema)

		return PhylogeneticTree(t)
import os
import sys
from dendropy import Tree

# Compare two newick trees named on the command line: print their symmetric
# difference, then the length() of each tree.
if len(sys.argv) < 3:
    # Fail with a usage message instead of a bare IndexError.
    sys.exit("usage: %s TREE1 TREE2" % sys.argv[0])

t1path = sys.argv[1]
t2path = sys.argv[2]

t1 = Tree()
t1.read_from_path(t1path, "newick")
t2 = Tree()
t2.read_from_path(t2path, "newick")


# A symmetric difference does not depend on operand order, so one call is
# enough.
s = t2.symmetric_difference(t1)
# Formatted so the output is the same under Python 2 and Python 3.
print("symmetric diff. =  %s" % (s,))

print(t1.length())
print(t2.length())

Exemple #8
0
def find_anticodon_switches():    
    """Scan each species' RAxML tree for putative switched anticodons.

    Species are taken from species_trna_seq in sorted order.  For every
    species whose tree file RAXMLDIR/RAxML_result.<species> exists:

    * the per-species entries of species_nscount, species_scount,
      species_ac_nscount and species_ac_scount are reset;
    * a patristic distance matrix is built for the tree;
    * for each taxon, the nearest other taxon with the same value in
      species_trna_mtrip[species] (the "anticodon") and the nearest taxon
      with a different value are located;
    * the taxon is flagged as switched when the same-anticodon distance
      exceeds the different-anticodon distance by more than
      SWITCH_DIFF_THRESHOLD and is itself above SWITCH_DISTANCE_THRESHOLD.

    Flagged taxa are appended to species_switchedtrnas[species].  A line per
    switch is written to SUMMARYDIR/<species>.acswitches.txt and to the
    combined, tab-separated table DATADIR/all.acswitches.txt.  Species with
    no tree file are reported on stdout and skipped.  Nothing is returned.
    """
    print "\n. OK, I'm searching for switched anticodons. . ."
    species_list = species_trna_seq.keys()
    species_list.sort()
    #print "504:", species_list
    allpath = DATADIR + "/all.acswitches.txt"
    allout = open(allpath, "w")
    allout.write("Species\tKingdom\tswitch type\tfrom\tto\td_diff\td_same\n")
    #
    # FOR EACH SPECIES. . . 
    #
    for species in species_list:
        # Species missing from species_kingdom get the placeholder "???".
        if species in species_kingdom:
            this_kingdom = species_kingdom[species]
        else:
            this_kingdom = "???"
                
        print species
        rpath = SUMMARYDIR + "/" + species + ".acswitches.txt"
        treepath = RAXMLDIR + "/RAxML_result." + species
        if os.path.exists(treepath):
            # Reset this species' counters before scanning its tree.
            species_nscount[species] = 0
            species_scount[species] = 0
            species_ac_nscount[species] = {}
            species_ac_scount[species] = {}
            fout = open(rpath, "w") # a summary of found ac switches will be written here.
            t = Tree()
            t.read_from_path(treepath, "newick")
            print "\n. Calculating all pairwise distances between sequences on tree:", treepath
            pdm = treecalc.PatristicDistanceMatrix(t) # matrix of pairwise distances between taxa
            
            asses_monophyly(t, species)

            # First, sort the leaf nodes by their anticodon preference.
            ac_labels = {} # key = a.c., value = list of taxon labels (strings)
            for i, t1 in enumerate(t.taxon_set):
                #thisac = get_ac_from_name( t1.label )
                thisac = species_trna_mtrip[species][t1.label]
                if thisac not in ac_labels:
                    ac_labels[ thisac ] = []
                ac_labels[ thisac ].append( t1.label )
            
            #
            # FOR EACH tRNA SEQUENCE. . .
            # Goal: for each tRNA find the min. distance to another tRNA with the same
            # anticodon, then find the min. distance to another tRNA that is of a different
            # anticodon type.
            # NOTE: "\." is not a recognised escape sequence, so the next line
            # prints a backslash followed by a dot.
            print "\."
            for i, t1 in enumerate(t.taxon_set):                
                min2same = None
                min2diff = None
                closest_diff = None # taxon label of the closest different-anticodon tRNA sequence to sequence t1.
                closest_same = None # taxon label of the closest same-anticodon tRNA sequence to sequence t1.
                #myac = get_ac_from_name(t1.label)
                myac = species_trna_mtrip[species][t1.label]
                #print t1.label
                if ac_labels[myac].__len__() <= 1:
                    continue # skip tRNAs for which they are the only representative of their AC.
                # ac_labels shrinks as the loop advances: each processed label is
                # removed, so the length tests here and in the inner loop see only
                # the labels that have not been removed yet.
                ac_labels[myac].remove( t1.label )
                myaa = get_aa_from_name(t1.label)
                # "Met" taxa are skipped, but only after their label has been
                # removed from ac_labels above.
                if myaa == "Met":
                    continue
                for t2 in t.taxon_set:
                    if t1 == t2:
                        continue
                    #thisac = get_ac_from_name(t2.label)
                    thisac = species_trna_mtrip[species][t2.label]
                    d = pdm(t1, t2)
                    if myac == thisac:
                        # Track the nearest taxon sharing t1's anticodon.
                        if min2same == None:
                            min2same = d
                            closest_same = t2.label
                        elif min2same > d:
                            min2same = d
                            closest_same = t2.label
                    elif myac != thisac and ac_labels[thisac].__len__() > 1:
                        # Track the nearest taxon with a different anticodon, but only
                        # for anticodons that still have more than one label listed.
                        if min2diff == None:
                            min2diff = d
                            closest_diff = t2.label
                        elif min2diff > d:
                            min2diff = d
                            closest_diff = t2.label
                if min2same == None:
                    min2same = 0.0 # in the event of singletons
                if min2diff == None:
                    min2diff = 0.0 # in the event of sparse genomes with few tRNAs.
                
                # Without a different-anticodon neighbour there is nothing to compare.
                if closest_diff == None:
                    continue
                # min2diff was defaulted to 0.0 above, so the "min2diff != None"
                # term is always true at this point.
                if min2same > min2diff and min2same-min2diff > SWITCH_DIFF_THRESHOLD and min2diff != None and min2same > SWITCH_DISTANCE_THRESHOLD: 
                    # . . . then we've identified an anticodon shift:
                    if species not in species_switchedtrnas:
                        species_switchedtrnas[species] = []
                    if t1.label not in species_switchedtrnas[species]:
                        species_switchedtrnas[species].append( t1.label )
                    
                    # Classify by comparing the amino acid of the nearest
                    # different-anticodon taxon with t1's own.  When that neighbour
                    # is "Met" (and differs from myaa) neither branch runs: nothing
                    # is counted or written, although t1.label was recorded above.
                    thataa = get_aa_from_name(closest_diff)
                    if thataa  == myaa: # synonymous shift
                        species_scount[species] += 1
                        fout.write("Synonymous" + " " + closest_diff + " -> " + t1.label + "\t" + min2diff.__str__()  + "\t" + min2same.__str__()  + "\n")
                        allout.write(species + "\t" + this_kingdom + "\tSY\t" + closest_diff + "\t" + t1.label + "\t%.4f"%min2diff + "\t%.3f"%min2same + "\n")                                     
                        print "  . Syn." + " " + closest_diff + " -> " + t1.label + "\t" + min2diff.__str__()  + "\t" + min2same.__str__() 
                        if myac not in species_ac_scount[species]:
                            species_ac_scount[species][myac] = 1
                        else:
                            species_ac_scount[species][myac] += 1
                    elif thataa != myaa and thataa != "Met": # nonsynonymous shift
                        species_nscount[species] += 1
                        fout.write("Nonsynonymous" + " " + closest_diff + " -> " + t1.label + "\t" + min2diff.__str__()  + "\t" + min2same.__str__() + "\n")
                        allout.write(species + "\t" + this_kingdom + "\tNS\t" + closest_diff + "\t" + t1.label + "\t%.4f"%min2diff + "\t%.3f"%min2same + "\n")   
                        print "  . Nonsyn." + " " + closest_diff + " -> " + t1.label + "\t" + min2diff.__str__()  + "\t" + min2same.__str__()
                        if myac not in species_ac_nscount[species]:
                            species_ac_nscount[species][myac] = 1
                        else:
                            species_ac_nscount[species][myac] += 1
            
            if species_nscount[species] == 0 and species_scount[species] == 0:
                fout.write("No detected switched anitcodons for " + species + "\n") 
            fout.close()
            print ".", species, "has", species_nscount[species], "putative nonsynonymously switched anticodons."    
            print ".", species, "has", species_scount[species], "putative synonymously switched anticodons."        
        else:
            print ". I skipped species", species, "because I can't find the ML tree."
    allout.close()
Exemple #9
0
def get_tree_length(path):
    """Return the length of the newick tree stored at 'path'.

    The file is parsed with Tree.read_from_path and the value of the tree's
    length() method is handed back to the caller.
    """
    tree = Tree()
    tree.read_from_path(path, "newick")
    total_length = tree.length()
    return total_length