def prepare_codeml(round, fasta_file_name, species_tree, marks, wd):
    """Create every input file codeml needs for one alignment.

    Creates a per-alignment directory under *wd* and changes the process
    working directory into it, prunes the species tree to the genomes found
    in the alignment, optionally marks branches, and then writes the tree,
    the Phylip alignment and the codeml control files.

    Args:
        round: Run number. Zero-padded to two digits to build the run name
            (``r01``, ``r02``, ...) and, converted to ``str``, used as the
            key into *marks*.
        fasta_file_name: Alignment file name, joined onto *wd*.
        species_tree: Passed to ``EvolTree`` to build the tree to prune.
        marks: Mapping indexed by ``str(round)``; the value is intersected
            with the genomes present in the alignment to get the species to
            mark. Only read when ``args.mode`` is ``"BM"`` or ``"BS"``.
        wd: Base working directory. Paths built from it are used after the
            ``os.chdir`` call, so it is expected to be absolute.

    Side effects:
        Changes the current working directory to ``<wd>/<name>`` and leaves
        it there; ``<name>.tree`` and ``<name>.phy`` are written relative to
        that directory.
    """
    run_name = "r" + str(round).zfill(2)
    tree = EvolTree(species_tree)  # fresh tree for every alignment

    # NOTE: str.replace removes every occurrence of the suffix, not only a
    # trailing one.
    name = fasta_file_name.replace(args.suffix, "")

    fasta_path = os.path.join(wd, fasta_file_name)
    work_dir = os.path.join(wd, name)

    # Create the per-alignment directory and work from inside it.
    create_dir(wd, name)
    os.chdir(work_dir)

    # Keep only the genomes that are actually present in the alignment.
    genomes = fasta_ids(fasta_path)
    prune_tree(tree, genomes)

    # Mark branches only for the branch / branch-site models.
    if args.mode in ["BM", "BS"]:
        mark_spp = list(set(marks[str(round)]).intersection(genomes))
        mark_branches(tree, mark_spp)

        # NOTE(review): the source formatting lost its indentation; the
        # gene-tree check is placed inside this branch because it reads
        # mark_spp, which is only bound here.
        if args.single_trees:
            gene_tree_path = os.path.join(work_dir, fasta_file_name + ".ftree")
            fast_tree(fasta_path, gene_tree_path)
            gene_tree = midpoint_root(tree_features(gene_tree_path))
            if not is_monophyletic(gene_tree, mark_spp):
                logging.warning(
                    "Check monophyly in the clade-of-interest: %s", name
                )

    # Written relative to the working directory entered above.
    tree.write(outfile=name + ".tree")

    # File format conversion: MSA fasta --> Phylip.
    fasta2phy(fasta_path, name + ".phy")

    # Create the control files for this run.
    control_files(wd, args.mode, name, run_name)
# Python 2 script fragment (print statements, raw_input): runs model M0 once,
# then, for every leaf of `tree`, marks that leaf, runs the 'bsA' and 'bsA1'
# models and prints the two values returned by get_most_likely.
# `tree` is created before this fragment.
print tree
# Pause until the user presses a key; the returned text is discarded.
raw_input('\n tree and alignment loaded\nHit some key, to start computation of branch site models A and A1 on each branch.\n')
print 'running model M0, for comparison with branch-site models...'
tree.run_model('M0')

# Each node/leaf has two kinds of identifiers, node_id and paml_id. To mark
# nodes we have to specify the node_id of the nodes we want to mark, and the
# kind of mark, in this way:
for leaf in tree:
    # Bare attribute access: the value is not used by this statement.
    leaf.node_id
    print '\n---------\nNow working with leaf ' + leaf.name
    tree.mark_tree([leaf.node_id], marks=['#1'])
    print tree.write()
    # To organize a bit, we name the model with the name of the marked node.
    # Any character after the dot, in the model name, is not taken into
    # account for computation (have a look in the /tmp/ete3.../bsA.. directory).
    print 'running model bsA and bsA1'
    tree.run_model('bsA.'+ leaf.name)
    tree.run_model('bsA1.' + leaf.name)
    print 'p-value of positive selection for sites on this branch is: '
    # ps compares 'bsA.<leaf>' against 'bsA1.<leaf>'; rx compares
    # 'bsA1.<leaf>' against 'M0'. Both are treated below as p-values.
    ps = tree.get_most_likely('bsA.' + leaf.name, 'bsA1.'+ leaf.name)
    rx = tree.get_most_likely('bsA1.'+ leaf.name, 'M0')
    print str(ps)
    print 'p-value of relaxation for sites on this branch is: '
    print str(rx)
    # NOTE(review): `bsA` is not assigned anywhere in the visible code; unless
    # it is defined elsewhere, evaluating bsA.wfrg2a raises NameError whenever
    # ps < 0.05. Confirm where the fitted model object should come from.
    if ps < 0.05 and float(bsA.wfrg2a) > 1:
        print 'we have positive selection on sites on this branch'
    elif rx<0.05 and ps>=0.05:
# Prune the tree to the taxa in the alignment, tag the test taxa and the
# internal nodes that are common ancestors of test taxa by appending '{test}'
# to their names, then write the tree to out_tree_name.
tree.prune(taxa_in_alignment, preserve_branch_length=True)

# One taxon name per line. Blank lines are skipped so that an empty string is
# never looked up as a taxon name.
with open(test_taxa_file, 'r') as test_taxa_list:
    stripped_lines = (line.rstrip() for line in test_taxa_list)
    test_taxa = [taxon for taxon in stripped_lines if taxon]

nodes_to_mark = set()  # node_ids; a set so each node is tagged exactly once

# Mark the test taxa themselves.
for taxon in test_taxa:
    taxon_node = tree & taxon  # ete3 notation for finding a node within a tree
    nodes_to_mark.add(taxon_node.node_id)  # nodes are looked up by node_id below

# Mark the internal nodes that are common ancestors of test taxa.
# The common ancestor of any group of two or more taxa is also the common
# ancestor of some pair inside that group (two members that sit under
# different children of that ancestor), so enumerating pairs yields exactly
# the same set of nodes as enumerating every subset of size >= 2, without the
# exponential number of combinations.
for pair in combinations(test_taxa, 2):
    common_node = tree.get_common_ancestor(*pair)
    nodes_to_mark.add(common_node.node_id)

# TODO change the names of the nodes
for mark_id in nodes_to_mark:
    test_node = tree.search_nodes(node_id=mark_id)[0]
    test_node.name += '{test}'

tree.write(outfile=out_tree_name, format=1)
# Demo script: load a tree and its alignment, mark three nodes with '#1',
# colour the marked nodes, show the tree, then go on to run branch models.
from ete3 import EvolTree
from ete3 import NodeStyle

tree = EvolTree ("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment ('data/S_example/alignment_S_measuring_evol.fasta')

print (tree)

print ('Tree and alignment loaded.')
# Pause until the user presses Enter; the returned text is discarded.
input ('Tree will be mark in order to contrast Gorilla and Chimpanzee as foreground \nspecies.')

# Passed as the first argument to mark_tree below — presumably node ids.
# NOTE(review): the ids are given as strings; confirm the type mark_tree
# compares against.
marks = ['1', '3', '7']

# NOTE(review): the list of '#1' marks is passed positionally; confirm against
# mark_tree's signature that the second positional parameter is the marks
# list (it may need to be passed as marks=...).
tree.mark_tree (marks, ['#1'] * 3)
print (tree.write ())

print ('we can easily colorize marked branches')
# display marked branches in orange
for node in tree.traverse ():
    # Skip nodes that carry no mark attribute or an empty mark.
    if not hasattr (node, 'mark'):
        continue
    if node.mark == '':
        continue
    node.img_style = NodeStyle ()
    node.img_style ['bgcolor'] = '#ffaa00'
tree.show()

print ('''now running branch models free branch models, 2 groups of branches, one with Gorilla and chimp, the other with the rest of the phylogeny
# Demo script: load a tree and its alignment, mark three nodes with '#1',
# colour the marked nodes, show the tree, then go on to run further models.
# EvolTree is used below but its import is not part of this fragment.
from ete3 import NodeStyle

tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')

print(tree)

print('Tree and alignment loaded.')
# Pause until the user presses Enter; the returned text is discarded.
input(
    'Tree will be mark in order to contrast Gorilla and Chimpanzee as foreground \nspecies.'
)

# Passed as the first argument to mark_tree below — presumably node ids.
# NOTE(review): the list mixes two strings with an int (3); confirm which type
# mark_tree compares against, since only one of the two can match.
marks = ['1', 3, '7']

# NOTE(review): the list of '#1' marks is passed positionally; confirm against
# mark_tree's signature that the second positional parameter is the marks
# list (it may need to be passed as marks=...).
tree.mark_tree(marks, ['#1'] * 3)
print(tree.write())

# display marked branches in orange
for node in tree.traverse():
    # Skip nodes that carry no mark attribute or an empty mark.
    if not hasattr(node, 'mark'):
        continue
    if node.mark == '':
        continue
    node.img_style = NodeStyle()
    node.img_style['bgcolor'] = '#ffaa00'
tree.show()

print('''now running branch-site models C and D that represents the addition of one class of sites in on specific branch. These models must be compared to null models M1 and M3. if branch-site models are detected to be significantly better,
# Tag ratite lineages in a tree and write the result.
# Usage: <script> <input tree path> <output tree path>
from ete3 import EvolTree
import sys

treepath = sys.argv[1]
treeout = sys.argv[2]

t = EvolTree(treepath)

ratites = {
    'aptHaa',
    'aptRow',
    'aptOwe',
    'strCam',
    'droNov',
    'casCas',
    'rheAme',
    'rhePen',
}

# Annotate every node of the tree:
#   - a leaf whose name is in `ratites` gets mark="{RatiteLeaf}";
#   - an internal node gets mark="{RatiteInternal}" only when all of the leaf
#     names below it are in `ratites`.
# All other nodes are left without the feature.
for node in t.traverse():
    if not node.is_leaf():
        leaf_names = set(node.get_leaf_names())
        if leaf_names.issubset(ratites):
            node.add_features(mark="{RatiteInternal}")
    elif node.name in ratites:
        node.add_features(mark="{RatiteLeaf}")

t.write(outfile=treeout)