Exemple #1
0
def prepare_codeml(round, fasta_file_name, species_tree, marks, wd):
    """Populate the per-alignment working directory used for a codeml run.

    A sub-directory of ``wd`` named after ``fasta_file_name`` (with every
    occurrence of ``args.suffix`` removed) is created and made the current
    working directory. Inside it this writes:

    * ``<name>.tree`` -- the species tree after ``prune_tree`` has been
      applied with the genomes found in the alignment and, for the "BM" and
      "BS" modes, after ``mark_branches`` has been applied;
    * ``<name>.phy``  -- the alignment converted by ``fasta2phy``;
    * whatever ``control_files`` generates for this mode and run name.

    ``round`` selects the entry of ``marks`` (keyed by ``str(round)``) and is
    embedded, zero-padded to two digits, in the run name.
    """
    # A new EvolTree is built for every alignment that is processed.
    tree = EvolTree(species_tree)
    name = fasta_file_name.replace(args.suffix, "")

    # Create the run directory and work from inside it.
    create_dir(wd, name)
    run_dir = os.path.join(wd, name)
    os.chdir(run_dir)

    # Genomes present in this alignment drive the pruning.
    fasta_path = os.path.join(wd, fasta_file_name)
    genomes = fasta_ids(fasta_path)
    prune_tree(tree, genomes)

    # Only the branch ("BM") and branch-site ("BS") modes need marked branches.
    if args.mode in ("BM", "BS"):
        foreground = list(set(marks[str(round)]).intersection(genomes))
        mark_branches(tree, foreground)

        # With args.single_trees a gene tree is inferred for this alignment
        # and the marked taxa are checked for monophyly in it.
        if args.single_trees:
            gene_tree_path = os.path.join(run_dir, fasta_file_name + ".ftree")
            fast_tree(fasta_path, gene_tree_path)
            gene_tree = midpoint_root(tree_features(gene_tree_path))
            if not is_monophyletic(gene_tree, foreground):
                logging.warning("Check monophyly in the clade-of-interest: {}".format(name))

    # Relative output paths land in run_dir because of the chdir above.
    tree.write(outfile=name + ".tree")
    fasta2phy(fasta_path, name + ".phy")  # MSA fasta --> Phylip

    run_name = "r" + str(round).zfill(2)
    control_files(wd, args.mode, name, run_name)
Exemple #2
0
# Branch-site scan: fit M0 once, then mark each leaf in turn as foreground
# and compare the bsA / bsA1 / M0 models for that branch.
# Ported from Python 2 (`print` statement, `raw_input`) to Python 3 syntax;
# the text that is printed is unchanged.
# NOTE(review): `tree` is created before this excerpt -- presumably an
# EvolTree already linked to its alignment; confirm against the full script.
print(tree)

input('\n   tree and alignment loaded\nHit some key, to start computation of branch site models A and A1 on each branch.\n')

print('running model M0, for comparison with branch-site models...')
tree.run_model('M0')

# each node/leaf has two kind of identifiers node_id and paml_id, to mark nodes we have to specify
# the node_id of the nodes we want to mark, and the kind of mark in this way:

for leaf in tree:
    print('\n---------\nNow working with leaf ' + leaf.name)
    tree.mark_tree([leaf.node_id], marks=['#1'])
    print(tree.write())
    # to organize a bit, we name model with the name of the marked node
    # any character after the dot, in model name, is not taken into account
    # for computation. (have a look in /tmp/ete3.../bsA.. directory)
    alt_model = 'bsA.' + leaf.name
    null_model = 'bsA1.' + leaf.name
    print('running model bsA and bsA1')
    tree.run_model(alt_model)
    tree.run_model(null_model)
    print('p-value of positive selection for sites on this branch is: ')
    ps = tree.get_most_likely(alt_model, null_model)
    rx = tree.get_most_likely(null_model, 'M0')
    print(str(ps))
    print('p-value of relaxation for sites on this branch is: ')
    print(str(rx))
    # NOTE(review): `bsA` is not defined anywhere in the visible code; this
    # condition raises NameError whenever ps < 0.05 unless it is bound
    # elsewhere. Left as-is because the intended model object is not shown.
    if ps < 0.05 and float(bsA.wfrg2a) > 1:
        print('we have positive selection on sites on this branch')
    elif rx < 0.05 and ps >= 0.05:
        # NOTE(review): `taxa_in_alignment` is defined outside this excerpt,
        # and pruning inside the relaxation branch looks unrelated to the
        # surrounding loop -- confirm this line belongs here.
        tree.prune(taxa_in_alignment, preserve_branch_length=True)

# Tag the test taxa and their common ancestors with '{test}' and write the
# relabelled tree to `out_tree_name`.
# `tree`, `test_taxa_file`, `out_tree_name` and `combinations` come from
# outside this excerpt.

# Read the taxa to test, one name per line.
test_taxa = []
with open(test_taxa_file, 'r') as test_taxa_list:
    for line in test_taxa_list:
        taxon = line.rstrip()
        # Skip blank lines (e.g. a stray empty line at the end of the file);
        # they would otherwise be looked up in the tree as an empty name.
        if taxon:
            test_taxa.append(taxon)

nodes_to_mark = set()  # set since we want it to be all unique ids

# Mark the test taxa themselves.
for taxon in test_taxa:
    taxon_node = tree & taxon  # ete3 notation for finding a node within a tree
    nodes_to_mark.add(taxon_node.node_id)  # mark_tree only takes node_ids, not labels

# Mark the common ancestor of every group of test taxa.
# Only pairs need to be enumerated: the common ancestor of any larger group
# is also the common ancestor of two of its members (one from each side of
# the deepest split), so pairs already yield every node that enumerating all
# subsets would -- in quadratic rather than exponential time.
for pair in combinations(test_taxa, 2):
    common_node = tree.get_common_ancestor(*pair)
    nodes_to_mark.add(common_node.node_id)

# TODO: change the names of the nodes
# Each collected node gets '{test}' appended to its name.
for mark_id in nodes_to_mark:
    test_node = tree.search_nodes(node_id=mark_id)[0]
    test_node.name += '{test}'

tree.write(outfile=out_tree_name, format=1)
from ete3 import EvolTree
from ete3 import NodeStyle

# Load the example tree, attach its alignment, mark three branches as
# foreground and show them highlighted.
tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')

print(tree)

print('Tree and alignment loaded.')
input('Tree will be mark in order to contrast Gorilla and Chimpanzee as foreground \nspecies.')

# node_ids of the branches to mark, written as integers.
# NOTE(review): assumes node_id values are integers -- confirm against ete3.
marks = [1, 3, 7]

# The mark labels go in through the `marks` keyword, the form used by the
# other mark_tree call in this file; one '#1' label per node id.
tree.mark_tree(marks, marks=['#1'] * len(marks))
print(tree.write())

print('we can easily colorize marked branches')
# display marked branches in orange
for node in tree.traverse():
    # Nodes without a mark attribute, or with an empty one, keep their style.
    if getattr(node, 'mark', '') == '':
        continue
    node.img_style = NodeStyle()
    node.img_style['bgcolor'] = '#ffaa00'
tree.show()

print ('''now running branch models
free branch models, 2 groups of branches, one with Gorilla and
chimp, the other with the rest of the phylogeny
Exemple #5
0
from ete3 import NodeStyle

# Load the example tree, attach its alignment, mark three branches as
# foreground and show them highlighted.
tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')

print(tree)

print('Tree and alignment loaded.')
input(
    'Tree will be mark in order to contrast Gorilla and Chimpanzee as foreground \nspecies.'
)

# node_ids of the branches to mark. The list previously mixed strings and an
# integer; the ids are now uniformly integers.
# NOTE(review): assumes node_id values are integers -- confirm against ete3.
marks = [1, 3, 7]

# The mark labels go in through the `marks` keyword, the form used by the
# other mark_tree call in this file; one '#1' label per node id.
tree.mark_tree(marks, marks=['#1'] * len(marks))
print(tree.write())

# display marked branches in orange
for node in tree.traverse():
    # Nodes without a mark attribute, or with an empty one, keep their style.
    if getattr(node, 'mark', '') == '':
        continue
    node.img_style = NodeStyle()
    node.img_style['bgcolor'] = '#ffaa00'
tree.show()

print('''now running branch-site models C and D that represents
the addition of one class of sites in on specific branch.
These models must be compared to null models M1 and M3.
if branch-site models are detected to be significantly better,
from ete3 import EvolTree
import sys

# Usage: <script> <input tree> <output tree>
# Reads a tree, attaches a `mark` feature to ratite leaves and to internal
# nodes whose leaves are all ratites, then writes the tree back out.
treepath, treeout = sys.argv[1], sys.argv[2]

t = EvolTree(treepath)
ratites = {
    'aptHaa', 'aptRow', 'aptOwe',
    'strCam', 'droNov', 'casCas',
    'rheAme', 'rhePen',
}

# Annotate every node of the tree.
for node in t.traverse():
    if not node.is_leaf():
        # Internal node: tag it only when every leaf under it is a ratite.
        if set(node.get_leaf_names()).issubset(ratites):
            node.add_features(mark="{RatiteInternal}")
    elif node.name in ratites:
        node.add_features(mark="{RatiteLeaf}")

t.write(outfile=treeout)