예제 #1
0
def run_evol_py(tree,
                alg,
                branch_model,
                site_models,
                workir='data/evol_output',
                tool_dir="ete3_apps/bin"):
    """Run codeml models on a tree/alignment pair and render the result.

    An ``EvolTree`` is built from ``tree`` and linked to ``alg``. The
    optional branch model is run first, then every site model, and the tree
    is finally rendered to ``<workir>/tree_evol_result.png`` with the site
    models passed as ``histfaces``.

    Side effects: rebinds the module globals ``evol_output_dir`` and
    ``final_evol_tree``.

    :param tree: First argument handed to ``EvolTree``.
    :param alg: Argument handed to ``EvolTree.link_to_alignment``.
    :param branch_model: Model name run before the site models; skipped
        when falsy. It is converted with ``str`` before use.
    :param site_models: Iterable of model names. ``None`` is treated as
        "no site models".
    :param workir: Working directory assigned to the tree and used as the
        output folder (parameter spelling kept for existing callers).
    :param tool_dir: Path joined onto the directory part of ``ete3_path``
        to form the ``binpath`` given to ``EvolTree``.
    :return: The ``EvolTree`` after all models have been run.
    """
    global evol_output_dir, final_evol_tree

    print(tree, alg, branch_model, site_models)

    # Materialise once: the names are iterated below AND passed to render(),
    # so a one-shot iterator would otherwise reach render() exhausted, and
    # None would crash the loop.
    site_models = list(site_models) if site_models is not None else []

    builtin_apps_path = os.path.join(os.path.split(ete3_path)[0], tool_dir)

    tree = EvolTree(tree, binpath=builtin_apps_path)
    tree.link_to_alignment(alg)
    tree.workdir = workir

    # Branch model (optional)
    if branch_model:
        branch_model = str(branch_model)
        tree.run_model(branch_model)
        print(tree.get_evol_model(branch_model))

    # Site models
    for site_model in site_models:
        tree.run_model(site_model)

    evol_output_dir = workir
    final_evol_tree = evol_output_dir + '/tree_evol_result.png'

    tree.render(final_evol_tree,
                layout=evol_clean_layout,
                histfaces=site_models)
    return tree
예제 #2
0
    def run(self, pamlsrc, output_folder, model='M1'):
        """Fit one codeml model to ``temptree.nw`` through ETE.

        The tree is read from ``temptree.nw``, linked to
        ``self.alignmentfile`` and run inside ``self.workdir``. The model
        handed to ``run_model`` is named ``<model>.<output_folder>``.

        The original author notes that M1 is the default because it suited
        their orthology-inference use case, and that `M2`, `M0` and `M3`
        may be used as well.

        :param pamlsrc: Path to the codeml binary (it lives in the PAML
            ``/bin`` folder); assigned to the tree's ``execpath``.
        :param output_folder: The name of the output folder; used as the
            suffix of the model name.
        :param model: The model to be used. (Default value = 'M1')
        """
        evol_tree = EvolTree('temptree.nw')
        evol_tree.link_to_alignment(self.alignmentfile)
        evol_tree.workdir = self.workdir
        evol_tree.execpath = pamlsrc

        # "<model>.<output_folder>", e.g. "M1.<output_folder>"
        run_name = model + '.' + output_folder
        evol_tree.run_model(run_name)
        self.ete3paml_log.info('Codeml is generating data in %s.' % run_name)
예제 #3
0
    def main(self):
        """Run ``self.defaultmodel`` with codeml on the stored tree and alignment."""
        print("Running model %s paml on input." % str(self.defaultmodel))

        # Newick tree -> EvolTree, then attach the alignment.
        evol_tree = EvolTree(self.tree)
        evol_tree.link_to_alignment(self.alignment)

        # Where codeml writes, and which binary it uses.
        evol_tree.workdir = self.workdir
        evol_tree.execpath = self.pamlpath

        evol_tree.run_model(self.defaultmodel)
예제 #4
0
def count_omega(align_file, gene_name):
    """Run the 'fb' model for one alignment and hand the result to the table writer.

    The tree comes from the module-level ``tree_file``. The textual form of
    the fitted model is printed, written to the module-level ``temp`` path
    (overwriting it), and ``write_in_table(gene_name)`` is then called.

    :param align_file: Alignment passed to ``link_to_alignment``.
    :param gene_name: Printed first and forwarded to ``write_in_table``.
    """
    print(gene_name)

    evol_tree = EvolTree(tree_file)
    evol_tree.link_to_alignment(align_file)

    # Model 'fb' (labelled "free branch ratio" by the original author).
    model_name = 'fb'
    evol_tree.run_model(model_name)
    fb_results = evol_tree.get_evol_model(model_name)
    print(fb_results)

    with open(temp, 'w') as out_handle:
        out_handle.write(str(fb_results))

    write_in_table(gene_name)
예제 #5
0
def ete3paml(gene, paml_path, workdir='data/paml-output/', model='M1'):
    """Run a codeml model for one gene on the species tree pruned to its taxa.

    The species tree (``data/initial-data/species_tree.nw``) is pruned to
    the organisms from ``data/initial-data/organisms.csv`` whose formatted
    name occurs in the gene's CDS alignment. The chosen model is then run
    through ETE3's PAML integration under the name ``<model>.<gene>``.

    :param gene: Gene name; selects
        ``data/clustal-output/<gene>_Aligned/<gene>_aligned_cds_nucl.fasta``.
    :param paml_path: Assigned to the tree's ``execpath`` (codeml location).
    :param workdir: Working directory assigned to the tree.
    :param model: codeml model name prefix. (Default value = 'M1')
    """
    alignment_path = ('data/clustal-output/' + gene + '_Aligned/' + gene +
                      '_aligned_cds_nucl.fasta')

    # Species tree and the list of organisms to look for.
    t = Tree('data/initial-data/species_tree.nw', format=1)
    orgsfile = pd.read_csv('data/initial-data/organisms.csv', header=None)
    organismslist = formatlist(list(orgsfile[0]))

    # Read the alignment as text; the context manager closes the handle even
    # if reading fails.
    with open(alignment_path, 'r') as alignment_file:
        alignment_str = alignment_file.read()

    # Some species may be absent from the alignment: keep only those whose
    # name appears somewhere in the alignment text.
    branches2keep = [organism for organism in organismslist
                     if organism in alignment_str]

    # prune() modifies the tree in place and returns None, so its return
    # value must not be used as the pruned tree.
    t.prune(branches2keep, preserve_branch_length=True)

    # Build the EvolTree from the pruned topology's newick string.
    tree = EvolTree(t.write())

    tree.link_to_alignment(alignment_path)
    tree.workdir = workdir

    # Location of the codeml binary.
    tree.execpath = paml_path

    tree.run_model(model + '.' + gene)
예제 #6
0
def pamlSite(alnFile, treeFile, lModels, pamlParams, outDir, baseName, logger):
    # Run codeml site models through ete3 and log likelihood-ratio tests.
    #
    # alnFile  -- alignment linked to the tree (declared as "Fasta")
    # treeFile -- tree handed to EvolTree
    # lModels  -- requested model names; only M0, M1, M2, M7 and M8 are run
    # outDir   -- output prefix; "<outDir>paml_site/" is created and used as
    #             the working directory (os.mkdir raises if it already exists)
    # logger   -- logger receiving progress and LRT messages
    # pamlParams and baseName are not used in the part of the function shown.

    tree = EvolTree(treeFile)
    os.mkdir(outDir + "paml_site/")
    tree.workdir = outDir + "paml_site/"
    tree.link_to_alignment(alnFile, "Fasta")
    logger.info("PAML codeml")

    dModelRun = {}
    for model in lModels:
        if model in ["M0", "M1", "M2", "M7", "M8"]:
            logger.info("Running {:s}".format(model))
            dModelRun[model] = tree.run_model(model)

    # Both models of a pair must have been run before they are compared.
    # (`"M1" and "M2" in d` only tested for "M2".)
    if "M1" in dModelRun and "M2" in dModelRun:
        p12 = tree.get_most_likely("M2", "M1")
        logger.info("LRT of M1 vs M2 = {}".format(p12))
    if "M7" in dModelRun and "M8" in dModelRun:
        p78 = tree.get_most_likely("M8", "M7")
        logger.info("LRT of M7 vs M8 = {}".format(p78))
    """
def run_codeml(mark_id, aln_file, tree_file, sleep):
    """Run M0, bsA and bsA1 with one marked node and classify the outcome.

    Uses the module-level ``logger``, ``args.threads`` and ``output_dir``.
    A fresh directory ``<output_dir>/<mark_id>`` is created (``os.makedirs``
    raises if it already exists).

    :param mark_id: node_id of the branch marked ``#1``.
    :param aln_file: Alignment linked to the tree.
    :param tree_file: Tree passed to ``EvolTree`` with ``format=0``.
    :param sleep: Delay budget; the call sleeps ``round(sleep / args.threads, 2)``.
    :return: ``[mark_id, ps, rx, p_bsA, label]`` where ``label`` is
        'positive selection', 'relaxation' or 'no signal'.
    """
    logger.info('sub-process: {0}'.format(str(mark_id)))
    time.sleep(round(sleep / args.threads, 2))

    run_dir = os.path.join(output_dir, str(mark_id))
    os.makedirs(run_dir)

    evol_tree = EvolTree(tree_file, format=0)
    evol_tree.link_to_alignment(aln_file)
    # NOTE(review): M0 runs before workdir is switched to run_dir, so it uses
    # whatever working directory the tree starts with -- confirm this is
    # intended.
    evol_tree.run_model('M0')
    evol_tree.workdir = run_dir
    evol_tree.mark_tree([mark_id], marks=['#1'])

    alt_model = 'bsA.' + str(mark_id)
    null_model = 'bsA1.' + str(mark_id)
    evol_tree.run_model(alt_model)
    evol_tree.run_model(null_model)

    ps = evol_tree.get_most_likely(alt_model, null_model)
    rx = evol_tree.get_most_likely(null_model, 'M0')

    bsa_fit = evol_tree.get_evol_model(alt_model)
    p_bsA = bsa_fit.classes['proportions'][2]
    wfrg2a = bsa_fit.classes['foreground w'][2]

    if ps < 0.05 and float(wfrg2a) > 1:
        label = 'positive selection'
    elif rx < 0.05 and ps >= 0.05:
        label = 'relaxation'
    else:
        label = 'no signal'
    return [mark_id, ps, rx, p_bsA, label]
예제 #8
0

# Run model "bsC.137" and model "M1" on `tree` (defined earlier in the
# script), then print the get_most_likely() result for the pair.
BRANCH_SITE_MODEL = "bsC.137"
NULL_MODEL = "M1"

print("""now running branch-site models C and D that represents
the addition of one class of sites in on specific branch.
These models must be compared to null models M1 and M3.
if branch-site models are detected to be significantly better,
than, one class of site is evolving at different rate in the marked
clade.
""")

# TODO: re-enable model M3 together with branch-site D ('bsD.137') and the
# matching get_most_likely('bsD.137', 'M3') comparison.

for banner, model_name in (
    ("running branch-site C...", BRANCH_SITE_MODEL),
    ("running M1 (all branches have the save value of omega)...", NULL_MODEL),
):
    print(banner)
    tree.run_model(model_name)

print("""p-value that, in marked clade, we have one class of site
specifically evolving at a different rate:""")
print(tree.get_most_likely(BRANCH_SITE_MODEL, NULL_MODEL))
# print ('p-value representing significance that omega is different of 1:')
# print (tree.get_most_likely ('bsD.137', 'M3'))
예제 #9
0
import sys, os, subprocess
import argparse
from ete3 import EvolTree

# Branch-site test (bsA vs bsA1) with every leaf under the common ancestor
# of two accessions marked as foreground.
CODEML_BIN_DIR = "/home/edu/miniconda3/envs/ete3/bin/ete3_apps/bin"
ALT_MODEL = 'bsA.Chimaeriformes'
NULL_MODEL = "bsA1.Chimaeriformes"

tree = EvolTree("tree.nw", binpath=CODEML_BIN_DIR)
tree.link_to_alignment("infile.phy", alg_format="phylip")
tree.workdir = os.getcwd()

print(tree)

print('running model M0, for comparison with branch-site models...')
tree.run_model('M0', keep=True)

# Mark each leaf below the common ancestor with "#1", one call per leaf.
chimaeriformes = tree.get_common_ancestor("HM147138.1", "HM147135.1")
for leaf in chimaeriformes:
    tree.mark_tree([leaf.node_id], marks=["#1"])

print("Running")
print(tree.write())
for model_name in (ALT_MODEL, NULL_MODEL):
    tree.run_model(model_name)

print('p-value of positive selection for sites on this branch is: ')
ps = tree.get_most_likely(ALT_MODEL, NULL_MODEL)
print(str(ps))
rx = tree.get_most_likely(NULL_MODEL, 'M0')
예제 #10
0
# Interactive walkthrough (Python 2 syntax): link an alignment to `tree`
# (created earlier in the script), run the model named 'fb.example' and show
# pieces of the resulting model object, pausing on raw_input between steps.
print 'Now, it is necessary to link this tree to an alignment:'

tree.link_to_alignment ('data/S_example/alignment_S_measuring_evol.fasta')

raw_input ('\n   alignment loaded, hit some key to see.\n')

tree.show()

print '''
we will run free-ratio model that is one of models available through
function run_model:
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
'''
# Print run_model's own docstring as the list of available models.
print tree.run_model.__doc__ +'\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++'

tree.run_model ('fb.example')

raw_input ('free-ratio model runned, all results are store in a Model object.')

# Fetch the model object stored under the same name that was just run.
fb = tree.get_evol_model('fb.example')

print 'Have a look to the parameters used to run this model on codeml: '
print fb.get_ctrl_string()
raw_input ('hit some key...')


print 'Have a look to run message of codeml: '
print fb.run
raw_input ('hit some key...')

print 'Have a look to log likelihood value of this model, and number of parameters:'
예제 #11
0
    node.img_style = NodeStyle()
    node.img_style['bgcolor'] = '#ffaa00'
tree.show()

# Run model 'bsC.137' and model 'M1' on `tree`, then print the
# get_most_likely() result for the pair.
branch_site_model = 'bsC.137'
null_model = 'M1'

intro_text = '''now running branch-site models C and D that represents
the addition of one class of sites in on specific branch.
These models must be compared to null models M1 and M3.
if branch-site models are detected to be significantly better,
than, one class of site is evolving at different rate in the marked
clade.
'''
print(intro_text)

# TODO: re-enable model M3 together with branch-site D ('bsD.137') and the
# matching get_most_likely('bsD.137', 'M3') comparison.

print('running branch-site C...')
tree.run_model(branch_site_model)
print('running M1 (all branches have the save value of omega)...')
tree.run_model(null_model)

pvalue_text = '''p-value that, in marked clade, we have one class of site
specifically evolving at a different rate:'''
print(pvalue_text)
print(tree.get_most_likely(branch_site_model, null_model))

print('The End.')
예제 #12
0
__email__ = "*****@*****.**"
__licence__ = "GPLv3"
__version__ = "0.0"

from ete3 import TreeStyle
from ete3 import EvolTree
from ete3 import faces

# Run model 'fb_anc' and prepare a TreeStyle whose aligned footer lists,
# for every non-leaf node, its node_id label and its sequence face.
tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')
print(tree)

print('\n Running free-ratio model with calculation of ancestral sequences...')
tree.run_model('fb_anc')
# Alternative kept from the original: load an existing run instead of
# recomputing it.
#tree.link_to_evol_model('/tmp/ete3-codeml/fb_anc/out', 'fb_anc')

I = TreeStyle()
I.force_topology = False
I.draw_aligned_faces_as_table = True
I.draw_guiding_lines = True
I.guiding_lines_type = 2
I.guiding_lines_color = "#CCCCCC"

# All nodes (descendants plus the root itself), ordered by node_id.
every_node = tree.get_descendants() + [tree]
for n in sorted(every_node, key=lambda x: x.node_id):
    if not n.is_leaf():
        anc_face = faces.SequenceFace(n.sequence, 'aa', fsize=10,
                                      bg_colors={})
        I.aligned_foot.add_face(anc_face, 1)
        I.aligned_foot.add_face(
            faces.TextFace('node_id: #%d ' % (n.node_id), fsize=8), 0)

print('display result of bs_anc model, with ancestral amino acid sequences.')
예제 #13
0
__licence__ = "GPLv3"
__version__ = "0.0"

from ete3 import TreeStyle
from ete3 import EvolTree
from ete3 import faces


# Python 2 script: run model 'fb_anc' on the example tree/alignment and
# prepare a TreeStyle whose aligned footer carries one sequence face and one
# node_id label per non-leaf node.
tree = EvolTree ("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment ('data/S_example/alignment_S_measuring_evol.fasta')

print tree

print '\n Running free-ratio model with calculation of ancestral sequences...'

tree.run_model ('fb_anc')
# Alternative: load an existing run instead of recomputing it.
#tree.link_to_evol_model('/tmp/ete3-codeml/fb_anc/out', 'fb_anc')

I = TreeStyle()
I.force_topology             = False
I.draw_aligned_faces_as_table = True
I.draw_guiding_lines = True
I.guiding_lines_type = 2
I.guiding_lines_color = "#CCCCCC"
# Walk every node (descendants plus the root) in node_id order.
for n in sorted (tree.get_descendants()+[tree],
                 key=lambda x: x.node_id):
    # Leaves are skipped; only internal nodes get footer faces.
    if n.is_leaf(): continue
    anc_face = faces.SequenceFace (n.sequence, 'aa', fsize=10, bg_colors={})
    # Second argument is presumably the footer column (1 = sequence,
    # 0 = label) -- TODO confirm against the ete3 face API.
    I.aligned_foot.add_face(anc_face, 1)
    I.aligned_foot.add_face(faces.TextFace('node_id: #%d '%(n.node_id),
                                           fsize=8), 0)
예제 #14
0
# Python 2 script section: give every node carrying a non-empty `mark` an
# orange background, show the tree, run models 'b_free.137', 'b_neut.137'
# and 'M0', then print the b_free-vs-M0 comparison.
# display marked branches in orange
for node in tree.traverse():
    # Nodes without a `mark` attribute, or with an empty one, keep their style.
    if not hasattr(node, 'mark'):
        continue
    if node.mark == '':
        continue
    node.img_style = NodeStyle()
    node.img_style['bgcolor'] = '#ffaa00'
tree.show()

print '''now running branch models
free branch models, 2 groups of branches, one with Gorilla and
chimp, the other with the rest of the phylogeny
'''
print 'running branch free...'
tree.run_model('b_free.137')
print 'running branch neut...'
tree.run_model('b_neut.137')
print 'running M0 (all branches have the save value of omega)...'
tree.run_model('M0')

# raw_input doubles as the on-screen explanation and a pause before the
# result is printed.
raw_input('''Now we can do comparisons...
Compare first if we have one or 2 rates of evolution among phylogeny.
LRT between b_free and M0 (that is one or two rates of omega value)
p-value ofthis comparison is:''')
print tree.get_most_likely('b_free.137', 'M0')

raw_input('''
Now test if foreground rate is significantly different of 1.
(b_free with significantly better likelihood than b_neut)
if significantly different, and higher than one, we will be under
예제 #15
0
    node.img_style ['bgcolor'] = '#ffaa00'
tree.show()


# Python 2 script section: run model 'bsC.137' and model 'M1' on `tree`
# (defined earlier in the script) and print the get_most_likely() result
# for the pair.
print '''now running branch-site models C and D that represents
the addition of one class of sites in on specific branch.
These models must be compared to null models M1 and M3.
if branch-site models are detected to be significantly better,
than, one class of site is evolving at different rate in the marked
clade.
'''

# TODO: re-enable model M3

print 'running branch-site C...'
tree.run_model ('bsC.137')
# Branch-site D is disabled together with M3 (see TODO above).
#print 'running branch-site D...'
#tree.run_model ('bsD.137')
print 'running M1 (all branches have the save value of omega)...'
tree.run_model ('M1')
#print 'running M3 (all branches have the save value of omega)...'
#tree.run_model ('M3')

print '''p-value that, in marked clade, we have one class of site
specifically evolving at a different rate:'''
print tree.get_most_likely ('bsC.137', 'M1')
#print 'p-value representing significance that omega is different of 1:'
#print tree.get_most_likely ('bsD.137', 'M3')


print 'The End.'
예제 #16
0
from ete3 import EvolTree

# Run site models M1 and M2 on the example data, print their comparison and
# show the tree with the M2 histogram face.
tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')

print(tree)

# Python 2 compatibility: use raw_input under the name `input` when it exists.
try:
    input = raw_input
except NameError:
    pass

input('\n   tree and alignment loaded\n Hit some key, to start computation of site models M1 and M2.\n')

for banner, model_name in (('running model M1', 'M1'),
                           ('running model M2', 'M2')):
    print(banner)
    tree.run_model(model_name)

comparison = tree.get_most_likely('M2', 'M1')
print('\n\n comparison of models M1 and M2, p-value: ' + str(comparison))

print('by default the hist represented is this one:')
tree.show(histfaces=['M2'])

print('but we can choose between many others...')
model2 = tree.get_evol_model('M2')
예제 #17
0
# Give every node with a non-empty `mark` an orange background, then show
# the tree.
for node in tree.traverse():
    if not hasattr(node, 'mark') or node.mark == '':
        continue
    node.img_style = NodeStyle()
    node.img_style['bgcolor'] = '#ffaa00'
tree.show()

print('''now running branch models
free branch models, 2 groups of branches, one with Gorilla and
chimp, the other with the rest of the phylogeny
''')

# Run the three models in order, announcing each one first.
for banner, model_name in (
        ('running branch free...', 'b_free.137'),
        ('running branch neut...', 'b_neut.137'),
        ('running M0 (all branches have the save value of omega)...', 'M0')):
    print(banner)
    tree.run_model(model_name)

# The prompt doubles as the explanation and a pause before the result.
input('''Now we can do comparisons...
Compare first if we have one or 2 rates of evolution among phylogeny.
LRT between b_free and M0 (that is one or two rates of omega value)
p-value ofthis comparison is:''')
print(tree.get_most_likely('b_free.137', 'M0'))

input ('''
Now test if foreground rate is significantly different of 1.
(b_free with significantly better likelihood than b_neut)
if significantly different, and higher than one, we will be under
예제 #18
0
        if starting_branch_length_option == 1:
            branch_estimation = 'bl'
        elif starting_branch_length_option == -1:
            branch_estimation = 'random'
        for initial_omega in [0.2, 0.7, 1.2]:
            if model == 'bsA1':
                initial_omega = 1.0
            model_specifications = model + '.' + branch_estimation + '_' + \
                                   str(initial_omega) + 'w'
            print 'Testing model ' + model + ' on ' + alignment_name + \
                  ' using starting branch length option ' + \
                  branch_estimation + ' and initial omega: ' + \
                  str(initial_omega) + 'w'
            if model == 'XX':
                tree.run_model(model_specifications, \
                            fix_blength=starting_branch_length_option, \
                            omega=initial_omega, NSsites=22, ncatG=3)

                # Here's the garbage I wrote to make sure that it parses the out files correctly
                tree.get_evol_model(
                    model_specifications).properties['typ'] = 'branch-site'
                tree.get_evol_model(model_specifications)._load(
                    model_specifications + '/out')

            else:
                tree.run_model(model_specifications, \
                            fix_blength=starting_branch_length_option, \
                            omega=initial_omega)
            current_model = tree.get_evol_model(model_specifications)
            print 'The fitting of model ' + model + ' on ' + alignment_name + \
                  ' using starting branch length option ' + \
예제 #19
0
from ete3 import EvolTree
from string import ascii_letters

# CREATE TREE
# Read the FASTA once; the context manager closes the handle promptly.
with open("./whales.fasta", "r") as fasta_file:
    fasta_lines = fasta_file.readlines()

# One single-letter label per FASTA header, in file order.
taxa = [l.replace('>', '').strip() for l in fasta_lines if l.startswith('>')]
taxa_map = { t: ascii_letters[i] for i, t in enumerate(taxa) }

# Ladder-shaped newick over the labels: ((((a,b),c),d),...);
taxa_string = '(' * (len(taxa) - 1) + '%s,%s)' % (ascii_letters[0], ascii_letters[1])
for t in ascii_letters[2:len(taxa)]:
    taxa_string = taxa_string + ',%s)' % t
taxa_string = taxa_string + ';'

# Relabel header lines only. A replace() over the whole text would also
# rewrite sequence characters and labels substituted earlier whenever a
# taxon name is a substring of another name or of the data.
align = ''.join(
    '>%s\n' % taxa_map[l.replace('>', '').strip()] if l.startswith('>') else l
    for l in fasta_lines
)

tree = EvolTree(taxa_string)
tree.link_to_alignment(align)
#tree.link_to_evol_model("M2")
#tree.get_evol_model("M2")
print(tree.run_model.__doc__)
tree.run_model("fb")
예제 #20
0
            omega_list.append("NA")
            continue
        else:
            evotree.link_to_alignment(subfasta)
            workdirname = './codeml_' + "__".join(closest_seq_ids)
            evotree.workdir = workdirname
            list_of_tempdirs.append(workdirname)
            # mark the foreground branch
            foreground_leafnode = evotree & seqid
            #			print (seqid)
            #			print(foreground_leafnode.node_id)
            #			print (evotree.write())
            evotree.mark_tree([foreground_leafnode.node_id], ['#1'])
            #			print (evotree.write())

            evotree.run_model('b_free.run')
            b_free_fit = evotree.get_evol_model('b_free.run')
            out_branches_dict = b_free_fit.branches
            for b in out_branches_dict:
                if out_branches_dict[b]["mark"] == " #1":
                    # check if there are at least 1 synonymous substitutions expected on this branch... otherwise not very meaningful to estimate omega (it will be very high).
                    if out_branches_dict[b]["S"] * out_branches_dict[b][
                            "dS"] >= 1.0:
                        omega = out_branches_dict[b]["w"]
                    else:
                        omega = "NA"
                    break
            omega_list.append(omega)
    numeric_omegas = [float(x) for x in omega_list if not x == "NA"]
    try:
        avg_omega = sum(numeric_omegas) / float(len(numeric_omegas))
예제 #21
0
06 Feb 2011

use slr to compute evolutionary rates
"""

__author__  = "Francois-Jose Serra"
__email__   = "*****@*****.**"
__licence__ = "GPLv3"
__version__ = "0.0"

from ete3 import EvolTree


# Run the 'SLR' model on the example data, configure its histogram face and
# show the tree with that face.
SLR_MODEL = 'SLR'

tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment("data/S_example/alignment_S_measuring_evol.fasta")

tree.run_model(SLR_MODEL)
slr = tree.get_evol_model(SLR_MODEL)

# Histogram face settings, passed through unchanged to set_histface().
histface_options = dict(
    up=False,
    kind='curve',
    errors=True,
    hlines=[1.0, 0.3],
    hlines_col=['black', 'grey'],
)
slr.set_histface(**histface_options)

tree.show(histfaces=[SLR_MODEL])





예제 #22
0
__licence__ = "GPLv3"
__version__ = "0.0"


from ete3 import EvolTree


# Python 2 script: run M0 once, then for every leaf mark it with '#1' and
# run models 'bsA.<leaf name>' and 'bsA1.<leaf name>'.
tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')

print tree

raw_input('\n   tree and alignment loaded\nHit some key, to start computation of branch site models A and A1 on each branch.\n')

print 'running model M0, for comparison with branch-site models...'
tree.run_model('M0')

# each node/leaf has two kind of identifiers node_id and paml_id, to mark nodes we have to specify
# the node_id of the nodes we want to mark, and the kind of mark in this way:

for leaf in tree:
    # NOTE(review): the bare attribute access below has no visible effect;
    # it looks like a leftover and can probably be removed -- confirm.
    leaf.node_id
    print '\n---------\nNow working with leaf ' + leaf.name
    tree.mark_tree([leaf.node_id], marks=['#1'])
    print tree.write()
    # to organize a bit, we name model with the name of the marked node
    # any character after the dot, in model name, is not taken into account
    # for computation. (have a look in /tmp/ete3.../bsA.. directory)
    print 'running model bsA and bsA1'
    tree.run_model('bsA.'+ leaf.name)
    tree.run_model('bsA1.' + leaf.name)
예제 #23
0
파일: 1_freeratio.py 프로젝트: abdo3a/ete
# Interactive walkthrough: show `tree` (created earlier in the script), run
# the model named "fb.example" and display pieces of the resulting model
# object, pausing on input() between steps.
FB_MODEL = "fb.example"

input("\n   alignment loaded, hit some key to see.\n")
tree.show()

print("""
we will run free-ratio model that is one of models available through
function run_model:
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
""")
# run_model's own docstring serves as the list of available models.
model_help = tree.run_model.__doc__
print(model_help + "\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")

tree.run_model(FB_MODEL)
input("free-ratio model runned, all results are store in a Model object.")

fb = tree.get_evol_model(FB_MODEL)

print("Have a look to the parameters used to run this model on codeml: ")
print(fb.get_ctrl_string())
input("hit some key...")

print("Have a look to run message of codeml: ")
print(fb.run)
input("hit some key...")

print("Have a look to log likelihood value of this model, and number of parameters:")