예제 #1
0
def run_evol_py(tree,
                alg,
                branch_model,
                site_models,
                workir='data/evol_output',
                tool_dir="ete3_apps/bin"):
    print(tree, alg, branch_model, site_models)
    builtin_apps_path = None
    builtin_apps_path = os.path.join(os.path.split(ete3_path)[0], tool_dir)

    tree = EvolTree(tree, binpath=builtin_apps_path)
    tree.link_to_alignment(alg)
    tree.workdir = workir

    ###branch model
    if branch_model:
        branch_model = str(branch_model)
        tree.run_model(branch_model)
        print(tree.get_evol_model(branch_model))

    ### site model
    for site_model in site_models:
        tree.run_model(site_model)
        #tree.run_model('SLR.lele')

    global evol_output_dir, final_evol_tree
    evol_output_dir = workir
    final_evol_tree = evol_output_dir + '/tree_evol_result.png'

    tree.render(final_evol_tree,
                layout=evol_clean_layout,
                histfaces=site_models)
    return tree
예제 #2
0
    def run(self, pamlsrc, output_folder, model='M1'):
        """Run PAML using ETE.

        The default model is M1 as it is best for orthology inference in
        our case. You can use models `M2`, `M0`, `M3`.

        Ensure that you have the correct path to your codeml binary. It should
        be in the paml `/bin`.

        :param pamlsrc: Path to the codemly binary.
        :param output_folder: The name of the output folder.
        :param model: The model to be used. (Default value = 'M1')
        """

        # Import the newick tree
        tree = EvolTree('temptree.nw')

        # Import the alignment
        tree.link_to_alignment(self.alignmentfile)

        tree.workdir = self.workdir

        # Set the binpath of the codeml binary
        tree.execpath = pamlsrc
        # Run the model M1, M2, M3, or M0
        model_path = model + '.' + output_folder
        tree.run_model(model_path)
        self.ete3paml_log.info('Codeml is generating data in %s.' % model_path)
예제 #3
0
    def main(self):
        """The main function for running the test."""

        print("Running model %s paml on input." % str(self.defaultmodel))

        tree = EvolTree(self.tree)  # Import the newick tree
        tree.link_to_alignment(self.alignment)  # Import the alignment
        tree.workdir = self.workdir  # Set the working directory
        tree.execpath = self.pamlpath  # Set the binpath of the codeml binary
        tree.run_model(self.defaultmodel)  # Run the codeml model
예제 #4
0
def count_omega(align_file, gene_name):
    print(gene_name)
    tree = EvolTree(tree_file)
    tree.link_to_alignment(align_file)
    #
    # #free branch ratio count
    tree.run_model('fb')
    fb_results = tree.get_evol_model('fb')
    print(fb_results)
    with open(temp, 'w') as temp_file:
        temp_file.write(str(fb_results))
    write_in_table(gene_name)
예제 #5
0
def ete3paml(gene, paml_path, workdir='data/paml-output/', model='M1'):
    """ Use ETE3's integration with PAML"""

    # Import the species tree to compare species that are present in alignment
    # file
    t = Tree('data/initial-data/species_tree.nw', format=1)
    orgsfile = pd.read_csv('data/initial-data/organisms.csv', header=None)

    # Create a list name/variable and use list()
    orgs = list(orgsfile[0])
    organismslist = formatlist(orgs)

    # Import alignment file as string
    alignment_file = open(
        'data/clustal-output/' + gene + '_Aligned/' + gene +
        '_aligned_cds_nucl.fasta', 'r')
    alignment_str = alignment_file.read()
    alignment_file.close()

    # Keep the branches in the species tree for species in the alignment file
    # Some species may not be present in the alignment file
    branches2keep = []
    for organism in organismslist:
        if organism in alignment_str:
            #print('Yup.')
            branches2keep.append(organism)
        else:
            pass
            #print('Nope.') Make an error code in the log

    # Input a list of branches to keep on the base tree
    speciestree = t.prune(branches2keep, preserve_branch_length=True)

    # Import the newick tree
    tree = EvolTree(speciestree)

    # Import the alignment
    tree.link_to_alignment('data/clustal-output/' + gene + '_Aligned/' + gene +
                           '_aligned_cds_nucl.fasta')

    tree.workdir = workdir

    # Set the binpath of the codeml binary
    tree.execpath = paml_path

    # Run the codeml model
    tree.run_model(model + '.' + gene)
예제 #6
0
def pamlSite(alnFile, treeFile, lModels, pamlParams, outDir, baseName, logger):

    tree = EvolTree(treeFile)
    os.mkdir(outDir + "paml_site/")
    tree.workdir = outDir + "paml_site/"
    tree.link_to_alignment(alnFile, "Fasta")
    logger.info("PAML codeml")

    dModelRun = {}
    for model in lModels:
        if model in ["M0", "M1", "M2", "M7", "M8"]:
            logger.info("Running {:s}".format(model))
            dModelRun[model] = tree.run_model(model)

    if "M1" and "M2" in dModelRun:
        p12 = tree.get_most_likely("M2", "M1")
        logger.info("LRT of M1 vs M2 = {}".format(p12))
    if "M7" and "M8" in dModelRun:
        p78 = tree.get_most_likely("M8", "M7")
        logger.info("LRT of M7 vs M8 = {}".format(p78))
    """
예제 #7
0
def main():
    """
    main function
    """
    tree = EvolTree(WRKDIR + 'tree.nw')
    tree.workdir = 'data/protamine/PRM1/paml/'

    random_swap(tree)
    tree.link_to_evol_model(WRKDIR + 'paml/fb/fb.out', 'fb')
    check_annotation(tree)
    tree.link_to_evol_model(WRKDIR + 'paml/M1/M1.out', 'M1')
    tree.link_to_evol_model(WRKDIR + 'paml/M2/M2.out', 'M2')
    tree.link_to_evol_model(WRKDIR + 'paml/M7/M7.out', 'M7')
    tree.link_to_evol_model(WRKDIR + 'paml/M8/M8.out', 'M8')
    tree.link_to_alignment(WRKDIR + 'alignments.fasta_ali')
    print 'pv of LRT M2 vs M1: ',
    print tree.get_most_likely('M2', 'M1')
    print 'pv of LRT M8 vs M7: ',
    print tree.get_most_likely('M8', 'M7')

    tree.show(histfaces=['M2'])

    print 'The End.'
def run_codeml(mark_id, aln_file, tree_file, sleep):
    logger.info('sub-process: {0}'.format(str(mark_id)))
    time.sleep(round(sleep / args.threads, 2))
    run_dir = os.path.join(output_dir, str(mark_id))
    os.makedirs(run_dir)
    tree = EvolTree(tree_file, format=0)
    tree.link_to_alignment(aln_file)
    tree.run_model('M0')
    tree.workdir = run_dir
    tree.mark_tree([mark_id], marks=['#1'])
    tree.run_model('bsA.' + str(mark_id))
    tree.run_model('bsA1.' + str(mark_id))
    ps = tree.get_most_likely('bsA.' + str(mark_id), 'bsA1.' + str(mark_id))
    rx = tree.get_most_likely('bsA1.' + str(mark_id), 'M0')
    bsA = tree.get_evol_model('bsA.' + str(mark_id))
    p_bsA = bsA.classes['proportions'][2]
    wfrg2a = bsA.classes['foreground w'][2]
    if ps < 0.05 and float(wfrg2a) > 1:
        result = [mark_id, ps, rx, p_bsA, 'positive selection']
    elif rx < 0.05 and ps >= 0.05:
        result = [mark_id, ps, rx, p_bsA, 'relaxation']
    else:
        result = [mark_id, ps, rx, p_bsA, 'no signal']
    return result
예제 #9
0
def main(args):
    if args.BinPath:
        tree = EvolTree(args.Tree, binpath=args.BinPath)
    else:
        tree = EvolTree(args.Tree)
    if args.MSA[:-3] == ".phy":
        tree.link_to_alignment(args.MSA, format="phylip")
    elif args.MSA:
        tree.link_to_alignment(args.MSA)
    print(tree)
    tree.workdir = os.getcwd()
    if args.LoadedModels:
        load_model(args.LoadedModels, tree)
        compare_models(models=args.LoadedModels, tree=tree, args=args)
    if args.Models:
        run_models(args.models, tree)
    if args.Compare:
        if args.TreeStruct:
            tree_structure = parse_structure_file(args.TreeStruct)
            compare_models(models=args.Compare,
                           tree=tree,
                           tree_structure=tree_structure)
        else:
            compare_models(models=args.Compare, tree=tree, args=args)
예제 #10
0
def main():
    """
    main function
    """
    tree = EvolTree (WRKDIR + 'tree.nw')
    tree.workdir = 'data/protamine/PRM1/paml/'

    random_swap(tree)
    tree.link_to_evol_model (WRKDIR + 'paml/fb/fb.out', 'fb')
    check_annotation (tree)
    tree.link_to_evol_model (WRKDIR + 'paml/M1/M1.out', 'M1')
    tree.link_to_evol_model (WRKDIR + 'paml/M2/M2.out', 'M2')
    tree.link_to_evol_model (WRKDIR + 'paml/M7/M7.out', 'M7')
    tree.link_to_evol_model (WRKDIR + 'paml/M8/M8.out', 'M8')
    tree.link_to_alignment  (WRKDIR + 'alignments.fasta_ali')
    print 'pv of LRT M2 vs M1: ',
    print tree.get_most_likely ('M2','M1')
    print 'pv of LRT M8 vs M7: ',
    print tree.get_most_likely ('M8','M7')


    tree.show (histfaces=['M2'])

    print 'The End.'
예제 #11
0
15 Nov 2010

simple example to mark a tree and compute branch-site test of positive selection
"""

__author__  = "Francois-Jose Serra"
__email__   = "*****@*****.**"
__licence__ = "GPLv3"
__version__ = "0.0"


from ete3 import EvolTree


tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')

print tree

raw_input('\n   tree and alignment loaded\nHit some key, to start computation of branch site models A and A1 on each branch.\n')

print 'running model M0, for comparison with branch-site models...'
tree.run_model('M0')

# each node/leaf has two kind of identifiers node_id and paml_id, to mark nodes we have to specify
# the node_id of the nodes we want to mark, and the kind of mark in this way:

for leaf in tree:
    leaf.node_id
    print '\n---------\nNow working with leaf ' + leaf.name
    tree.mark_tree([leaf.node_id], marks=['#1'])
예제 #12
0
                np.array(all_dists),
                2)  # for the case that list is only 3 items long
        closest_seq_ids = [seqid]
        for d in idxes_of_3_smallest:
            closest_seq_ids.append(seqids_of_other_species[d])
        # ete3 has codeml handling implemented!! No need for own functions.
        subtree = t.copy()
        subtree.prune(closest_seq_ids, preserve_branch_length=True)
        subtree.unroot()
        evotree = EvolTree(subtree.write())
        subfasta = make_clean_fasta(closest_seq_ids, seqdatadict)
        if not subfasta:
            omega_list.append("NA")
            continue
        else:
            evotree.link_to_alignment(subfasta)
            workdirname = './codeml_' + "__".join(closest_seq_ids)
            evotree.workdir = workdirname
            list_of_tempdirs.append(workdirname)
            # mark the foreground branch
            foreground_leafnode = evotree & seqid
            #			print (seqid)
            #			print(foreground_leafnode.node_id)
            #			print (evotree.write())
            evotree.mark_tree([foreground_leafnode.node_id], ['#1'])
            #			print (evotree.write())

            evotree.run_model('b_free.run')
            b_free_fit = evotree.get_evol_model('b_free.run')
            out_branches_dict = b_free_fit.branches
            for b in out_branches_dict:
예제 #13
0
from ete3 import EvolTree
from string import ascii_letters

# CREATE TREE
fasta_lines = open("./whales.fasta", "r").readlines()

taxa = [l.replace('>', '').strip() for l in fasta_lines if l.startswith('>')]
taxa_map = { t: ascii_letters[i] for i, t in enumerate(taxa) }

taxa_string = '(' * (len(taxa) - 1) + '%s,%s)' % (ascii_letters[0], ascii_letters[1])
for t in ascii_letters[2:len(taxa)]:
    taxa_string = taxa_string + ',%s)' % t
taxa_string = taxa_string + ';'

align = ''.join(fasta_lines)
for t in taxa:
    align = align.replace(t, taxa_map[t])

tree = EvolTree(taxa_string)
tree.link_to_alignment(align)
#tree.link_to_evol_model("M2")
#tree.get_evol_model("M2")
print(tree.run_model.__doc__)
tree.run_model("fb")
예제 #14
0
tree = EvolTree(tree_file)
out_tree_name = os.path.basename(tree_file)
out_tree_name = os.path.splitext(out_tree_name)[0]
out_tree_name = out_tree_name + '_' + gene_name + '.tre'

# If there is a new alignment, prune the tree down to the taxa that remain in
# the new alignment and write a new tree because EvolTree is shit and can't
# use the pruned tree saved in memory
if empty_seq_count >= 1:
    if len(taxa_in_alignment) >= 1:
        tree.prune(taxa_in_alignment, preserve_branch_length=True)
        tree.unroot()
        tree.write(outfile=out_tree_name, format=0)
        tree = EvolTree(out_tree_name)

tree.link_to_alignment(alignment_file)

tree.workdir = os.getcwd()

# Record list of all node_ids in the tree for later retrieving omega from a
# background branch in the b_free model
list_of_node_ids = []
for node in tree.traverse('postorder'):
    list_of_node_ids.append(node.node_id)

test_taxa = []
with open(test_taxa_file, 'r') as test_taxa_list:
    for taxon in test_taxa_list:
        taxon = taxon.rstrip()
        test_taxa.append(taxon)
예제 #15
0
'can be run from within a paml directory'
from ete3 import EvolTree
import os

tree_file = "testTree.tre"
alignment_file = "testAlignment.fasta"
model = "./model/out"
model_name = "bsD.bl_0.2w"
# model_name = os.path.basename(os.getcwd())

testTree = EvolTree(tree_file)
testTree.link_to_alignment(alignment_file)
testTree.link_to_evol_model(model, model_name)

testTree.show()
예제 #16
0
import sys, os, subprocess
import argparse
from ete3 import EvolTree

tree = EvolTree("tree.nw",
                binpath="/home/edu/miniconda3/envs/ete3/bin/ete3_apps/bin")
tree.link_to_alignment("infile.phy", alg_format="phylip")
tree.workdir = os.getcwd()

print(tree)

print('running model M0, for comparison with branch-site models...')

tree.run_model('M0', keep=True)
#tree.link_to_evol_model("/home/edu/Desktop/Bioinformatica/Mitogenomics/Chondrichthyes/Phylogenetic_Tree","M0")
chimaeriformes = tree.get_common_ancestor("HM147138.1", "HM147135.1")
#chimaeriformes =tree.get_common_ancestor("Human_ECP","Goril_ECP")

for leaf in chimaeriformes:
    tree.mark_tree([leaf.node_id], marks=["#1"])
#tree.run_model("bsA." + chimaeriformes)
#tree.mark_tree([leaf.node_id], marks = ["#1"])
print("Running")
print(tree.write())
tree.run_model('bsA.Chimaeriformes')
tree.run_model("bsA1.Chimaeriformes")

print('p-value of positive selection for sites on this branch is: ')
ps = tree.get_most_likely('bsA.Chimaeriformes', 'bsA1.Chimaeriformes')
print(str(ps))
rx = tree.get_most_likely('bsA1.Chimaeriformes', 'M0')