Ejemplo n.º 1
0
def parse_tree(t):
    """Build an ``EvolTree`` from ``t`` and return it.

    ``render('')`` is called on the new tree before it is returned; per the
    original author's note, this is what gives the nodes the IDs later used
    to apply labels/marks.
    """
    evol_tree = EvolTree(t)
    evol_tree.render('')
    return evol_tree
Ejemplo n.º 2
0
def prepare_codeml(round, fasta_file_name, species_tree, marks, wd):
    """Prepare the per-gene directory with every input file codeml needs.

    The process changes directory into ``<wd>/<name>`` and writes there the
    pruned (and, in "BM"/"BS" mode, marked) tree, the alignment converted to
    Phylip, and the control files.

    :param round: Round number; zero-padded into the run name and, as a
        string, used as the key into ``marks``.
    :param fasta_file_name: Name of the alignment file located in ``wd``.
    :param species_tree: Input handed to ``EvolTree``.
    :param marks: Mapping from ``str(round)`` to the species to mark.
    :param wd: Base working directory.
    """
    run_name = "r" + str(round).zfill(2)
    # A new tree is built for every alignment that is processed.
    tree = EvolTree(species_tree)
    name = fasta_file_name.replace(args.suffix, "")

    gene_dir = os.path.join(wd, name)
    fasta_path = os.path.join(wd, fasta_file_name)

    create_dir(wd, name)
    os.chdir(gene_dir)

    # Restrict the tree to the genomes contained in this fasta file.
    genomes = fasta_ids(fasta_path)
    prune_tree(tree, genomes)

    # Branch and branch-site models need the branches of interest marked.
    if args.mode in ("BM", "BS"):
        mark_spp = list(set(marks[str(round)]).intersection(genomes))
        mark_branches(tree, mark_spp)

        # Optional per-gene tree, used to check monophyly of the marked taxa.
        if args.single_trees:
            gene_tree_path = os.path.join(wd, name, fasta_file_name + ".ftree")
            fast_tree(fasta_path, gene_tree_path)
            gene_tree = midpoint_root(tree_features(gene_tree_path))
            if not is_monophyletic(gene_tree, mark_spp):
                logging.warning("Check monophyly in the clade-of-interest: {}".format(name))

    # Outputs use relative names, so they land in the directory entered above.
    tree.write(outfile=name + ".tree")
    fasta2phy(fasta_path, name + ".phy")

    # Alternative and null control files.
    control_files(wd, args.mode, name, run_name)
Ejemplo n.º 3
0
    def run(self, pamlsrc, output_folder, model='M1'):
        """Run a codeml model on ``temptree.nw`` through ETE.

        M1 is the default because it suits orthology inference best in our
        case; `M2`, `M0` and `M3` can be used as well.

        Make sure the path to the codeml binary is correct. It should be in
        the paml `/bin`.

        :param pamlsrc: Path to the codeml binary.
        :param output_folder: The name of the output folder.
        :param model: The model to be used. (Default value = 'M1')
        """
        # Newick tree, then the alignment it belongs to.
        evol_tree = EvolTree('temptree.nw')
        evol_tree.link_to_alignment(self.alignmentfile)

        # Output location and codeml binary path.
        evol_tree.workdir = self.workdir
        evol_tree.execpath = pamlsrc

        # The run is named "<model>.<output_folder>".
        run_name = model + '.' + output_folder
        evol_tree.run_model(run_name)
        self.ete3paml_log.info('Codeml is generating data in %s.', run_name)
Ejemplo n.º 4
0
    def main(self):
        """Run the default codeml model on this object's tree and alignment."""
        print("Running model %s paml on input." % str(self.defaultmodel))

        # Newick tree plus the alignment it is linked to.
        evol_tree = EvolTree(self.tree)
        evol_tree.link_to_alignment(self.alignment)

        # Working directory and path of the codeml binary.
        evol_tree.workdir = self.workdir
        evol_tree.execpath = self.pamlpath

        evol_tree.run_model(self.defaultmodel)
Ejemplo n.º 5
0
def count_omega(align_file, gene_name):
    """Run the 'fb' model for one gene and hand the result to the table writer.

    The tree comes from the module-level ``tree_file``. The model's string
    form is printed and written to the module-level ``temp`` path before
    ``write_in_table(gene_name)`` is called.
    """
    print(gene_name)
    evol_tree = EvolTree(tree_file)
    evol_tree.link_to_alignment(align_file)

    # Free-branch ratio model.
    evol_tree.run_model('fb')
    fb_model = evol_tree.get_evol_model('fb')
    print(fb_model)

    with open(temp, 'w') as out_handle:
        out_handle.write(str(fb_model))
    write_in_table(gene_name)
Ejemplo n.º 6
0
def ete3paml(gene, paml_path, workdir='data/paml-output/', model='M1'):
    """Run a codeml model for ``gene`` using ETE3's integration with PAML.

    The species tree is pruned to the organisms whose names occur in the
    gene's alignment file, linked to that alignment and passed to codeml.

    :param gene: Gene name; used to locate the alignment file and to name
        the run (``<model>.<gene>``).
    :param paml_path: Path assigned to the tree's ``execpath`` (codeml binary).
    :param workdir: Directory assigned to the tree's ``workdir``.
    :param model: codeml model name. (Default value = 'M1')
    """
    alignment_path = ('data/clustal-output/' + gene + '_Aligned/' + gene +
                      '_aligned_cds_nucl.fasta')

    # Species tree used to compare against the species present in the
    # alignment file.
    t = Tree('data/initial-data/species_tree.nw', format=1)
    orgsfile = pd.read_csv('data/initial-data/organisms.csv', header=None)
    organismslist = formatlist(list(orgsfile[0]))

    # Read the alignment as text; the context manager closes the handle even
    # if reading fails.
    with open(alignment_path, 'r') as alignment_file:
        alignment_str = alignment_file.read()

    # Some species may be absent from the alignment; keep only those whose
    # name occurs in the alignment text.
    branches2keep = [organism for organism in organismslist
                     if organism in alignment_str]

    # prune() modifies the tree in place and returns None, so its return
    # value must not be passed on: prune first, then build the EvolTree from
    # the pruned tree's newick (same format the species tree was read with).
    t.prune(branches2keep, preserve_branch_length=True)
    tree = EvolTree(t.write(format=1), format=1)

    tree.link_to_alignment(alignment_path)
    tree.workdir = workdir

    # Path of the codeml binary.
    tree.execpath = paml_path

    tree.run_model(model + '.' + gene)
Ejemplo n.º 7
0
def main(args):
    """Build an EvolTree from the parsed arguments and run the requested steps.

    Attributes read from ``args``: ``Tree``, ``BinPath``, ``MSA``,
    ``LoadedModels``, ``Models``, ``Compare`` and ``TreeStruct``.
    """
    if args.BinPath:
        tree = EvolTree(args.Tree, binpath=args.BinPath)
    else:
        tree = EvolTree(args.Tree)

    # Only link an alignment when one was given. Files ending in ".phy" are
    # read as phylip (endswith replaces a slice that looked at the wrong end
    # of the string), using the alg_format keyword.
    if args.MSA:
        if args.MSA.endswith(".phy"):
            tree.link_to_alignment(args.MSA, alg_format="phylip")
        else:
            tree.link_to_alignment(args.MSA)

    print(tree)
    tree.workdir = os.getcwd()

    if args.LoadedModels:
        load_model(args.LoadedModels, tree)
        compare_models(models=args.LoadedModels, tree=tree, args=args)
    if args.Models:
        # Same attribute as tested in the condition (was args.models).
        run_models(args.Models, tree)
    if args.Compare:
        if args.TreeStruct:
            tree_structure = parse_structure_file(args.TreeStruct)
            compare_models(models=args.Compare,
                           tree=tree,
                           tree_structure=tree_structure)
        else:
            compare_models(models=args.Compare, tree=tree, args=args)
Ejemplo n.º 8
0
def pamlSite(alnFile, treeFile, lModels, pamlParams, outDir, baseName, logger):
    """Run codeml site models through ETE and log the M1/M2 and M7/M8 LRTs.

    :param alnFile: Alignment linked to the tree (read as "Fasta").
    :param treeFile: Tree handed to ``EvolTree``.
    :param lModels: Model names; only M0, M1, M2, M7 and M8 are run.
    :param pamlParams: Not used in this part of the function.
    :param outDir: Output prefix; ``paml_site/`` is created under it.
    :param baseName: Not used in this part of the function.
    :param logger: Logger receiving progress and LRT messages.
    """
    tree = EvolTree(treeFile)
    siteDir = outDir + "paml_site/"
    os.mkdir(siteDir)
    tree.workdir = siteDir
    tree.link_to_alignment(alnFile, "Fasta")
    logger.info("PAML codeml")

    dModelRun = {}
    for model in lModels:
        if model in ["M0", "M1", "M2", "M7", "M8"]:
            logger.info("Running {:s}".format(model))
            dModelRun[model] = tree.run_model(model)

    # Each test needs BOTH models. `"M1" and "M2" in d` only checked "M2",
    # because a non-empty string literal is always truthy.
    if "M1" in dModelRun and "M2" in dModelRun:
        p12 = tree.get_most_likely("M2", "M1")
        logger.info("LRT of M1 vs M2 = {}".format(p12))
    if "M7" in dModelRun and "M8" in dModelRun:
        p78 = tree.get_most_likely("M8", "M7")
        logger.info("LRT of M7 vs M8 = {}".format(p78))
    """
def tree_layout(tree_file, ps_node_list):
    """Render the tree to a PNG, highlighting the nodes in ``ps_node_list``.

    Descendants whose ``node_id`` is in ``ps_node_list`` get a red node
    style; all others get the plain one. The picture is written to
    ``positive_selection_tree.png`` inside the module-level ``output_dir``.
    """
    t = EvolTree(tree_file, format=0)

    # Plain style for ordinary nodes.
    style_other = NodeStyle()
    style_other['size'] = 6
    # Same size, red foreground, for the selected nodes.
    style_ps = NodeStyle()
    style_ps['fgcolor'] = '#ff0000'
    style_ps['size'] = 6

    for node in t.iter_descendants():
        target = t.get_descendant_by_node_id(node.node_id)
        target.img_style = style_ps if node.node_id in ps_node_list else style_other

    # Branch support, branch lengths and leaf names are all hidden.
    ts = TreeStyle()
    ts.layout_fn = layout
    ts.show_branch_support = False
    ts.show_branch_length = False
    ts.show_leaf_name = False

    t.render(os.path.join(output_dir, 'positive_selection_tree.png'),
             tree_style=ts)
Ejemplo n.º 10
0
example of computation and display of an ancestral sequence
computed under free'ratio model.
"""

__author__  = "Francois-Jose Serra"
__email__   = "*****@*****.**"
__licence__ = "GPLv3"
__version__ = "0.0"

from ete3 import TreeStyle
from ete3 import EvolTree
from ete3 import faces


# Load the example tree and link it to its alignment.
tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')

# Single-argument print written as a call: valid on Python 2 and Python 3.
print(tree)

print('\n Running free-ratio model with calculation of ancestral sequences...')

tree.run_model('fb_anc')
# Alternative left by the author, linking an existing output instead:
#tree.link_to_evol_model('/tmp/ete3-codeml/fb_anc/out', 'fb_anc')

# Tree style: keep the real topology, draw aligned faces as a table and use
# light-grey guiding lines.
I = TreeStyle()
I.force_topology = False
I.draw_aligned_faces_as_table = True
I.draw_guiding_lines = True
I.guiding_lines_type = 2
I.guiding_lines_color = "#CCCCCC"
Ejemplo n.º 11
0
def main():
    """Link existing codeml outputs to the tree, print two LRTs and show it.

    Output files for fb, M1, M2, M7 and M8 are read from under ``WRKDIR``.
    The p-values of M2 vs M1 and M8 vs M7 are printed, then the tree is
    shown with the M2 histogram face.
    """
    tree = EvolTree(WRKDIR + 'tree.nw')
    tree.workdir = 'data/protamine/PRM1/paml/'

    random_swap(tree)
    tree.link_to_evol_model(WRKDIR + 'paml/fb/fb.out', 'fb')
    check_annotation(tree)
    for site_model in ('M1', 'M2', 'M7', 'M8'):
        tree.link_to_evol_model(
            WRKDIR + 'paml/%s/%s.out' % (site_model, site_model), site_model)
    tree.link_to_alignment(WRKDIR + 'alignments.fasta_ali')

    # One print call per line, valid on Python 2 and 3. The doubled space
    # reproduces what `print 'label ',` followed by `print value` produced.
    print('pv of LRT M2 vs M1:  ' + str(tree.get_most_likely('M2', 'M1')))
    print('pv of LRT M8 vs M7:  ' + str(tree.get_most_likely('M8', 'M7')))

    tree.show(histfaces=['M2'])

    print('The End.')
Ejemplo n.º 12
0
'can be run from within a paml directory'
from ete3 import EvolTree
import os

# Input files, given relative to the directory the script is run from.
tree_file = "testTree.tre"
alignment_file = "testAlignment.fasta"
# Existing model output to load, and the name it is registered under.
model = "./model/out"
model_name = "bsD.bl_0.2w"
# Alternative: derive the model name from the current directory name.
# model_name = os.path.basename(os.getcwd())

# Build the tree, link the alignment, then attach the model output to it.
testTree = EvolTree(tree_file)
testTree.link_to_alignment(alignment_file)
testTree.link_to_evol_model(model, model_name)

testTree.show()
def load_parameters():
    """Read and validate the run parameters listed in ``input_file``.

    File layout: line 1 is the cDNA alignment path, line 2 is the tree path,
    and any further non-blank lines are node names to restrict the run to.

    Returns a tuple ``(run_list, aln_file, tree_file, descendant_dict)``:
    the node ids to check, the alignment path, the (possibly trimmed) tree
    path, and a mapping of descendant node id to ``str(node)``.

    Calls ``sys.exit(1)`` when a file is missing, when the alignment holds
    at least as many names as the tree yet differs from it, when a listed
    node is not in the tree, or when the listed nodes are not monophyletic.
    """
    # defaultdict() without a factory behaves like a plain dict here.
    descendant_dict = defaultdict()
    with open(input_file, 'r') as f:
        all_lines = f.readlines()
        # Line 1: alignment file.
        aln_file = all_lines[0].strip()
        if not os.path.exists(aln_file):
            logger.error('Invalid cDNA alignment file: {0}'.format(aln_file))
            sys.exit(1)
        logger.info('Input cDNA alignment file: {0}'.format(aln_file))
        # Sequence ids are kept both as dict keys (for the comparison below)
        # and as an ordered list (for trimming the tree).
        seq_id_dict = defaultdict()
        seq_id_list = []
        for seq_record in AlignIO.read(aln_file, 'fasta'):
            seq_id_dict[str(seq_record.id)] = 1
            seq_id_list.append(str(seq_record.id))
        # Line 2: tree file.
        tree_file = all_lines[1].strip()
        if not os.path.exists(tree_file):
            logger.error('Invalid tree file: {0}'.format(tree_file))
            sys.exit(1)
        logger.info('Input tree file: {0}'.format(tree_file))
        tmp_t = Tree(tree_file, format=0)
        node_id_dict = defaultdict()
        # NOTE(review): iterating a tree presumably yields its leaves - confirm.
        for node in tmp_t:
            node_id_dict[str(node.name)] = 1
        # Every value is 1, so this compares the two sets of names.
        if seq_id_dict != node_id_dict:
            if len(seq_id_dict) < len(node_id_dict):
                logger.warning('Sequences is less than tree nodes.')
                logger.info('Trim input tree file.')
                # From here on tree_file is whatever trim_tree returned.
                tree_file = trim_tree(tree_file, seq_id_list)
            else:
                logger.error('Sequences is falsely greater than tree nodes.')
                sys.exit(1)
        t = EvolTree(tree_file, format=1)
        for descendant in t.iter_descendants():
            descendant_dict[descendant.node_id] = str(descendant)
        root = t.get_tree_root()
        # NOTE(review): id_list is filled but never read in this function.
        id_list = []
        for leaf in t.traverse('preorder'):
            id_list.append(leaf.node_id)
        # Lines 3+: optional node names, blank lines ignored.
        select_nodes = []
        if len(all_lines) > 2:
            for each_line in all_lines[2:]:
                s = each_line.strip()
                if s:
                    select_nodes.append(s)
        if select_nodes:
            nodes_line = ', '.join(select_nodes)
            logger.info('Input nodes: {0}'.format(nodes_line))
            for node in select_nodes:
                if node not in t:
                    logger.error('Error node: {0}'.format(node))
                    sys.exit(1)
            if not t.check_monophyly(values=select_nodes, target_attr='name'):
                logger.error('Some nodes are not monophyletic.')
                sys.exit(1)
            common_ancestor = t.get_common_ancestor(select_nodes)
        else:
            # No selection: work from the root.
            common_ancestor = root
            logger.info('No specific node')
        # Every descendant of the chosen ancestor will be checked.
        run_list = []
        for s in common_ancestor.iter_descendants():
            run_list.append(s.node_id)
        logger.info('These node ids will be checked: {0}'.format(
            str(run_list)))
        return run_list, aln_file, tree_file, descendant_dict
Ejemplo n.º 14
0
def tree_features(tree_file):
    """Load ``tree_file`` and tag each leaf with a ``spptag`` feature.

    The species tag is the part of the leaf name before the first
    underscore (the whole name when there is none).
    """
    annotated = EvolTree(tree_file)
    for tip in annotated.iter_leaves():
        species_tag = tip.name.partition("_")[0]
        tip.add_feature("spptag", species_tag)
    return annotated
Ejemplo n.º 15
0
                                        "-")) == gapSeq:  # if it's just gaps
        empty_seq_count += 1

# When the alignment contained empty sequences, collect the ids of the taxa
# that do have sequence data; the tree is pruned down to these below.
if empty_seq_count >= 1:
    taxa_in_alignment = []
    for record in SeqIO.parse(alignment_file, format=alignment_format):
        gapSeq = '-' * len(record.seq)
        # A sequence counts as empty when it is nothing but gaps once N is
        # treated as a gap.
        if str(record.seq).upper().replace("N", "-") != gapSeq:
            taxa_in_alignment.append(record.id)

tree = EvolTree(tree_file)
# Output name: <tree file stem>_<gene name>.tre
out_tree_name = (os.path.splitext(os.path.basename(tree_file))[0]
                 + '_' + gene_name + '.tre')

# With a reduced taxon set, prune and unroot the tree, write it out and load
# it again from that file; per the original author's note, EvolTree could
# not use the pruned tree held in memory.
if empty_seq_count >= 1 and len(taxa_in_alignment) >= 1:
    tree.prune(taxa_in_alignment, preserve_branch_length=True)
    tree.unroot()
    tree.write(outfile=out_tree_name, format=0)
    tree = EvolTree(out_tree_name)

tree.link_to_alignment(alignment_file)
Ejemplo n.º 16
0
__author__ = "Francois-Jose Serra"
__email__ = "*****@*****.**"
__licence__ = "GPLv3"
__version__ = "0.0"


try:
    input = raw_input
except NameError:
    pass


from ete3 import EvolTree
from ete3 import NodeStyle

# Load the example tree and link it to its alignment.
tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment("data/S_example/alignment_S_measuring_evol.fasta")

print(tree)

print("Tree and alignment loaded.")
# Pause until the user presses Enter.
input("Tree will be mark in order to contrast Gorilla and Chimpanzee as foreground \nspecies.")

# Node ids to mark; note the mix of strings and an int.
marks = ["1", 3, "7"]

# Give all three nodes the same "#1" mark, then print the resulting tree.
tree.mark_tree(marks, ["#1"] * 3)
print(tree.write())

# display marked branches in orange
for node in tree.traverse():
    if not hasattr(node, "mark"):
Ejemplo n.º 17
0
import sys, os, subprocess
import argparse
from ete3 import EvolTree

# Build the tree with an explicit path to the bundled binaries (hard-coded to
# one machine), link the phylip alignment and work in the current directory.
tree = EvolTree("tree.nw",
                binpath="/home/edu/miniconda3/envs/ete3/bin/ete3_apps/bin")
tree.link_to_alignment("infile.phy", alg_format="phylip")
tree.workdir = os.getcwd()

print(tree)

print('running model M0, for comparison with branch-site models...')

tree.run_model('M0', keep=True)
#tree.link_to_evol_model("/home/edu/Desktop/Bioinformatica/Mitogenomics/Chondrichthyes/Phylogenetic_Tree","M0")
# Common ancestor of the two named leaves; it defines the clade to mark.
chimaeriformes = tree.get_common_ancestor("HM147138.1", "HM147135.1")
#chimaeriformes =tree.get_common_ancestor("Human_ECP","Goril_ECP")

# Mark each node yielded by iterating the ancestor with "#1".
# NOTE(review): presumably these are the leaves under it - confirm.
for leaf in chimaeriformes:
    tree.mark_tree([leaf.node_id], marks=["#1"])
#tree.run_model("bsA." + chimaeriformes)
#tree.mark_tree([leaf.node_id], marks = ["#1"])
print("Running")
print(tree.write())
# Branch-site model A and its counterpart A1, both tagged "Chimaeriformes".
tree.run_model('bsA.Chimaeriformes')
tree.run_model("bsA1.Chimaeriformes")

print('p-value of positive selection for sites on this branch is: ')
ps = tree.get_most_likely('bsA.Chimaeriformes', 'bsA1.Chimaeriformes')
print(str(ps))
# Second comparison: bsA1 against M0 (result stored, not printed here).
rx = tree.get_most_likely('bsA1.Chimaeriformes', 'M0')
Ejemplo n.º 18
0
def main():
    """Link existing codeml outputs to the tree, print two LRTs and show it.

    Output files for fb, M1, M2, M7 and M8 are read from under ``WRKDIR``.
    The p-values of M2 vs M1 and M8 vs M7 are printed, then the tree is
    shown with the M2 histogram face.
    """
    tree = EvolTree(WRKDIR + 'tree.nw')
    tree.workdir = 'data/protamine/PRM1/paml/'

    random_swap(tree)
    tree.link_to_evol_model(WRKDIR + 'paml/fb/fb.out', 'fb')
    check_annotation(tree)
    for site_model in ('M1', 'M2', 'M7', 'M8'):
        tree.link_to_evol_model(
            WRKDIR + 'paml/%s/%s.out' % (site_model, site_model), site_model)
    tree.link_to_alignment(WRKDIR + 'alignments.fasta_ali')

    # One print call per line, valid on Python 2 and 3. The doubled space
    # reproduces what `print 'label ',` followed by `print value` produced.
    print('pv of LRT M2 vs M1:  ' + str(tree.get_most_likely('M2', 'M1')))
    print('pv of LRT M8 vs M7:  ' + str(tree.get_most_likely('M8', 'M7')))

    tree.show(histfaces=['M2'])

    print('The End.')
Ejemplo n.º 19
0
from ete3 import EvolTree
import sys

# Command line: <input tree> <output tree>
treepath = sys.argv[1]
treeout = sys.argv[2]

t = EvolTree(treepath)
ratites = {
    'aptHaa', 'aptRow', 'aptOwe', 'strCam',
    'droNov', 'casCas', 'rheAme', 'rhePen',
}

# Annotate the tree: ratite tips get one mark, and internal nodes whose
# leaves are all ratites get another. Everything else is left unmarked.
for node in t.traverse():
    if node.is_leaf():
        if node.name in ratites:
            node.add_features(mark="{RatiteLeaf}")
    elif set(node.get_leaf_names()) <= ratites:
        node.add_features(mark="{RatiteInternal}")

t.write(outfile=treeout)
Ejemplo n.º 20
0
def parse_tree(treestring):
    """Return an ``EvolTree`` built from ``treestring``."""
    return EvolTree(treestring)
Ejemplo n.º 21
0
# Command line: argument 3 is the results file, argument 4 names the test
# clade to use.
resultsfile = sys.argv[3]
test_to_use = sys.argv[4]

# If either input file is missing, print an all-NA row for this hog and stop.
if not os.path.isfile(resultsfile):
    print(hog, 'NA', 'NA', 'NA', 'NA', sep="\t")
    quit()

if not os.path.isfile(treefile):
    print(hog, 'NA', 'NA', 'NA', 'NA', sep="\t")
    quit()

# NOTE(review): the `as` target rebinds `treefile` from the path to the file
# object, so after this block the name no longer holds the path.
with open(treefile, 'r') as treefile:
    treestring = treefile.read().replace('\n', '')

# Remove every "{word}" tag from the newick text before parsing it.
treestring = re.sub(r"{\w+}", "", treestring)
t = EvolTree(treestring, format=1)
# Hard-coded candidate clades; the one named by test_to_use is selected below.
ratites = {
    'droNov', 'casCas', 'strCam', 'aptHaa', 'aptOwe', 'aptRow', 'rheAme',
    'rhePen'
}
vl = {
    'calAnn', 'corBra', 'serCan', 'geoFor', 'melUnd', 'pseHum', 'taeGut',
    'ficAlb'
}
rand1 = {'colLiv', 'chaVoc', 'halLeu', 'taeGut', 'nipNip'}
rand2 = {'falPer', 'picPub', 'lepDis', 'melUnd', 'aquChr'}
if test_to_use == "ratites":
    testclade = ratites
elif test_to_use == "vl":
    testclade = vl
Ejemplo n.º 22
0
from ete3 import EvolTree
from string import ascii_letters

# CREATE TREE
# Read the alignment through a context manager so the file handle is closed
# (a bare open(...).readlines() leaves it open).
with open("./whales.fasta", "r") as fasta_handle:
    fasta_lines = fasta_handle.readlines()

# Taxon names come from the ">" header lines; each one is mapped to a single
# ASCII letter by position.
taxa = [line.replace('>', '').strip()
        for line in fasta_lines if line.startswith('>')]
taxa_map = {t: ascii_letters[i] for i, t in enumerate(taxa)}

# Nested newick over those letters: ((((a,b),c),d),...);
taxa_string = '(' * (len(taxa) - 1) + '%s,%s)' % (ascii_letters[0], ascii_letters[1])
taxa_string += ''.join(',%s)' % letter for letter in ascii_letters[2:len(taxa)])
taxa_string += ';'

# Rename the taxa inside the alignment text so the names match the tree.
align = ''.join(fasta_lines)
for t in taxa:
    align = align.replace(t, taxa_map[t])

tree = EvolTree(taxa_string)
tree.link_to_alignment(align)
#tree.link_to_evol_model("M2")
#tree.get_evol_model("M2")
print(tree.run_model.__doc__)
tree.run_model("fb")
Ejemplo n.º 23
0
"""

__author__ = "Francois-Jose Serra"
__email__ = "*****@*****.**"
__licence__ = "GPLv3"
__version__ = "0.0"

try:
    input = raw_input
except NameError:
    pass

from ete3 import EvolTree
from ete3 import NodeStyle

# Load the example tree and link it to its alignment.
tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')

print(tree)

print('Tree and alignment loaded.')
# Pause until the user presses Enter.
input(
    'Tree will be mark in order to contrast Gorilla and Chimpanzee as foreground \nspecies.'
)

# Node ids to mark; note the mix of strings and an int.
marks = ['1', 3, '7']

# Give all three nodes the same '#1' mark, then print the resulting tree.
tree.mark_tree(marks, ['#1'] * 3)
print(tree.write())

# display marked branches in orange
Ejemplo n.º 24
0
"""
15 Nov 2010

example to illustrate use of sites model, displaying and comparison
"""

__author__  = "Francois-Jose Serra"
__email__   = "*****@*****.**"
__licence__ = "GPLv3"
__version__ = "0.0"



from ete3 import EvolTree

# Load the example tree and link it to its alignment.
tree = EvolTree ("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment ('data/S_example/alignment_S_measuring_evol.fasta')

print (tree)

# Where raw_input exists, use it as input; otherwise keep the builtin input.
try:
    input = raw_input
except NameError:
    pass

# Pause until the user presses Enter.
input ('\n   tree and alignment loaded\n Hit some key, to start computation of site models M1 and M2.\n')

# Run the two site models one after the other.
print ('running model M1')
tree.run_model ('M1')
print ('running model M2')
tree.run_model ('M2')
Ejemplo n.º 25
0
15 Nov 2010

example of computation and display of an ancestral sequence
computed under free'ratio model.
"""

__author__ = "Francois-Jose Serra"
__email__ = "*****@*****.**"
__licence__ = "GPLv3"
__version__ = "0.0"

from ete3 import TreeStyle
from ete3 import EvolTree
from ete3 import faces

# Load the example tree and link it to its alignment.
tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')

print(tree)

print('\n Running free-ratio model with calculation of ancestral sequences...')

tree.run_model('fb_anc')
# Alternative left by the author, linking an existing output instead:
#tree.link_to_evol_model('/tmp/ete3-codeml/fb_anc/out', 'fb_anc')

# Tree style: keep the real topology, draw aligned faces as a table and use
# light-grey guiding lines.
I = TreeStyle()
I.force_topology = False
I.draw_aligned_faces_as_table = True
I.draw_guiding_lines = True
I.guiding_lines_type = 2
I.guiding_lines_color = "#CCCCCC"
Ejemplo n.º 26
0
# Read the test class: one target tip name per line of the file given as
# the second command-line argument.
testclassfile = sys.argv[2]

with open(testclassfile) as tf:
    target_tips = set(tf.read().splitlines())

for line in lines:
    if line == "":
        continue
    # Tab-separated columns: tree label, hog id, newick string.
    fields = line.split("\t")
    hog = fields[1]
    tree = fields[0]
    try:
        t = EvolTree(fields[2])
    except Exception:
        # Skip rows whose tree cannot be built. Unlike a bare `except:`,
        # this lets KeyboardInterrupt and SystemExit propagate.
        continue
    for node in t.traverse():
        istarget = node_in_class(node, t, target_tips)
        brstat = node.dist
        # Unnamed nodes are labelled by joining their leaf names.
        nname = node.name
        if nname == "":
            nname = "-".join(node.get_leaf_names())
        # A missing parent raises AttributeError on `.name`; label it "root".
        try:
            pname = node.up.name
        except AttributeError:
            pname = "root"
        if pname == "":
            pname = "-".join(node.up.get_leaf_names())
Ejemplo n.º 27
0
"""
15 Nov 2010

first example, load a tree and compute free ratios model,
to find omega value of each branch.
"""

__author__  = "Francois-Jose Serra"
__email__   = "*****@*****.**"
__licence__ = "GPLv3"
__version__ = "0.0"


from ete3 import EvolTree

# Where raw_input exists (Python 2) use it as input, so the prompts below
# run unchanged on Python 2 and Python 3.
try:
    input = raw_input
except NameError:
    pass

tree = EvolTree("data/S_example/measuring_S_tree.nw")

# Single-argument print written as a call: valid on Python 2 and Python 3.
print(tree)

# Pause until the user presses Enter.
input('\n   tree loaded, hit some key.\n')

print('Now, it is necessary to link this tree to an alignment:')

tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')

input('\n   alignment loaded, hit some key to see.\n')

tree.show()

print '''
we will run free-ratio model that is one of models available through
Ejemplo n.º 28
0
     all_dists.append(dist)
 # find indexes of the three shortest distances
 try:
     idxes_of_3_smallest = np.argpartition(np.array(all_dists), 3)[:3]
 except ValueError:
     idxes_of_3_smallest = np.argpartition(
         np.array(all_dists),
         2)  # for the case that list is only 3 items long
 closest_seq_ids = [seqid]
 for d in idxes_of_3_smallest:
     closest_seq_ids.append(seqids_of_other_species[d])
 # ete3 has codeml handling implemented!! No need for own functions.
 subtree = t.copy()
 subtree.prune(closest_seq_ids, preserve_branch_length=True)
 subtree.unroot()
 evotree = EvolTree(subtree.write())
 subfasta = make_clean_fasta(closest_seq_ids, seqdatadict)
 if not subfasta:
     omega_list.append("NA")
     continue
 else:
     evotree.link_to_alignment(subfasta)
     workdirname = './codeml_' + "__".join(closest_seq_ids)
     evotree.workdir = workdirname
     list_of_tempdirs.append(workdirname)
     # mark the foreground branch
     foreground_leafnode = evotree & seqid
     #			print (seqid)
     #			print(foreground_leafnode.node_id)
     #			print (evotree.write())
     evotree.mark_tree([foreground_leafnode.node_id], ['#1'])
Ejemplo n.º 29
0
# Command line: argument 2 is the tree file, argument 3 the results file.
treefile=sys.argv[2] 
resultsfile=sys.argv[3] 

# If either input file is missing, print an all-NA row for this hog and stop.
if not os.path.isfile(resultsfile):
    print(hog, 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', sep="\t")
    quit()

if not os.path.isfile(treefile):
    print(hog, 'NA', 'NA', 'NA', 'NA', 'NA', 'NA', sep="\t")
    quit()

# NOTE(review): the `as` target rebinds `treefile` from the path to the file
# object, so after this block the name no longer holds the path.
with open(treefile, 'r') as treefile:
    treestring=treefile.read().replace('\n', '')

# Remove every "{word}" tag from the newick text before parsing it.
treestring=re.sub(r"{\w+}", "", treestring)
t=EvolTree(treestring, format=1)

# Read the results file as CSV rows.
with open(resultsfile, 'r') as f:
    reader=csv.reader(f)
    res_list=list(reader)

# Output record, one per hog: counters start at 0, branch lists start empty.
rs={'hog' : hog, 'selected_nom' : 0, 'selected_holm' : 0, 'total_tests' : 0, 'nom_branches' : "", 'holm_branches' : "", 'tree' : treestring}

for line in res_list:
    # Column 0 is a node name; only rows naming a node of the tree are used.
    taxa=line[0]
    if t.search_nodes(name=taxa):
        # Columns 6 and 7: p-value and (going by the variable name) its
        # Holm-corrected counterpart.
        pval=float(line[6])
        pvalholm=float(line[7])
        node_id=trans_node(taxa, t)
Ejemplo n.º 30
0
# Read the test class: one target tip name per line of the file given as
# the second command-line argument.
testclassfile = sys.argv[2]

with open(testclassfile) as tf:
    target_tips = set(tf.read().splitlines())

for line in lines:
    if line == "":
        continue
    # Tab-separated columns: tree label, hog id, newick string.
    fields = line.split("\t")
    hog = fields[1]
    tree = fields[0]
    try:
        t = EvolTree(fields[2])
    except Exception:
        # Skip rows whose tree cannot be built. Unlike a bare `except:`,
        # this lets KeyboardInterrupt and SystemExit propagate.
        continue
    for node in t.traverse():
        istarget = node_in_class(node, t, target_tips)
        brstat = node.dist
        # Unnamed nodes are labelled by joining their leaf names.
        nname = node.name
        if nname == "":
            nname = "-".join(node.get_leaf_names())
        # A missing parent raises AttributeError on `.name`; label it "root".
        try:
            pname = node.up.name
        except AttributeError:
            pname = "root"
        if pname == "":
            pname = "-".join(node.up.get_leaf_names())
Ejemplo n.º 31
0
__author__  = "Francois-Jose Serra"
__email__   = "*****@*****.**"
__licence__ = "GPLv3"
__version__ = "0.0"


try:
    input = raw_input
except NameError:
    pass


from ete3 import EvolTree
from ete3 import NodeStyle

# Load the example tree and link it to its alignment.
tree = EvolTree ("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment ('data/S_example/alignment_S_measuring_evol.fasta')

print (tree)

print ('Tree and alignment loaded.')
# Pause until the user presses Enter.
input ('Tree will be mark in order to contrast Gorilla and Chimpanzee as foreground \nspecies.')

# Node ids to mark, given as strings.
marks = ['1', '3', '7']

# Give all three nodes the same '#1' mark, then print the resulting tree.
tree.mark_tree (marks, ['#1'] * 3)
print (tree.write ())

print ('we can easily colorize marked branches')
# display marked branches in orange
for node in tree.traverse ():
def run_codeml(mark_id, aln_file, tree_file, sleep):
    """Run M0, bsA and bsA1 with node ``mark_id`` marked and classify it.

    :param mark_id: Node id to mark with '#1'.
    :param aln_file: Alignment linked to the tree.
    :param tree_file: Tree file (read with ``format=0``).
    :param sleep: Delay before starting; divided by ``args.threads``.
    :returns: ``[mark_id, ps, rx, p_bsA, label]`` where ``ps`` compares bsA
        with bsA1, ``rx`` compares bsA1 with M0, ``p_bsA`` is the third
        'proportions' entry of the bsA model, and ``label`` is
        'positive selection', 'relaxation' or 'no signal'.
    """
    logger.info('sub-process: {0}'.format(str(mark_id)))
    time.sleep(round(sleep / args.threads, 2))
    run_dir = os.path.join(output_dir, str(mark_id))
    os.makedirs(run_dir)

    tree = EvolTree(tree_file, format=0)
    tree.link_to_alignment(aln_file)
    # Set the work directory BEFORE the first model runs. It used to be set
    # after M0, so M0 ran under the tree's default workdir instead of this
    # sub-process's own run_dir.
    tree.workdir = run_dir
    tree.run_model('M0')

    tree.mark_tree([mark_id], marks=['#1'])
    alt_name = 'bsA.' + str(mark_id)
    null_name = 'bsA1.' + str(mark_id)
    tree.run_model(alt_name)
    tree.run_model(null_name)

    ps = tree.get_most_likely(alt_name, null_name)
    rx = tree.get_most_likely(null_name, 'M0')
    bsA = tree.get_evol_model(alt_name)
    p_bsA = bsA.classes['proportions'][2]
    wfrg2a = bsA.classes['foreground w'][2]

    # Classification thresholds are unchanged.
    if ps < 0.05 and float(wfrg2a) > 1:
        label = 'positive selection'
    elif rx < 0.05 and ps >= 0.05:
        label = 'relaxation'
    else:
        label = 'no signal'
    return [mark_id, ps, rx, p_bsA, label]
Ejemplo n.º 33
0
    for record in SeqIO.parse(alignment_file, format=alignment_format):
        gapSeq = '-' * len(record.seq)
        if (str(record.seq).upper().replace(
                "N", "-")) == gapSeq:  # if it's just gaps
            pass
        else:
            trimmed_alignment.append(record)
            taxa_in_alignment.append(record.id)

# Rewrite the alignment file only when empty sequences were dropped and at
# least one record remains.
if empty_seq_count >= 1 and len(trimmed_alignment) >= 1:
    SeqIO.write(trimmed_alignment, handle=alignment_file,
                format=alignment_format)

tree = EvolTree(tree_file)
# Output name: <tree file stem>_<gene name>.tre
out_tree_name = (os.path.splitext(os.path.basename(tree_file))[0]
                 + '_' + gene_name + '.tre')

# With a reduced alignment, prune the tree to the taxa that remain in it.
if empty_seq_count >= 1 and len(taxa_in_alignment) >= 1:
    tree.prune(taxa_in_alignment, preserve_branch_length=True)

# One test taxon per line, trailing whitespace removed.
with open(test_taxa_file, 'r') as test_taxa_list:
    test_taxa = [taxon.rstrip() for taxon in test_taxa_list]
Ejemplo n.º 34
0
def run_evol_py(tree, alg, branch_model, site_models,
                workir='data/evol_output', tool_dir="ete3_apps/bin"):
    """Run the requested evolutionary models and render the result to a PNG.

    :param tree: Input handed to ``EvolTree``.
    :param alg: Alignment linked to the tree.
    :param branch_model: Optional branch model; when truthy it is run (as a
        string) and the resulting model is printed.
    :param site_models: Site models to run; also used as the histogram faces
        of the rendered image.
    :param workir: Work directory; also stored in the global
        ``evol_output_dir``.
    :param tool_dir: Binaries directory, joined to the parent of
        ``ete3_path``.
    :returns: The ``EvolTree`` after all models have run.
    """
    global evol_output_dir, final_evol_tree

    print(tree, alg, branch_model, site_models)
    apps_dir = os.path.join(os.path.split(ete3_path)[0], tool_dir)

    evol_tree = EvolTree(tree, binpath=apps_dir)
    evol_tree.link_to_alignment(alg)
    evol_tree.workdir = workir

    # Branch model (optional).
    if branch_model:
        branch_name = str(branch_model)
        evol_tree.run_model(branch_name)
        print(evol_tree.get_evol_model(branch_name))

    # Site models.
    for model_name in site_models:
        evol_tree.run_model(model_name)

    # Publish the output locations through module globals, then render.
    evol_output_dir = workir
    final_evol_tree = evol_output_dir + '/tree_evol_result.png'

    evol_tree.render(final_evol_tree,
                     layout=evol_clean_layout,
                     histfaces=site_models)
    return evol_tree
Ejemplo n.º 35
0
"""
15 Nov 2010

simple example to mark a tree and compute branch-site test of positive selection
"""

__author__  = "Francois-Jose Serra"
__email__   = "*****@*****.**"
__licence__ = "GPLv3"
__version__ = "0.0"


from ete3 import EvolTree


# Where raw_input exists (Python 2) use it as input, so the prompt below
# runs unchanged on Python 2 and Python 3.
try:
    input = raw_input
except NameError:
    pass

tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')

# Single-argument print written as a call: valid on Python 2 and Python 3.
print(tree)

# Pause until the user presses Enter.
input('\n   tree and alignment loaded\nHit some key, to start computation of branch site models A and A1 on each branch.\n')

print('running model M0, for comparison with branch-site models...')
tree.run_model('M0')

# each node/leaf has two kind of identifiers node_id and paml_id, to mark nodes we have to specify
# the node_id of the nodes we want to mark, and the kind of mark in this way:

for leaf in tree:
    # Kept from the original: a bare attribute access whose value is unused.
    leaf.node_id
    print('\n---------\nNow working with leaf ' + leaf.name)
Ejemplo n.º 36
0
#!/usr/bin/python
"""
06 Feb 2011

use slr to compute evolutionary rates
"""

__author__  = "Francois-Jose Serra"
__email__   = "*****@*****.**"
__licence__ = "GPLv3"
__version__ = "0.0"

from ete3 import EvolTree


# Load the example tree and link it to its alignment.
tree = EvolTree ("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment ("data/S_example/alignment_S_measuring_evol.fasta")


# Run the 'SLR' model and fetch the resulting model object.
tree.run_model ('SLR')

slr = tree.get_evol_model ('SLR')

# Configure the model's histogram face: curve style with errors, and
# horizontal lines at 1.0 (black) and 0.3 (grey).
slr.set_histface (up=False, kind='curve',errors=True,
                  hlines = [1.0,0.3], hlines_col=['black','grey'])

# Show the tree with the SLR histogram face.
tree.show (histfaces=['SLR'])