Esempi in Python per EvolTree, esempi in Python per ete2.EvolTree

Esempio n. 1

0

Mostra file

File: phasePaml_plotTreeSimple.py Progetto: gglyptodon/phasePaml

def showTreeWithPictures(tree = None, alignment=None, branchLengths=True, bootstrapSupport=True, tolabel=None):
    """Render a tree to PDF and PNG, highlighting selected nodes.

    tree             -- passed straight to ``EvolTree``.
    alignment        -- when truthy, the tree is built with and linked to this
                        alignment using the "paml" format.
    branchLengths    -- value for ``TreeStyle.show_branch_length``; also adds
                        "_Len" to the output base name when truthy.
    bootstrapSupport -- value for ``TreeStyle.show_branch_support``; also adds
                        "_Boot" to the output base name when truthy.
    tolabel          -- container of node ids (as strings) to draw in the
                        highlight style; nothing is highlighted when falsy.

    The output base name starts from the module-level ``FILE``; the tree is
    additionally shown interactively when the module-level ``INTERACTIVE``
    is truthy.
    """
    print("ShowTreeWithPictures",tree, alignment, branchLengths,bootstrapSupport, tolabel)

    if not alignment:
        evol_tree = EvolTree(tree)
    else:
        evol_tree = EvolTree(tree, alignment, alg_format="paml")
        evol_tree.link_to_alignment(alignment, alg_format="paml")

    # Style applied to every node whose id is listed in ``tolabel``.
    highlight = NodeStyle()
    highlight["fgcolor"] = "darkgreen"
    highlight["size"] = 8

    for current in evol_tree.traverse():
        print(current.node_id)
        if tolabel and str(current.node_id) in tolabel:
            current.set_style(highlight)

    style = TreeStyle()
    style.show_leaf_name = True
    style.show_branch_length = branchLengths
    style.show_branch_support = bootstrapSupport

    # Encode the enabled display options in the output file name.
    basename = FILE
    for enabled, tag in ((branchLengths, "_Len"), (bootstrapSupport, "_Boot")):
        if enabled:
            basename += tag

    for extension in ("pdf", "png"):
        evol_tree.render(basename + "_tree." + extension, tree_style=style)

    if INTERACTIVE:
        evol_tree.show(tree_style=style)

Esempio n. 2

0

Mostra file

File: PAMLTREE_makeTreesAndCtl.py Progetto: gglyptodon/phasePaml

def labelForPaml(unlabelledTreeAsString,listOfNodes, tree):
    """Mark the given nodes with "#1", "#2", ... and write the marked tree to disk.

    unlabelledTreeAsString -- passed to ``EvolTree`` to build the tree.
    listOfNodes            -- node ids handed to ``mark_tree``; they are also
                              joined with "_" to form the output file suffix,
                              so they must be strings.
    tree                   -- base path; the output file is
                              ``tree + "." + "_".join(listOfNodes)``.

    The marked tree is printed and then written to the output file.
    """
    evol_tree = EvolTree(unlabelledTreeAsString)

    # One mark per node, numbered from 1 in list order.
    marks = ["#" + str(position) for position, _ in enumerate(listOfNodes, 1)]
    evol_tree.mark_tree(listOfNodes, marks=marks)

    print(evol_tree.write())

    destination = tree + "." + "_".join(listOfNodes)
    with open(destination, 'w') as handle:
        handle.write(evol_tree.write())

Esempio n. 3

0

Mostra file

File: PAML_makeCtl.py Progetto: gglyptodon/phasePaml

def showTreeNodes(unlabelledTreeAsString):
    """Print each node of the tree with its leaf names, name and node_id.

    Nodes whose ``get_leaf_names()`` result is an empty list are skipped.
    """
    evol_tree = EvolTree(unlabelledTreeAsString)
    for current in evol_tree.traverse():
        leaf_names = current.get_leaf_names()
        if leaf_names == []:
            continue
        print(current)
        print(leaf_names, current.name, current.node_id)
        print("\n")

Esempio n. 4

0

Mostra file

File: PAMLTREE_makeTreesAndCtl.py Progetto: gglyptodon/phasePaml

def showAlignmentWithTree(unlabelledTreeAsString,alignment):
    """Build a tree linked to a PAML-format alignment, dump it, and show it.

    Every node is printed, followed by its ``node_id`` and ``name``; the
    result of ``write()`` is printed last, then the tree viewer is opened.
    """
    evol_tree = EvolTree(unlabelledTreeAsString, alignment, alg_format="paml")
    evol_tree.link_to_alignment(alignment, alg_format="paml")

    for current in evol_tree.traverse():
        print(current)
        print(current.node_id, current.name)

    newick_text = evol_tree.write()
    print(newick_text)

    evol_tree.show()

Esempio n. 5

0

Mostra file

File: tree_labeler.py Progetto: frogsicle/phasePAML

def showAlignmentWithTree(tree,alignment):
    """Label every node with its node_id, then show the tree and render it to "tree.pdf".

    tree      -- passed to ``EvolTree`` (and printed first).
    alignment -- alignment given to ``EvolTree`` and ``link_to_alignment``
                 with the "paml" format.

    The tree is passed through ``fakeUnroot`` (defined outside this block)
    before it is displayed and rendered.
    """
    print(tree)
    t = EvolTree(tree, alignment,alg_format="paml")
    # NOTE(review): nsFG is configured here but never applied to any node in
    # this function (the only set_style call below is commented out).
    nsFG = NodeStyle()
    nsFG["fgcolor"] = "darkgreen"
    nsFG["size"] = 15
    #print(t)
    #t.run_model ('fb.example')
    # t.show()

    t.link_to_alignment(alignment, alg_format="paml")
    for node in t.traverse():
        print(node)
        #if (node.name.split("_")[0] in GENES):
        #    print(node.name, node.node_id)
        #    if (node.name.split("_")[0] == "GRMZM2G083841"):
        #        node.add_face(ImgFace("83841.1.png", height=50, width=50), column=1, position="aligned")
        #    if (node.name.split("_")[0] == "GRMZM2G473001"):
        #        node.add_face(ImgFace("473001.png", height=50, width=50), column=1, position="aligned")
        # Show the node id next to the node so it can be referenced when marking.
        node.add_face(TextFace(str(node.node_id)),column=0)
        #node.add_face(ImgFace("tux.png", height=50), column=1)
        #    node.set_style(nsFG)
        leaves  = node.get_leaf_names()
        #leaves = [l for l in leaves if l.split("_")[0] in GENES ]
        if leaves !=[]:
            print(node.name, node.node_id)
            #print(node.node_id)
    #t.mark_tree([8], marks=["#1"])
    #print(t.write())
    #print(alignment)
    #print(t)
    #t.show() #layout=evol_clean_layout)

    ts = TreeStyle()
    ts.show_leaf_name = True
    ts.show_branch_length = True
    ts.show_branch_support = True
    # fakeUnroot returns the tree object that is displayed and rendered below.
    t = fakeUnroot(t)
    # The interactive viewer is opened first; the PDF is written afterwards.
    t.show(tree_style=ts)
    t.render("tree.pdf", tree_style=ts)

Esempio n. 6

0

Mostra file

File: TREE_makeTrees.py Progetto: gglyptodon/phasePaml

def showAlignmentWithTree(unlabelledTreeAsString,alignment):
    """Build a tree linked to a PAML-format alignment, list its nodes, and show it.

    Each node is printed together with its ``node_id`` and ``name``; the
    ``alignment`` argument itself is printed before the viewer is opened.
    """
    evol_tree = EvolTree(unlabelledTreeAsString, alignment, alg_format="paml")
    evol_tree.link_to_alignment(alignment, alg_format="paml")

    for current in evol_tree.traverse():
        print(current)
        print(current.node_id, current.name)

    print(alignment)
    evol_tree.show()

Esempio n. 7

0

Mostra file

File: 6_ancestral_sequence.py Progetto: jerryatmda/ete

example of computation and display of an ancestral sequence
computed under the free-ratio model.
"""

__author__ = "Francois-Jose Serra"
__email__ = "*****@*****.**"
__licence__ = "GPLv3"
__version__ = "0.0"

from ete2 import TreeStyle
from ete2 import EvolTree
from ete2 import faces


tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment("data/S_example/alignment_S_measuring_evol.fasta")

print tree

print "\n Running free-ratio model with calculation of ancestral sequences..."

tree.run_model("fb_anc")
# tree.link_to_evol_model('/tmp/ete2-codeml/fb_anc/out', 'fb_anc')

I = TreeStyle()
I.force_topology = False
I.draw_aligned_faces_as_table = True
I.draw_guiding_lines = True
I.guiding_lines_type = 2
I.guiding_lines_color = "#CCCCCC"

Esempio n. 8

0

Mostra file

File: PAMLTREE_makeTreesAndCtl.py Progetto: gglyptodon/phasePaml

def showTreeNodes(unlabelledTreeAsString):
    """Print every node of the tree, followed by its node_id and name."""
    for current in EvolTree(unlabelledTreeAsString).traverse():
        print(current)
        print(current.node_id, current.name)

Esempio n. 9

0

Mostra file

            print "Alignment missing: " + align_file
            no_results_file.write(cluster + "\n")

        #Check alignment length. If only two sequences, move to the next one
        fasta_count = 0
        for line in open(align_file, 'r'):
            line = line.strip()
            if line.startswith(">"):
                fasta_count += 1

        if not fasta_count > 2:
            continue

        node_id_2_names = defaultdict()

        for descend in EvolTree(tree_file).iter_descendants():
            node_id_2_names[descend.node_id] = descend.get_leaf_names()

        #Results, the first element has:
        #The second is a dictionary with the positive selected sites

        #results_dict[cluster] = run_site_branch(cluster, tree_file, align_file, temp_folder, plot_folder)

        p = pool.apply_async(run_site_branch,
                             args=(
                                 cluster,
                                 tree_file,
                                 align_file,
                                 temp_folder,
                                 plot_folder,
                             ),

Esempio n. 10

0

Mostra file

File: 2_sites_model.py Progetto: a1an77/ete

"""
15 Nov 2010

example to illustrate use of sites model, displaying and comparison
"""

__author__  = "Francois-Jose Serra"
__email__   = "*****@*****.**"
__licence__ = "GPLv3"
__version__ = "0.0"



from ete2 import EvolTree

# Load the example tree and attach the matching alignment to it.
tree = EvolTree ("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment ('data/S_example/alignment_S_measuring_evol.fasta')

print tree

# Pause until the user presses Enter before starting the model runs.
raw_input ('\n   tree and alignment loaded\n Hit some key, to start computation of site models M1 and M2.\n')

# Run both site models; the results are compared by name below.
print 'running model M1'
tree.run_model ('M1')
print 'running model M2'
tree.run_model ('M2')

# get_most_likely is called with M2 first and M1 second; its result is
# reported as the p-value of the comparison.
print '\n\n comparison of models M1 and M2, p-value: ' + str(tree.get_most_likely ('M2','M1'))

#tree.show()

Esempio n. 11

0

Mostra file

File: RunPaml_SiteTests.py Progetto: juanu/CompMicroGenom

def run_site_tests(cluster_name, treefile, alignment, folder_temp, folder_plots):
    """Run the M1 and M2 site models for one cluster and summarise the outcome.

    cluster_name -- label used in messages, the work sub-folder and the plot name.
    treefile     -- tree passed to ``EvolTree``.
    alignment    -- FASTA nucleotide alignment linked to the tree.
    folder_temp  -- parent folder; ``folder_temp/cluster_name`` becomes the
                    tree's workdir and is created when missing.
    folder_plots -- folder that receives ``<cluster_name>.svg`` when the test
                    is called positive.

    Returns ``[result_entry, ps_sites]`` where ``result_entry`` is
    ``[cluster_name, omega, proportion, pval, status, total_sites,
    sites_over_95]`` and ``ps_sites`` maps a site index to
    ``[amino acid, BEB p2 value]`` for every site whose p2 exceeds 0.50.
    ``status`` is "Positive" when ``pval < 0.05`` and omega > 1, else None.
    """
    from ete2 import EvolTree
    from ete2.treeview.layouts import evol_clean_layout
    import os
    from collections import defaultdict
    import math
    from scipy.stats import chi2

    print("Processing cluster: " + cluster_name)

    evol_tree = EvolTree(treefile)
    evol_tree.link_to_alignment(alignment, alg_format="fasta", nucleotides=True)

    # Each cluster gets its own working directory.
    cluster_workdir = folder_temp + "/" + cluster_name
    if not os.path.exists(cluster_workdir):
        os.makedirs(cluster_workdir)
    evol_tree.workdir = cluster_workdir

    # M1 is the null model, M2 the alternative.
    for model_name in ("M1", "M2"):
        evol_tree.run_model(model_name)
    null_model = evol_tree.get_evol_model("M1")
    alt_model = evol_tree.get_evol_model("M2")

    # Collect the sites whose BEB p2 value is above 0.50.
    ps_sites = defaultdict()
    total_sites = 0
    sites_over_95 = 0
    for index, residue in enumerate(alt_model.sites["BEB"]["aa"]):
        raw_probability = alt_model.sites["BEB"]["p2"][index]
        probability = float(raw_probability)
        if not probability > 0.50:
            continue
        ps_sites[index] = [residue, raw_probability]
        total_sites += 1
        if probability > 0.95:
            sites_over_95 += 1

    # Likelihood-ratio statistic, compared against chi-square with 2 df.
    lrt_value = 2 * math.fabs(null_model.lnL - alt_model.lnL)
    pval = 1 - chi2.cdf(lrt_value, 2)

    omega_value = float(alt_model.classes["w"][2])
    proportion_sites = float(alt_model.classes["proportions"][2])

    plot_file = folder_plots + "/" + cluster_name

    hist_colors = dict.fromkeys(
        ("NS", "RX", "RX+", "CN", "CN+", "PS", "PS+"), "black"
    )

    if pval < 0.05 and omega_value > 1:
        # Only clusters called positive get a plot.
        test_status = "Positive"
        alt_model.set_histface(
            up=False,
            kind="curve",
            colors=hist_colors,
            ylim=[0, 4],
            hlines=[2.5, 1.0, 4.0, 0.5],
            hlines_col=["orange", "yellow", "red", "cyan"],
            errors=True,
        )
        evol_tree.render(plot_file + ".svg", layout=evol_clean_layout, histfaces=["M2"])
    else:
        test_status = None

    summary = [
        cluster_name,
        omega_value,
        proportion_sites,
        pval,
        test_status,
        total_sites,
        sites_over_95,
    ]
    return [summary, ps_sites]

Esempio n. 12

0

Mostra file

File: test_protamine.py Progetto: a1an77/ete

def main():
    """
    Load the tree, link stored model outputs and the alignment, print two
    LRT results and show the tree.

    Paths are built from the module-level ``WRKDIR``; ``random_swap`` and
    ``check_annotation`` are helpers defined outside this function.
    """
    tree = EvolTree (WRKDIR + 'tree.nw')
    tree.workdir = 'data/protamine/PRM1/paml/'

    # random_swap runs before any model is linked; check_annotation runs
    # right after the 'fb' output has been linked.
    random_swap(tree)
    tree.link_to_evol_model (WRKDIR + 'paml/fb/fb.out', 'fb')
    check_annotation (tree)
    # Link the existing output files of the four site models.
    tree.link_to_evol_model (WRKDIR + 'paml/M1/M1.out', 'M1')
    tree.link_to_evol_model (WRKDIR + 'paml/M2/M2.out', 'M2')
    tree.link_to_evol_model (WRKDIR + 'paml/M7/M7.out', 'M7')
    tree.link_to_evol_model (WRKDIR + 'paml/M8/M8.out', 'M8')
    tree.link_to_alignment  (WRKDIR + 'alignments.fasta_ali')
    # The trailing comma keeps the label and the value on the same output line.
    print 'pv of LRT M2 vs M1: ',
    print tree.get_most_likely ('M2','M1')
    print 'pv of LRT M8 vs M7: ',
    print tree.get_most_likely ('M8','M7')

    
    tree.show (histfaces=['M2'])

    print 'The End.'

Esempio n. 13

0

Mostra file

def run_site_tests(cluster_name, treefile, alignment, folder_temp,
                   folder_plots):
    """Compare site models M1 and M2 for a single cluster.

    The tree in ``treefile`` is linked to the FASTA nucleotide ``alignment``,
    both models are run inside ``folder_temp/cluster_name`` (created when it
    does not exist), and a likelihood-ratio p-value is computed from the two
    ``lnL`` values with 2 degrees of freedom.

    Returns a two-item list: the result row
    ``[cluster_name, omega, proportion, pval, status, total_sites,
    sites_over_95]`` and a mapping of site index to
    ``[amino acid, BEB p2 value]`` for sites with p2 above 0.50.  When
    ``pval < 0.05`` and omega > 1 the status is "Positive" and an SVG plot is
    written to ``folder_plots/cluster_name.svg``; otherwise the status is None.
    """
    from ete2 import EvolTree
    from ete2.treeview.layouts import evol_clean_layout
    import os
    from collections import defaultdict
    import math
    from scipy.stats import chi2

    print("Processing cluster: " + cluster_name)

    tree = EvolTree(treefile)
    tree.link_to_alignment(alignment, alg_format="fasta", nucleotides=True)

    # Per-cluster working folder for the model runs.
    scratch_dir = folder_temp + "/" + cluster_name
    if not os.path.exists(scratch_dir):
        os.makedirs(scratch_dir)
    tree.workdir = scratch_dir

    # Null model first, then the alternative.
    tree.run_model("M1")
    tree.run_model("M2")
    m1 = tree.get_evol_model("M1")
    m2 = tree.get_evol_model("M2")

    # Sites with BEB p2 above 0.50, plus a count of those above 0.95.
    ps_sites = defaultdict()
    total_sites = 0
    sites_over_95 = 0
    for position in range(len(m2.sites['BEB']['aa'])):
        site_probability = float(m2.sites['BEB']['p2'][position])
        if site_probability > 0.50:
            ps_sites[position] = [
                m2.sites['BEB']['aa'][position],
                m2.sites['BEB']['p2'][position],
            ]
            total_sites += 1
            sites_over_95 += int(site_probability > 0.95)

    # Likelihood-ratio test against a chi-square distribution.
    statistic = 2 * math.fabs(m1.lnL - m2.lnL)
    pval = 1 - chi2.cdf(statistic, 2)

    omega_value = float(m2.classes['w'][2])
    proportion_sites = float(m2.classes['proportions'][2])

    plot_file = folder_plots + "/" + cluster_name

    category_names = ['NS', 'RX', 'RX+', 'CN', 'CN+', 'PS', 'PS+']
    col2 = dict((category, 'black') for category in category_names)

    is_positive = pval < 0.05 and omega_value > 1
    test_status = "Positive" if is_positive else None
    if is_positive:
        histface_options = dict(
            up=False,
            kind='curve',
            colors=col2,
            ylim=[0, 4],
            hlines=[2.5, 1.0, 4.0, 0.5],
            hlines_col=['orange', 'yellow', 'red', 'cyan'],
            errors=True,
        )
        m2.set_histface(**histface_options)
        tree.render(plot_file + ".svg",
                    layout=evol_clean_layout,
                    histfaces=['M2'])

    result_entry = [
        cluster_name, omega_value, proportion_sites, pval, test_status,
        total_sites, sites_over_95
    ]
    return [result_entry, ps_sites]

Esempio n. 14

0

Mostra file

File: 1_freeratio.py Progetto: a1an77/ete

"""
15 Nov 2010

first example, load a tree and compute free ratios model,
to find omega value of each branch.
"""

__author__  = "Francois-Jose Serra"
__email__   = "*****@*****.**"
__licence__ = "GPLv3"
__version__ = "0.0"


from ete2 import EvolTree

tree = EvolTree ("data/S_example/measuring_S_tree.nw")

print tree

raw_input ('\n   tree loaded, hit some key.\n')

print 'Now, it is necessary to link this tree to an alignment:'

tree.link_to_alignment ('data/S_example/alignment_S_measuring_evol.fasta')

raw_input ('\n   alignment loaded, hit some key to see.\n')

tree.show()

print '''
we will run free-ratio model that is one of models available through

Esempio n. 15

0

Mostra file

File: PAML_makeCtl.py Progetto: gglyptodon/phasePaml

def labelForPamlRegex(unlabelledTreeAsString, regex, tree):
    """Style nodes whose name matches ``regex`` and write a tree file per labelled node.

    unlabelledTreeAsString -- passed to ``EvolTree`` to build the tree.
    regex                  -- pattern matched (with ``re.match``) against each
                              node name.
    tree                   -- base path: files are written to
                              ``tree + "." + <node_id>`` and the picture to
                              ``tree + ".png"``.

    For every matching node, up to ``MAX_PARENT`` ancestors are styled and a
    tree file is written for each of them, then one more for the matching
    node itself.  ``MAX_PARENT`` and ``NODE_COLORS`` are module-level names
    defined outside this block.

    Returns ``(outfiles, tolabelreg)``: the list of written file paths and the
    list of ancestor node ids (as strings) that were visited.
    """
    pattern = re.compile(regex)
    t = EvolTree(unlabelledTreeAsString)
    marks = []
    # NOTE(review): count is never incremented in this function, so every
    # mark appended below is "#1" (see the "just label everything" comments).
    count = 1
    outfiles = []
    #nsFG=TreeStyle()
    nsMatch = NodeStyle() #match
    nsMatch["fgcolor"] = "blue"
    nsMatch["size"] = 10
    nsBG = NodeStyle()
    nsBG["fgcolor"] = "black"
    nsBG["size"] = 0
    nsFG = []

    tolabelreg = []
    # One style per ancestor level, coloured from NODE_COLORS.
    for i in range(0,MAX_PARENT):
        nsFG.append( NodeStyle())
        nsFG[i]["size"] = 10
        nsFG[i]["fgcolor"] = NODE_COLORS[i]

    # NOTE(review): isroot is assigned but not read anywhere in this function.
    isroot=True
    # Give every node the background style; the root is additionally unrooted
    # and its _support attribute cleared.
    for node in t.traverse():
        node.set_style(nsBG)
        if node.is_root():
            print("root")
            node.unroot()
            node._support = None

    for node in t.get_descendants():
        node.add_face(TextFace(node.node_id), column=0)

    #traverse and match
    for node in t.traverse():
        if re.match(pattern, node.name):
            print("MATCH", node.name, node.node_id)
            node.set_style(nsMatch)
            n = node
            # Walk up the ancestors; when n.up is None (above the root) the
            # set_style call raises AttributeError, which ends the walk.
            try:
                for i in range(0,MAX_PARENT):

                    n = n.up
                    n.set_style(nsFG[i])
                    marks.append("#"+str(count))
                    print(count)
                    # NOTE(review): mark_tree receives the string form of
                    # count, not a node id of n -- confirm this is intended.
                    t.mark_tree([str(count)], marks=marks)
                    #just label everything with #1
                    print(t.write())

                    tolabelreg.append(str(n.node_id))

                    outfile = tree+"."+"_".join([str(n.node_id)])
                    with open(outfile, 'w') as out:
                        out.write(t.write())
                    outfiles.append(outfile)

            except AttributeError:
                pass

            # Same marking and file output for the matching node itself.
            marks.append("#"+str(count))
            print(count)
            t.mark_tree([str(count)], marks=marks)
            #just label everything with #1
            print(t.write())

            outfile = tree+"."+"_".join([str(node.node_id)])
            with open(outfile, 'w') as out:
                out.write(t.write())
                outfiles.append(outfile)
    #t.show()
    t.render(tree+".png")
    return(outfiles, tolabelreg)

Esempio n. 16

0

Mostra file

File: TREE_makePrettyTrees.py Progetto: gglyptodon/phasePaml

def showTreeWithPictures(tree = None, alignment=None, branchLengths=True, bootstrapSupport=True, tolabel=None,showZScores=False,showLogs=False ):
    """Render a tree with per-node pictures, or show it linked to an alignment.

    tree             -- passed to ``EvolTree``.
    alignment        -- when truthy, the tree is linked to this PAML-format
                        alignment and only shown interactively; nothing is
                        rendered to file in that case.
    branchLengths    -- ``TreeStyle.show_branch_length``; adds "_Len" to the
                        output name when truthy.
    bootstrapSupport -- ``TreeStyle.show_branch_support``; adds "_Boot" to the
                        output name when truthy.
    tolabel          -- container of node ids (as strings) to highlight.
    showZScores, showLogs -- only printed; see the review note below.

    Relies on module-level names defined outside this block: ``PICS``,
    ``PICDIR``, ``FILE``, ``INTERACTIVE``, ``Z`` and ``L``.
    """
    print(PICS)
    
    print("ShowTreeWithPictures",tree, alignment, branchLengths,bootstrapSupport, tolabel,showZScores,showLogs )
    if not alignment:
        nsFG = NodeStyle()
        nsFG["fgcolor"] = "darkgreen"
        nsFG["size"] = 8
        t = EvolTree(tree)
        #todo:label
        #
            
        for node in t.traverse():
            print(node.node_id)
            if tolabel:
                if str(node.node_id) in tolabel:
                     node.set_style(nsFG)
                # quick and dirty
            # Attach a picture when "<name prefix before '_'>.png" is in PICS.
            if (node.name.split("_")[0]+".png" in PICS):
                print(node.name.split("_")[0]+".png")
                node.add_face(ImgFace(PICDIR+os.sep+node.name.split("_")[0]+".png", height=50), column=1, position="aligned")
            # Otherwise fall back to the full node name (non GRZM identifier).
            elif (node.name+".png" in PICS):
                print(node.name+".png")
                node.add_face(ImgFace(PICDIR+os.sep+node.name+".png", height=50), column=1, position="aligned")
            
            
        ts = TreeStyle()
        ts.show_leaf_name = True
        ts.show_branch_length = branchLengths
        ts.show_branch_support = bootstrapSupport
        out = FILE
        if branchLengths:
            out+="_Len"
        if bootstrapSupport:
            out+="_Boot"
        # NOTE(review): Z and L are not defined in this function, while the
        # showZScores / showLogs parameters are never used for anything but
        # the debug print above -- confirm whether the parameters were meant.
        if Z:
            out+="_Z"
        if L:
            out+="_L"
        t.render(out+"_tree.pdf",tree_style=ts)
        t.render(out+"_tree.png",tree_style=ts)
        if INTERACTIVE:
            t.show(tree_style=ts)
        
    else:
        t = EvolTree(tree, alignment,alg_format="paml")
        t.link_to_alignment(alignment,alg_format="paml")
        #todo label
        #todo check treestyle
        
        #ts = TreeStyle()
        #ts.show_leaf_name = True
        #ts.show_branch_length = branchLength
        #ts.show_branch_support = bootstrapSupport
        t.show()

Esempio n. 17

0

Mostra file

File: tree_labeler.py Progetto: frogsicle/phasePAML

def label_regex(unlabeled_tree, regex, treefile, depth=4,
                model_list=None, paml_msa=None, outfile=None):
    """Label the ancestors of regex-matching nodes and write one tree file per ancestor.

    unlabeled_tree -- passed to ``EvolTree`` to build the tree.
    regex          -- pattern matched (from the start) against each node name.
    treefile       -- base path; tree files are written to
                      ``treefile + "." + <node_id>`` and the picture to
                      ``treefile + ".png"``.
    depth          -- maximum number of ancestor levels visited above each
                      matching node.
    model_list     -- models for which ``generateCtl`` is called on every
                      labelled tree file; ``None`` (the default) or an empty
                      sequence means no control files are generated.
    paml_msa       -- passed to ``generateCtl`` as ``seqfile``.
    outfile        -- accepted for interface compatibility; not used.

    Returns ``(outfiles, tolabelreg)``: the written tree file paths and the
    visited ancestor node ids as strings.

    ``generateCtl`` and ``fakeUnroot`` are helpers defined outside this block.
    """
    pattern = re.compile(regex)
    t = EvolTree(unlabeled_tree)
    marks = []
    # NOTE(review): count is never advanced, so every mark is "#1".
    count = 1
    outfiles = []
    ts = TreeStyle()
    ts.mode = "c"
    nsMatch = NodeStyle()  # style of nodes whose name matches
    nsMatch["fgcolor"] = "blue"
    nsMatch["size"] = 10
    nsBG = NodeStyle()  # style of all non-matching nodes
    nsBG["fgcolor"] = "black"
    nsBG["size"] = 0

    # One style object per ancestor level.
    nsFG = []
    for _ in range(depth):
        level_style = NodeStyle()
        level_style["size"] = 10
        level_style["fgcolor"] = "blue"
        nsFG.append(level_style)

    tolabelreg = []

    for node in t.get_descendants():
        node.add_face(TextFace(node.node_id), column=0)

    # traverse and match
    for node in t.traverse():
        if pattern.match(node.name):
            node.set_style(nsMatch)
            n = node
            # Walking past the root leaves n as None; the following
            # set_style call then raises AttributeError, which ends the walk.
            try:
                for i in range(depth):
                    n = n.up
                    n.set_style(nsFG[i])
                    marks.append("#" + str(count))
                    t.mark_tree([str(count)], marks=marks)
                    # just label everything with #1
                    tolabelreg.append(str(n.node_id))

                    # A distinct local name keeps the ``outfile`` parameter
                    # from being silently overwritten.
                    labelled_path = treefile + "." + "_".join([str(n.node_id)])
                    with open(labelled_path, 'w') as out:
                        out.write(t.write())
                    outfiles.append(labelled_path)

            except AttributeError:
                pass
        else:
            node.set_style(nsBG)

    # model_list defaults to None; iterating it directly raised TypeError as
    # soon as at least one ancestor had been labelled.
    models = model_list if model_list is not None else ()
    for f in tolabelreg:
        print(f,"FFF")
        for m in models:
            generateCtl(model=m, treefile=treefile+"."+f, seqfile=paml_msa, outfile=treefile+"."+f,
                                            generateOther=False)

    t = fakeUnroot(t)
    t.render(treefile+".png", tree_style=ts)
    return outfiles, tolabelreg

Esempio n. 18

0

Mostra file

File: RunPaml_BranchTests.py Progetto: maggishaggy/CompMicroGenom

        results_list.append(entry_results)
        site_file.close()

    #Create the pool of processors
    pool = multiprocessing.Pool(args.num_processors)

    run_results = []

    for cluster in clusters_to_analyze:

        tree_file = args.tree_folder + "/" + cluster + ".tre"
        align_file = args.align_folder + "/" + cluster + ".fna"

        node_id_2_names = defaultdict()

        for entry in EvolTree(tree_file).iter_descendants():
            node_id_2_names[entry.node_id] = entry.get_leaf_names()

        #Check that the files exists
        if not os.path.exists(tree_file):
            print "Tree file missing: " + tree_file
            no_results_file.write(cluster + "\n")
            continue

        if not os.path.exists(align_file):
            print "Alignment missing: " + align_file
            no_results_file.write(cluster + "\n")

        #Results, the first element has:
        #The second is a dictionary with the positive selected sites

Esempio n. 19

0

Mostra file

File: 5_branchsite_cladetest.py Progetto: a1an77/ete

15 Nov 2010

example of tests for different rates among sites in clades
"""

__author__  = "Francois-Jose Serra"
__email__   = "*****@*****.**"
__licence__ = "GPLv3"
__version__ = "0.0"



from ete2 import EvolTree
from ete2 import NodeStyle

tree = EvolTree ("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment ('data/S_example/alignment_S_measuring_evol.fasta')

print tree

print 'Tree and alignment loaded.'
raw_input ('Tree will be mark in order to contrast Gorilla and Chimpanzee as foreground \nspecies.')

marks = ['1', 3, '7']

tree.mark_tree (marks, ['#1'] * 3)
print tree.write ()

# display marked branches in orange
for node in tree.traverse ():
    if not hasattr (node, 'mark'):

Esempio n. 20

0

Mostra file

File: RunPaml_BranchTests.py Progetto: maggishaggy/CompMicroGenom

def run_branch_test(cluster_name, treefile, alignment, folder_temp, folder_plots):
    """Run the branch-site models bsA / bsA1 on every branch of one cluster's tree.

    cluster_name -- label used in messages and for the work sub-folder.
    treefile     -- tree passed to ``EvolTree``.
    alignment    -- FASTA nucleotide alignment linked to the tree.
    folder_temp  -- parent folder; ``folder_temp/cluster_name`` becomes the
                    tree's workdir and is created when missing.
    folder_plots -- folder for plots (rendering is currently disabled).

    For each descendant node the node is marked "#1", models
    ``bsA.<node_id>`` and ``bsA1.<node_id>`` are run, and the marks are
    removed again before the next branch is processed.  A branch is
    "Positive" when the bsA-vs-bsA1 likelihood-ratio p-value (1 df) is below
    0.05 and the foreground omega exceeds 1; it is "Relaxed" when the
    bsA1-vs-M0 p-value is below 0.05 and the branch is not positive.

    Returns ``[result_entry, ps_sites]`` for the last branch that was
    recorded, or ``[]`` when no branch was recorded.  Only the first
    "Positive" branch is recorded; later positive branches are skipped.
    NOTE(review): each recorded branch overwrites the previous one, so
    earlier results are not returned -- confirm this is what callers expect.
    """
    from ete2 import EvolTree
    # Unused while the render calls are disabled; kept so the function's
    # import requirements stay as they were.
    from ete2.treeview.layouts import evol_clean_layout
    import os
    from collections import defaultdict
    import math
    from scipy.stats import chi2

    print("Processing cluster: " + cluster_name)

    tree = EvolTree(treefile)
    tree.link_to_alignment(alignment, alg_format="fasta", nucleotides=True)

    # Per-cluster working folder for the model runs.
    temp_cluster_folder = folder_temp + "/" + cluster_name
    if not os.path.exists(temp_cluster_folder):
        os.makedirs(temp_cluster_folder)
    tree.workdir = temp_cluster_folder

    # M0 is the null model used for the relaxation comparison below.
    tree.run_model("M0")

    printed_tree = 0
    output_list = []

    for node in tree.iter_descendants():

        # Mark only the branch under analysis as foreground.
        tree.mark_tree([node.node_id], marks=["#1"])

        # The node id is used as the model-name suffix (and run folder name).
        temp_leaf_name = str(node.node_id)

        print("Processing: " + cluster_name + " " + temp_leaf_name + " " + ",".join(node.get_leaf_names()))

        # Anything after the dot in the model name is ignored for the
        # computation and only serves to tell the runs apart.
        tree.run_model("bsA." + temp_leaf_name)
        tree.run_model("bsA1." + temp_leaf_name)

        bsA = tree.get_evol_model("bsA." + temp_leaf_name)
        bsA1 = tree.get_evol_model("bsA1." + temp_leaf_name)

        # Remove the marks as soon as both models have run.  This used to
        # happen at the end of the loop body, where the ``continue`` below
        # skipped it and left this branch marked for all later iterations.
        descendant_ids = [descendant.node_id for descendant in tree.get_descendants()]
        tree.mark_tree(descendant_ids, marks=[''] * len(descendant_ids), verbose=False)

        # Sites with BEB p2 above 0.50, plus a count of those above 0.95.
        ps_sites = defaultdict()
        total_sites = 0
        sites_over_95 = 0

        beb_sites = bsA.sites['BEB']
        for s, amino_acid in enumerate(beb_sites['aa']):
            p_value_site = float(beb_sites['p2'][s])

            if p_value_site > 0.50:
                ps_sites[s] = [amino_acid, beb_sites['p2'][s]]
                total_sites += 1

                if p_value_site > 0.95:
                    sites_over_95 += 1

        rx = float(tree.get_most_likely("bsA1." + temp_leaf_name, "M0"))

        lrt_value = 2 * math.fabs(bsA1.lnL - bsA.lnL)  # LRT test value
        ps = 1 - chi2.cdf(lrt_value, 1)  # p-value based on chi-square

        omega_value = float(bsA.classes['foreground w'][2])
        proportion_sites = float(bsA.classes['proportions'][2])

        # Rendering is disabled; the path is kept for when it is re-enabled.
        plot_file = folder_plots + "/" + cluster_name

        if ps < 0.05 and omega_value > 1:
            test_status = "Positive"

            if printed_tree == 0:
                #tree.render(plot_file + ".svg", layout=evol_clean_layout)
                #tree.render(plot_file + ".jpg", layout=evol_clean_layout)
                printed_tree = 1
            else:
                # Only the first positive branch is recorded.
                continue

        elif rx < 0.05 and ps >= 0.05:
            test_status = "Relaxed"

        else:
            test_status = None

        result_entry = [cluster_name, node.node_id, omega_value, proportion_sites, ps, test_status,
                        total_sites, sites_over_95, ",".join(node.get_leaf_names())]

        output_list = [result_entry, ps_sites]

    return output_list

Esempio n. 21

0

Mostra file

File: 3_branchsite_test.py Progetto: a1an77/ete

"""
15 Nov 2010

simple example to mark a tree and compute branch-site test of positive selection
"""

__author__  = "Francois-Jose Serra"
__email__   = "*****@*****.**"
__licence__ = "GPLv3"
__version__ = "0.0"


from ete2 import EvolTree


tree = EvolTree("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment('data/S_example/alignment_S_measuring_evol.fasta')

print tree

raw_input('\n   tree and alignment loaded\nHit some key, to start computation of branch site models A and A1 on each branch.\n')

print 'running model M0, for comparison with branch-site models...'
tree.run_model('M0')

# each node/leaf has two kind of identifiers node_id and paml_id, to mark nodes we have to specify
# the node_id of the nodes we want to mark, and the kind of mark in this way:

for leaf in tree:
    leaf.node_id
    print '\n---------\nNow working with leaf ' + leaf.name