예제 #1
0
def run_evol_py(tree,
                alg,
                branch_model,
                site_models,
                workir='data/evol_output',
                tool_dir="ete3_apps/bin"):
    """Run the requested evolutionary models on a tree and render a PNG.

    Args:
        tree: Tree description handed straight to ``EvolTree``.
        alg: Alignment handed to ``EvolTree.link_to_alignment``.
        branch_model: Optional model name; when truthy it is converted to
            ``str``, run, and the fitted model is printed.
        site_models: Iterable of model names; each one is run in order and
            the same object is passed as ``histfaces`` when rendering.
        workir: Value assigned to the tree's ``workdir`` and used as the
            directory part of the rendered image path.
        tool_dir: Path joined onto the parent directory of the
            module-level ``ete3_path`` to locate the external binaries.

    Returns:
        The ``EvolTree`` instance after all models ran and it was rendered.

    Side effects:
        Prints its arguments, and sets the module-level globals
        ``evol_output_dir`` and ``final_evol_tree``.
    """
    global evol_output_dir, final_evol_tree

    print(tree, alg, branch_model, site_models)

    # Binaries are looked up relative to the directory holding ``ete3_path``.
    ete3_parent = os.path.split(ete3_path)[0]
    apps_dir = os.path.join(ete3_parent, tool_dir)

    evol_tree = EvolTree(tree, binpath=apps_dir)
    evol_tree.link_to_alignment(alg)
    evol_tree.workdir = workir

    # Branch model (optional).
    if branch_model:
        model_name = str(branch_model)
        evol_tree.run_model(model_name)
        print(evol_tree.get_evol_model(model_name))

    # Site models, in the order given by the caller.
    for model_name in site_models:
        evol_tree.run_model(model_name)

    # Publish the output locations only once every model has run.
    evol_output_dir = workir
    final_evol_tree = evol_output_dir + '/tree_evol_result.png'

    evol_tree.render(
        final_evol_tree,
        layout=evol_clean_layout,
        histfaces=site_models,
    )
    return evol_tree
예제 #2
0
def count_omega(align_file, gene_name):
    """Fit the 'fb' model for one alignment and hand the result on.

    Prints ``gene_name``, builds an ``EvolTree`` from the module-level
    ``tree_file``, links ``align_file`` to it, runs the 'fb' model and
    prints the fitted model. The model's string form is then written to
    the module-level path ``temp`` and ``write_in_table(gene_name)`` is
    called.

    Args:
        align_file: Alignment passed to ``EvolTree.link_to_alignment``.
        gene_name: Label that is printed and forwarded to
            ``write_in_table``.
    """
    print(gene_name)

    evol_tree = EvolTree(tree_file)
    evol_tree.link_to_alignment(align_file)

    model_name = 'fb'
    evol_tree.run_model(model_name)
    fitted_model = evol_tree.get_evol_model(model_name)
    print(fitted_model)

    # The file is opened (and truncated) before the model is stringified.
    with open(temp, 'w') as handle:
        handle.write(str(fitted_model))

    write_in_table(gene_name)
def run_codeml(mark_id, aln_file, tree_file, sleep):
    """Run M0, bsA and bsA1 for one marked node and classify the outcome.

    Args:
        mark_id: Node id to mark with '#1'; its string form also names the
            run directory and the model suffix.
        aln_file: Alignment passed to ``EvolTree.link_to_alignment``.
        tree_file: Tree passed to ``EvolTree`` with ``format=0``.
        sleep: Delay that is divided by ``args.threads``, rounded to two
            decimals and slept before any work starts.

    Returns:
        ``[mark_id, ps, rx, p_bsA, label]`` where ``ps`` and ``rx`` are the
        values returned by ``get_most_likely`` for bsA vs. bsA1 and bsA1
        vs. M0, ``p_bsA`` is ``classes['proportions'][2]`` of the bsA
        model, and ``label`` is 'positive selection', 'relaxation' or
        'no signal'.

    Raises:
        Whatever ``os.makedirs`` raises when the run directory cannot be
        created (no ``exist_ok`` is passed).
    """
    mark_label = str(mark_id)
    logger.info('sub-process: {0}'.format(mark_label))
    time.sleep(round(sleep / args.threads, 2))

    run_dir = os.path.join(output_dir, mark_label)
    os.makedirs(run_dir)

    tree = EvolTree(tree_file, format=0)
    tree.link_to_alignment(aln_file)
    # NOTE(review): M0 runs before ``workdir`` is pointed at ``run_dir``,
    # so it uses whatever work directory the tree starts with - confirm
    # this is intended.
    tree.run_model('M0')
    tree.workdir = run_dir
    tree.mark_tree([mark_id], marks=['#1'])

    alt_model = 'bsA.' + mark_label
    null_model = 'bsA1.' + mark_label
    tree.run_model(alt_model)
    tree.run_model(null_model)

    ps = tree.get_most_likely(alt_model, null_model)
    rx = tree.get_most_likely(null_model, 'M0')

    site_classes = tree.get_evol_model(alt_model).classes
    p_bsA = site_classes['proportions'][2]
    foreground_w = site_classes['foreground w'][2]

    # The foreground value is only converted when ``ps`` is below 0.05.
    if ps < 0.05 and float(foreground_w) > 1:
        label = 'positive selection'
    elif rx < 0.05 and ps >= 0.05:
        label = 'relaxation'
    else:
        label = 'no signal'

    return [mark_id, ps, rx, p_bsA, label]
예제 #4
0
# Run the two site models whose fit is compared below.
print ('running model M1')
tree.run_model ('M1')
print ('running model M2')
tree.run_model ('M2')

# The value of get_most_likely('M2', 'M1') is reported as the p-value.
print ('\n\n comparison of models M1 and M2, p-value: ' + str(tree.get_most_likely ('M2','M1')))

#tree.show()

print ('by default the hist represented is this one:')

# Show the tree with the M2 histogram face before set_histface is called.
tree.show (histfaces=['M2'])

print ('but we can choose between many others...')

model2 = tree.get_evol_model ('M2')

# Colour table in which every category key maps to black.
col2 = {'NS' : 'black', 'RX' : 'black',
        'RX+': 'black', 'CN' : 'black',
        'CN+': 'black', 'PS' : 'black', 'PS+': 'black'}


# Curve-style face: error display on, four coloured horizontal lines,
# a fixed y range and a header text.
model2.set_histface (up=False, kind='curve', colors=col2, ylim=[0,4], hlines = [2.5, 1.0, 4.0, 0.5], header = 'Many lines, error boxes, background black',
                     hlines_col=['orange', 'yellow', 'red', 'cyan'], errors=True)

tree.show(histfaces=['M2'])

# Stick-style face with two horizontal lines; shown again afterwards.
model2.set_histface (up=False, kind='stick', hlines = [1.0,0.3], hlines_col=['black','grey'])
tree.show(histfaces=['M2'])

col = {'NS' : 'grey', 'RX' : 'black',
예제 #5
0
            continue
        else:
            evotree.link_to_alignment(subfasta)
            workdirname = './codeml_' + "__".join(closest_seq_ids)
            evotree.workdir = workdirname
            list_of_tempdirs.append(workdirname)
            # mark the foreground branch
            foreground_leafnode = evotree & seqid
            #			print (seqid)
            #			print(foreground_leafnode.node_id)
            #			print (evotree.write())
            evotree.mark_tree([foreground_leafnode.node_id], ['#1'])
            #			print (evotree.write())

            evotree.run_model('b_free.run')
            b_free_fit = evotree.get_evol_model('b_free.run')
            out_branches_dict = b_free_fit.branches
            for b in out_branches_dict:
                if out_branches_dict[b]["mark"] == " #1":
                    # Check that at least 1 synonymous substitution is expected on this branch (S * dS >= 1); otherwise estimating omega is not very meaningful (it will be very high).
                    if out_branches_dict[b]["S"] * out_branches_dict[b][
                            "dS"] >= 1.0:
                        omega = out_branches_dict[b]["w"]
                    else:
                        omega = "NA"
                    break
            omega_list.append(omega)
    numeric_omegas = [float(x) for x in omega_list if not x == "NA"]
    try:
        avg_omega = sum(numeric_omegas) / float(len(numeric_omegas))
    except ZeroDivisionError:
예제 #6
0
LRT between b_free and M0 (that is one or two rates of omega value)
p-value ofthis comparison is:''')
# Print the result of comparing 'b_free.137' against 'M0'.
print (tree.get_most_likely ('b_free.137', 'M0'))

# Pause on a prompt that explains how to read the next two comparisons.
input ('''
Now test if foreground rate is significantly different of 1.
(b_free with significantly better likelihood than b_neut)
if significantly different, and higher than one, we will be under
positive selection, if different and lower than 1 we will be under
negative selection. And finally if models are not significantly different
we should accept null hypothesis that omega value on marked branches is
equal to 1, what would be a signal of relaxation.
p-value for difference in rates between marked branches and the rest:''')
print (tree.get_most_likely ('b_free.137', 'M0'))
print ('p-value representing significance that omega is different of 1:')
print (tree.get_most_likely ('b_free.137', 'b_neut.137'))

# Read 'w' from the fitted b_free model: entry 1 is printed as the
# marked (foreground) branch, entry 2 as the background.
print ('value of omega in marked branch (frg branch):')
b_free = tree.get_evol_model ('b_free.137')
print (b_free.branches[1]['w'])

print ('and value of omega for background: ')
print (b_free.branches[2]['w'])

# NOTE(review): this introductory-sounding message is printed after the
# results above - confirm the placement is intended.
print ('we will now run 2 branch models over this tree, one letting the omega \nvalue of foreground species to be free, and the other fixing it at one.\n')

print ("The End.")



예제 #7
0
# Interactive walkthrough of the free-ratio ('fb') model.
# Uses Python 3 syntax: print() calls and input() replace the Python 2
# print statement and raw_input(), which do not parse/resolve on Python 3.
input('\n   alignment loaded, hit some key to see.\n')

tree.show()

print('''
we will run free-ratio model that is one of models available through
function run_model:
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
''')
# Show the docstring of run_model followed by a separator line.
print(tree.run_model.__doc__ + '\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')

tree.run_model('fb.example')

input('free-ratio model runned, all results are store in a Model object.')

# Fetch the fitted model object to inspect it step by step.
fb = tree.get_evol_model('fb.example')

print('Have a look to the parameters used to run this model on codeml: ')
print(fb.get_ctrl_string())
input('hit some key...')


print('Have a look to run message of codeml: ')
print(fb.run)
input('hit some key...')

print('Have a look to log likelihood value of this model, and number of parameters:')
print('lnL: %s and np: %s' % (fb.lnL, fb.np))
input('hit some key...')

input('finally have a look to two layouts available to display free-ratio:')
예제 #8
0
# Interactive comparison of the b_free, b_neut and M0 models.
# Uses Python 3 syntax: print() calls and input() replace the Python 2
# print statement and raw_input(), which do not parse/resolve on Python 3.
input('''Now we can do comparisons...
Compare first if we have one or 2 rates of evolution among phylogeny.
LRT between b_free and M0 (that is one or two rates of omega value)
p-value ofthis comparison is:''')
print(tree.get_most_likely('b_free.137', 'M0'))

# Pause on a prompt that explains how to read the next two comparisons.
input('''
Now test if foreground rate is significantly different of 1.
(b_free with significantly better likelihood than b_neut)
if significantly different, and higher than one, we will be under
positive selection, if different and lower than 1 we will be under
negative selection. And finally if models are not significantly different
we should accept null hypothesis that omega value on marked branches is
equal to 1, what would be a signal of relaxation.
p-value for difference in rates between marked branches and the rest:''')
print(tree.get_most_likely('b_free.137', 'M0'))
print('p-value representing significance that omega is different of 1:')
print(tree.get_most_likely('b_free.137', 'b_neut.137'))

# Read 'w' from the fitted b_free model: entry 1 is printed as the
# marked (foreground) branch, entry 2 as the background.
print('value of omega in marked branch (frg branch):')
b_free = tree.get_evol_model('b_free.137')
print(b_free.branches[1]['w'])

print('and value of omega for background: ')
print(b_free.branches[2]['w'])

print('we will now run 2 branch models over this tree, one letting the omega \nvalue of foreground species to be free, and the other fixing it at one.\n')

print("The End.")
예제 #9
0
    # Per-leaf step: mark the leaf as foreground, fit bsA and bsA1, report
    # both comparison values, then clear every mark for the next leaf.
    print('\n---------\nNow working with leaf ' + leaf.name)
    tree.mark_tree([leaf.node_id], marks=['#1'])
    print(tree.write())
    # to organize a bit, we name model with the name of the marked node
    # any character after the dot, in model name, is not taken into account
    # for computation. (have a look in /tmp/ete3.../bsA.. directory)
    print('running model bsA and bsA1')
    tree.run_model('bsA.' + leaf.name)
    tree.run_model('bsA1.' + leaf.name)
    print('p-value of positive selection for sites on this branch is: ')
    ps = tree.get_most_likely('bsA.' + leaf.name, 'bsA1.' + leaf.name)
    rx = tree.get_most_likely('bsA1.' + leaf.name, 'M0')
    print(str(ps))
    print('p-value of relaxation for sites on this branch is: ')
    print(str(rx))
    model = tree.get_evol_model("bsA." + leaf.name)
    if ps < 0.05 and float(model.classes['foreground w'][2]) > 1:
        print('we have positive selection on sites on this branch')
        tree.show(histfaces=['bsA.' + leaf.name])
    elif rx < 0.05 and ps >= 0.05:
        print('we have relaxation on sites on this branch')
    else:
        print('no signal detected on this branch, best fit for M0')
    print('\nclean tree, remove marks')
    # Pass a real list of node ids: on Python 3 ``map`` returns a one-shot
    # iterator that is exhausted after a single pass and cannot be indexed.
    descendants = tree.get_descendants()
    tree.mark_tree([node.node_id for node in descendants],
                   marks=[''] * len(descendants), verbose=True)

# nothing working yet to get which sites are under positive selection/relaxation,
# have to look at the main outfile or rst outfile

print ('The End.')
예제 #10
0
06 Feb 2011

use slr to compute evolutionary rates
"""

__author__  = "Francois-Jose Serra"
__email__   = "*****@*****.**"
__licence__ = "GPLv3"
__version__ = "0.0"

from ete3 import EvolTree


# Load the example tree and attach its alignment.
tree = EvolTree ("data/S_example/measuring_S_tree.nw")
tree.link_to_alignment ("data/S_example/alignment_S_measuring_evol.fasta")


# Run the 'SLR' model and fetch the resulting model object.
tree.run_model ('SLR')

slr = tree.get_evol_model ('SLR')

# Configure the histogram face: curve kind, errors enabled, and two
# horizontal lines drawn in black and grey.
slr.set_histface (up=False, kind='curve',errors=True,
                  hlines = [1.0,0.3], hlines_col=['black','grey'])

# Display the tree together with the SLR histogram face.
tree.show (histfaces=['SLR'])





예제 #11
0
        for initial_omega in [0.2, 0.7, 1.2]:
            if model == 'bsA1':
                initial_omega = 1.0
            model_specifications = model + '.' + branch_estimation + '_' + \
                                   str(initial_omega) + 'w'
            print 'Testing model ' + model + ' on ' + alignment_name + \
                  ' using starting branch length option ' + \
                  branch_estimation + ' and initial omega: ' + \
                  str(initial_omega) + 'w'
            if model == 'XX':
                tree.run_model(model_specifications, \
                            fix_blength=starting_branch_length_option, \
                            omega=initial_omega, NSsites=22, ncatG=3)

                # Here's the garbage I wrote to make sure that it parses the out files correctly
                tree.get_evol_model(
                    model_specifications).properties['typ'] = 'branch-site'
                tree.get_evol_model(model_specifications)._load(
                    model_specifications + '/out')

            else:
                tree.run_model(model_specifications, \
                            fix_blength=starting_branch_length_option, \
                            omega=initial_omega)
            current_model = tree.get_evol_model(model_specifications)
            print 'The fitting of model ' + model + ' on ' + alignment_name + \
                  ' using starting branch length option ' + \
                  branch_estimation + ' and initial omega: ' + \
                  str(initial_omega) + 'w, the likelihood was: ' + \
                  str(current_model.lnL)
            if current_model.lnL > best_lnL[model]:
                best_lnL[model] = current_model.lnL
예제 #12
0
    # Per-leaf step: mark the leaf as foreground, fit bsA and bsA1, report
    # both comparison values, then clear every mark for the next leaf.
    print('\n---------\nNow working with leaf ' + leaf.name)
    tree.mark_tree([leaf.node_id], marks=['#1'])
    print(tree.write())
    # to organize a bit, we name model with the name of the marked node
    # any character after the dot, in model name, is not taken into account
    # for computation. (have a look in /tmp/ete3.../bsA.. directory)
    print('running model bsA and bsA1')
    tree.run_model('bsA.' + leaf.name)
    tree.run_model('bsA1.' + leaf.name)
    print('p-value of positive selection for sites on this branch is: ')
    ps = tree.get_most_likely('bsA.' + leaf.name, 'bsA1.' + leaf.name)
    rx = tree.get_most_likely('bsA1.' + leaf.name, 'M0')
    print(str(ps))
    print('p-value of relaxation for sites on this branch is: ')
    print(str(rx))
    model = tree.get_evol_model("bsA." + leaf.name)
    if ps < 0.05 and float(model.classes['foreground w'][2]) > 1:
        print('we have positive selection on sites on this branch')
        tree.show(histfaces=['bsA.' + leaf.name])
    elif rx < 0.05 and ps >= 0.05:
        print('we have relaxation on sites on this branch')
    else:
        print('no signal detected on this branch, best fit for M0')
    print('\nclean tree, remove marks')
    # Pass a real list of node ids: on Python 3 ``map`` returns a one-shot
    # iterator that is exhausted after a single pass and cannot be indexed.
    descendants = tree.get_descendants()
    tree.mark_tree([node.node_id for node in descendants],
                   marks=[''] * len(descendants),
                   verbose=True)

# nothing working yet to get which sites are under positive selection/relaxation,
# have to look at the main outfile or rst outfile
예제 #13
0
# Mark every node in ``chimaeriformes`` with '#1', one mark_tree call each.
for leaf in chimaeriformes:
    tree.mark_tree([leaf.node_id], marks=["#1"])
#tree.run_model("bsA." + chimaeriformes)
#tree.mark_tree([leaf.node_id], marks = ["#1"])
print("Running")
print(tree.write())
# Fit both branch-site models under a shared '.Chimaeriformes' suffix.
tree.run_model('bsA.Chimaeriformes')
tree.run_model("bsA1.Chimaeriformes")

print('p-value of positive selection for sites on this branch is: ')
ps = tree.get_most_likely('bsA.Chimaeriformes', 'bsA1.Chimaeriformes')
print(str(ps))
# NOTE(review): rx is printed without a label of its own, directly after ps.
rx = tree.get_most_likely('bsA1.Chimaeriformes', 'M0')
print(str(rx))
model = tree.get_evol_model("bsA.Chimaeriformes")
# Classify using 0.05 thresholds on ps/rx and the third 'foreground w' value.
if ps < 0.05 and float(model.classes['foreground w'][2]) > 1:
    print('we have positive selection on sites on this branch')
    # NOTE(review): the face shown is 'bsA1.Chimaeriformes' although the
    # condition above reads the 'bsA.Chimaeriformes' model - confirm intended.
    tree.show(histfaces=['bsA1.Chimaeriformes'])
elif rx < 0.05 and ps >= 0.05:
    print('we have relaxation on sites on this branch')
else:
    print('no signal detected on this branch, best fit for M0')
#tree.show(histfaces=['bsA1.'])

# Print every model stored on the tree (iterates the private ``_models``).
for models in tree._models:
    print(tree.get_evol_model(models))

from _pickle import dump

#out = open('my_tree.pik', 'w')
예제 #14
0
파일: 1_freeratio.py 프로젝트: abdo3a/ete
# Introduce the free-ratio model before running it.
print(
    """
we will run free-ratio model that is one of models available through
function run_model:
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
"""
)
# Show the docstring of run_model followed by a separator line.
print(
    tree.run_model.__doc__ + "\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
)

tree.run_model("fb.example")

input("free-ratio model runned, all results are store in a Model object.")

# Fetch the fitted model object; the steps below print parts of it and
# pause for the user between each one.
fb = tree.get_evol_model("fb.example")

print("Have a look to the parameters used to run this model on codeml: ")
print(fb.get_ctrl_string())
input("hit some key...")


print("Have a look to run message of codeml: ")
print(fb.run)
input("hit some key...")

print("Have a look to log likelihood value of this model, and number of parameters:")
print("lnL: %s and np: %s" % (fb.lnL, fb.np))
input("hit some key...")

input("finally have a look to two layouts available to display free-ratio:")