Ejemplo n.º 1
0
def parse_paml(pamout, model):
    '''
    Parse a codeml output file and store the values found in ``model``.

    The following entries of ``model.stats`` are filled while reading:
      * 'codonFreq': 16 lists of floats read from the lines that follow the
        'Codon frequencies under model' header,
      * 'np' and 'lnL': read from the line starting with 'lnL'
        (lnL is stored as -inf when the file reports 'nan'),
      * 'kappa': last decimal number of the line starting with 'kappa ',
        or the string 'nan' when that line holds no decimal number.
    Branch labels and per-branch values are handed over to
    ``_check_paml_labels`` and ``_get_values`` respectively.

    :argument pamout: path to the codeml output file
    :argument model: model object exposing ``properties``, ``stats`` and
       ``_tree``; it is modified in place

    :returns: None
    '''
    # if multiple dataset in same file we divide the outfile and model.name+x
    if '*' not in str(model.properties['params']['ndata']):
        divide_data(pamout, model)
        return
    # read everything at once and close the file right away
    with open(pamout) as handle:
        all_lines = handle.readlines()
    # if we do not have tree, load it
    if model._tree is None:
        from ete3.evol import EvolTree
        # the third parenthesised, ';'-terminated match is used as the tree
        model._tree = EvolTree(re.findall(r'\(.*\);', ''.join(all_lines))[2])
        model._tree._label_as_paml()
    # starts parsing
    for i, line in enumerate(all_lines):
        if line == '\n':
            continue
        # codon frequency
        if line.startswith('Codon frequencies under model'):
            model.stats['codonFreq'] = []
            for j in range(16):
                freqs = [float(val) for val in
                         re.findall(r'\d\.\d+', all_lines[i + j + 1])]
                model.stats['codonFreq'] += [freqs]
            continue
        # nothing else is parsed until the codon frequencies have been seen
        if 'codonFreq' not in model.stats:
            continue
        ######################
        # start serious stuff
        line = line.rstrip()
        # lnL and number of parameters
        if line.startswith('lnL'):
            try:
                line = re.sub(r'.* np: *(\d+)\): +(-\d+\.\d+).*', r'\1 \2',
                              line)
                model.stats['np'] = int(line.split()[0])
                model.stats['lnL'] = float(line.split()[1])
            except ValueError:
                # first pattern did not match: expect 'nan' as likelihood
                line = re.sub(r'.* np: *(\d+)\): +(nan).*', r'\1 \2', line)
                model.stats['np'] = int(line.split()[0])
                model.stats['lnL'] = float('-inf')
            continue
        # get labels of internal branches
        if line.count('..') >= 2:
            labels = re.findall(r'\d+\.\.\d+', line + ' ')
            _check_paml_labels(model._tree, labels, pamout, model)
            continue
        # retrieve kappa
        if line.startswith('kappa '):
            # take the whole last decimal number of the line; a greedy
            # '.*' prefix would swallow all but one digit of its integer part
            numbers = re.findall(r'\d+\.\d+', line)
            model.stats['kappa'] = float(numbers[-1]) if numbers else 'nan'
            continue
        # retrieve dS dN t w N S and if present, errors. from summary table
        if line.count('..') == 1 and line.startswith(' '):
            if not re.match(r' +\d+\.\.\d+ +\d+\.\d+ ', line):
                # values may sit on the following line; only look at it
                # when there is a following line
                if (i + 1 < len(all_lines) and
                        re.match(r' +( +\d+\.\d+){8}', all_lines[i + 1])):
                    _get_values(model,
                                line.split()[0] + '  ' + all_lines[i + 1])
                continue
            _get_values(model, line)
            continue
#MY_PATH = '/home/francisco/toolbox/ete3-codeml/doc/tutorial/examples/'
MY_PATH = ''

TREE_PATH = MY_PATH + re.sub('\./', '', TREE_PATH)
ALG_PATH  = MY_PATH + re.sub('\./', '', ALG_PATH )

###
# load tree


print '\n         ----> we create a EvolTree object, and give to him a topology, from',
print TREE_PATH
out = True
while out == True:
    try:
        T = EvolTree(TREE_PATH)
        out = False
    except:
        sys.stderr.write('Bad path for working directory. Enter new path or quit("Q"):\n')
        PATH = raw_input('')
        if PATH.startswith('q') or PATH.startswith('Q'):
            sys.exit()
        TREE_PATH    = "./measuring_%s_tree.nw" % (typ)
        ALG_PATH     = "./alignment_%s_measuring_evol.fasta" % (typ)
        TREE_PATH = PATH + re.sub('\./', '', TREE_PATH)
        ALG_PATH  = PATH + re.sub('\./', '', ALG_PATH )


print T
print '\n         ----> and an alignment from: \n'+ALG_PATH+'\n\n'
T.link_to_alignment(ALG_PATH)
Ejemplo n.º 3
0
#MY_PATH = '/home/francisco/toolbox/ete3-codeml/doc/tutorial/examples/'
MY_PATH = ''

TREE_PATH = MY_PATH + re.sub('\./', '', TREE_PATH)
ALG_PATH = MY_PATH + re.sub('\./', '', ALG_PATH)

###
# load tree

print '\n         ----> we create a EvolTree object, and give to him a topology, from',
print TREE_PATH
out = True
while out == True:
    try:
        T = EvolTree(TREE_PATH)
        out = False
    except:
        sys.stderr.write(
            'Bad path for working directory. Enter new path or quit("Q"):\n')
        PATH = raw_input('')
        if PATH.startswith('q') or PATH.startswith('Q'):
            sys.exit()
        TREE_PATH = "./measuring_%s_tree.nw" % (typ)
        ALG_PATH = "./alignment_%s_measuring_evol.fasta" % (typ)
        TREE_PATH = PATH + re.sub('\./', '', TREE_PATH)
        ALG_PATH = PATH + re.sub('\./', '', ALG_PATH)

print T
print '\n         ----> and an alignment from: \n' + ALG_PATH + '\n\n'
T.link_to_alignment(ALG_PATH)
Ejemplo n.º 4
0
from ete3.evol import EvolTree
import sys, re

# Tag inserted into the example file names; fixed to 'S' here.
typ = 'S'

# Directory assigned to the tree's workdir below, and the prefix put in front
# of the data paths (empty: paths are used as written).
WORKING_PATH = "data/S_example/paml/"
MY_PATH = ''

# Tree file: build the name, drop any './' and prepend the prefix.
TREE_PATH = "data/S_example/measuring_%s_tree.nw" % typ
TREE_PATH = MY_PATH + re.sub('\./', '', TREE_PATH)

# Alignment file: same treatment as the tree file.
ALG_PATH = "data/S_example/alignment_%s_measuring_evol.fasta" % typ
ALG_PATH = MY_PATH + re.sub('\./', '', ALG_PATH)

# Build the tree, attach the alignment and set the working directory.
T = EvolTree(TREE_PATH)
T.link_to_alignment(ALG_PATH)
T.workdir = WORKING_PATH

# Link one output file per model, in the order fb, M1, M2.
for _out_suffix, _model_name in (('/fb/out', 'fb'),
                                 ('/M1/out', 'M1'),
                                 ('/M2/out', 'M2')):
    T.link_to_evol_model(T.workdir + _out_suffix, _model_name)

# Display the tree with the M2 histogram face, then report completion.
T.show(histfaces=['M2'])
sys.stderr.write('\n\nThe End.\n\n')