def test_get_most_likely(self):
    # Link the stored M1 and M2 outputs to the tree and compare the value
    # returned by get_most_likely('M2', 'M1') with the recorded one.
    prm1_tree = EvolTree(WRKDIR + 'tree.nw')
    prm1_tree.workdir = 'examples/evol/data/protamine/PRM1/paml/'
    prm1_tree.link_to_evol_model(WRKDIR + 'paml/M1/M1.out', 'M1')
    prm1_tree.link_to_evol_model(WRKDIR + 'paml/M2/M2.out', 'M2')
    # Both sides are rounded to 16 decimal places before the comparison.
    observed = round(prm1_tree.get_most_likely('M2', 'M1'), 16)
    expected = round(6.3280740347111373e-10, 16)
    self.assertEqual(observed, expected)
def test_paml_parser(self):
    # The same three sequences written in three different text layouts;
    # link_to_alignment must attach identical sequences in every case.
    alignments = [
        ' 3 6\nseq1\tyo\nATGATG\nseq2\nCTGATG\nseq3\nATGTTT\n',
        ' 3 6\n>seq1\nATGATG\n>seq2\t|prout\nCTGATG\n>seq3\nATGTTT\n',
        '>seq1 \nATGATG\n>seq2\nCTGATG\n>seq3\nATGTTT\n',
    ]
    expected_sequences = (
        ('seq1', 'ATGATG'),
        ('seq2', 'CTGATG'),
        ('seq3', 'ATGTTT'),
    )
    for ali in alignments:
        # A fresh tree per alignment so earlier links cannot leak over.
        t = EvolTree('((seq1,seq2),seq3);')
        t.link_to_alignment(ali)
        for leaf_name, sequence in expected_sequences:
            self.assertEqual((t & leaf_name).nt_sequence, sequence)
def test_run_codeml(self):
    # Run the 'fb' model through the external codeml binary and check
    # that the captured run output contains the expected markers.
    # Nothing is checked when `which` cannot find 'codeml'.
    if not which('codeml'):
        return
    tree = EvolTree('((seq1,seq2),seq3);')
    tree.link_to_alignment('>seq1\nATGCTG\n>seq2\nATGCTG\n>seq3\nTTGATG\n')
    tree.run_model('fb')
    run_output = tree.get_evol_model('fb').run
    # assertTrue replaces the deprecated assert_ alias.
    for marker in ('CODONML', 'Time used:', 'end of tree file', 'lnL'):
        self.assertTrue(marker in run_output,
                        '%r not found in codeml output' % marker)
    self.assertTrue(tree.get_descendants()[0].w > 0)
def test_marking_trees(self):
    # Mark a set of nodes, check the newick output, then blank every
    # mark again and check the tree is written as it was at the start.
    TREE_PATH = ETEPATH + '/examples/evol/data/S_example/'
    unmarked_newick = '((Hylobates_lar,(Gorilla_gorilla,Pan_troglodytes)),Papio_cynocephalus);'
    marked_newick = '((Hylobates_lar#2,(Gorilla_gorilla#1,Pan_troglodytes#1)#1)#2,Papio_cynocephalus);'

    tree = EvolTree(TREE_PATH + 'measuring_S_tree.nw')
    self.assertEqual(tree.write(), unmarked_newick)

    # Three nodes receive '#1' and two receive '#2'.
    node_ids = [1, 3, 7] + [2, 6]
    node_marks = ['#1'] * 3 + ['#2'] * 2
    tree.mark_tree(node_ids, marks=node_marks, verbose=True)
    self.assertEqual(tree.write().replace(' ', ''), marked_newick)

    # Overwrite the mark of every descendant with an empty string.
    descendants = tree.get_descendants()
    all_ids = [node.node_id for node in descendants]
    tree.mark_tree(all_ids, marks=[''] * len(descendants), verbose=False)
    self.assertEqual(tree.write().replace(' ', ''), unmarked_newick)
def test_run_slr(self):
    # Run the 'SLR' model through the external Slr binary and check that
    # the captured run output contains the expected markers.
    # Nothing is checked when `which` cannot find 'Slr'.
    if not which('Slr'):
        return
    tree = EvolTree('((seq1,seq2),seq3);')
    tree.link_to_alignment('>seq1\nCTGATTCTT\n>seq2\nCTGATTCTT\n>seq3\nATGATTCTT\n')
    tree.run_model('SLR')
    run_output = tree.get_evol_model('SLR').run
    # assertTrue replaces the deprecated assert_ alias.
    for marker in ('Sitewise Likelihood R', 'Positively selected s',
                   'Conserved sites', 'lnL'):
        self.assertTrue(marker in run_output,
                        '%r not found in Slr output' % marker)
def test_call_histface(self):
    # Configure several histogram faces on the M2.a model and check the
    # type of the face that ends up stored in its properties.
    tree = EvolTree(WRKDIR + 'tree.nw')
    tree.workdir = 'examples/evol/data/protamine/PRM1/paml/'
    tree.link_to_alignment(WRKDIR + 'alignments.fasta_ali')
    tree.link_to_evol_model(WRKDIR + 'paml/M2/M2.out', 'M2.a')
    col = {
        'NS': 'grey',
        'RX': 'black',
        'RX+': 'grey',
        'CN': 'black',
        'CN+': 'grey',
        'PS': 'black',
        'PS+': 'black',
    }
    # Same categories as `col`, every one drawn in white.
    col2 = dict.fromkeys(col, 'white')
    M2a = tree.get_evol_model('M2.a')
    # The faces are only exercised when PyQt4 can be imported.
    try:
        import PyQt4
    except ImportError:
        return
    M2a.set_histface(up=False, kind='stick', hlines=[1.0, 0.3],
                     hlines_col=['red', 'grey'], header='ugliest face')
    M2a.set_histface(up=False, kind='curve', colors=col2, errors=True,
                     hlines=[2.5, 1.0, 4.0, 0.5],
                     header='Many lines, error boxes, background black',
                     hlines_col=['orange', 'yellow', 'red', 'cyan'])
    M2a.set_histface(up=False, kind='bar', hlines=[1.0, 0.3],
                     hlines_col=['black', 'grey'], colors=col)
    face_type = str(type(M2a.properties['histface']))
    self.assertEqual(face_type,
                     "<class 'ete2.treeview.faces.SequencePlotFace'>")
def test_deep_copy(self):
    # A deep copy of a linked model, registered under a new name, must
    # have the same string representation as the original.
    tree = EvolTree(WRKDIR + 'tree.nw')
    tree.workdir = 'examples/evol/data/protamine/PRM1/paml/'
    tree.link_to_evol_model(WRKDIR + 'paml/fb/fb.out', 'fb')
    tree._models['fb.a'] = deepcopy(tree.get_evol_model('fb'))
    copied_text = str(tree.get_evol_model('fb.a'))
    original_text = str(tree.get_evol_model('fb'))
    self.assertEqual(copied_text, original_text)
def test_pickling(self):
    # Dump a tree with a linked model to disk, load it back and check
    # that the model's string representation survives the round trip.
    tree = EvolTree(WRKDIR + 'tree.nw')
    # NOTE(review): sibling tests use 'examples/evol/data/...' here;
    # confirm whether this differing path is intentional.
    tree.workdir = ETEPATH + '/examples/data/protamine/PRM1/paml/'
    tree.link_to_alignment(WRKDIR + 'alignments.fasta_ali')
    tree.link_to_evol_model(WRKDIR + 'paml/M2/M2.out', 'M2.a')
    pickle_path = 'blip.pik'
    try:
        # Pickle streams are opened in binary mode, and each handle is
        # closed even if dump/load raises.
        out = open(pickle_path, 'wb')
        try:
            dump(tree, out)
        finally:
            out.close()
        out = open(pickle_path, 'rb')
        try:
            tree2 = load(out)
        finally:
            out.close()
    finally:
        # Never leave the scratch file behind, even on failure.
        if os.path.exists(pickle_path):
            os.remove(pickle_path)
    self.assertEqual(str(tree2.get_evol_model('M2.a')),
                     str(tree.get_evol_model('M2.a')))
def test_labelling_tree(self):
    # Apply random_swap to the tree before linking the 'fb' model, then
    # verify the result with check_annotation.
    tree = EvolTree(WRKDIR + 'tree.nw')
    tree.workdir = 'examples/evol/data/protamine/PRM1/paml/'
    random_swap(tree)
    tree.link_to_evol_model(WRKDIR + 'paml/fb/fb.out', 'fb')
    # assertTrue replaces the deprecated assert_ alias.
    self.assertTrue(check_annotation(tree))
def test_load_model(self):
    # Link five stored model outputs plus the alignment, then check the
    # registered model names and a few parsed values.
    tree = EvolTree(WRKDIR + 'tree.nw')
    tree.workdir = 'examples/evol/data/protamine/PRM1/paml/'
    tree.link_to_evol_model(WRKDIR + 'paml/fb/fb.out', 'fb')
    tree.link_to_evol_model(WRKDIR + 'paml/M1/M1.out', 'M1')
    tree.link_to_evol_model(WRKDIR + 'paml/M2/M2.out', 'M2')
    tree.link_to_evol_model(WRKDIR + 'paml/M7/M7.out', 'M7')
    tree.link_to_evol_model(WRKDIR + 'paml/M8/M8.out', 'M8')
    tree.link_to_alignment(WRKDIR + 'alignments.fasta_ali')
    self.assertEqual(sorted(tree._models.keys()),
                     sorted(['fb', 'M1', 'M2', 'M7', 'M8']))
    self.assertEqual(len(tree.get_evol_model('M2').branches), 194)
    self.assertEqual(tree.get_evol_model('fb').lnL, -3265.316569)
    # Render each model once; assertTrue/assertFalse replace the
    # deprecated assert_ alias.
    m2_text = str(tree.get_evol_model('M2'))
    fb_text = str(tree.get_evol_model('fb'))
    self.assertTrue('proportions' in m2_text)
    self.assertTrue('p2=' in m2_text)
    self.assertFalse('proportions' in fb_text)
    self.assertTrue(' #193' in fb_text)
def parse_paml(pamout, model):
    '''
    parser function for codeml files, with values of w, dN, dS etc...
    depending on the model tested.

    Results are stored on *model*:
      * model.stats['codonFreq']: 16 lists of floats read from the lines
        that follow the 'Codon frequencies under model' header.
      * model.stats['np'] and model.stats['lnL']: read from the 'lnL'
        line; lnL is set to -inf when the file reports 'nan'.
      * model.stats['kappa']: float read from the 'kappa ' line, or the
        string 'nan' when no float is found on it.
      * branch labels go to _check_paml_labels, summary-table rows go to
        _get_values.

    Nothing is parsed before the codon frequency header has been seen.

    :argument pamout: path to the codeml output file
    :argument model: model object exposing `properties`, `stats` and
        `_tree`; it is updated in place
    '''
    # if multiple dataset in same file we divide the outfile and model.name+x
    if '*' not in str(model.properties['params']['ndata']):
        divide_data(pamout, model)
        return
    # read everything up front and release the handle straight away
    handle = open(pamout)
    try:
        all_lines = handle.readlines()
    finally:
        handle.close()
    # if we do not have tree, load it
    if model._tree is None:
        from ete2.evol import EvolTree
        # the third parenthesised, ';'-terminated line of the file is
        # used as the tree
        model._tree = EvolTree(re.findall(r'\(.*\);', ''.join(all_lines))[2])
        model._tree._label_as_paml()
    # starts parsing
    for i, line in enumerate(all_lines):
        if line == '\n':
            continue
        # codon frequency
        if line.startswith('Codon frequencies under model'):
            model.stats['codonFreq'] = []
            for j in range(16):
                freqs = [float(val) for val in
                         re.findall(r'\d\.\d+', all_lines[i + j + 1])]
                model.stats['codonFreq'] += [freqs]
            continue
        if 'codonFreq' not in model.stats:
            continue
        ######################
        # start serious stuff
        line = line.rstrip()
        # lnL and number of parameters
        if line.startswith('lnL'):
            try:
                fields = re.sub(r'.* np: *(\d+)\): +(-\d+\.\d+).*',
                                r'\1 \2', line).split()
                model.stats['np'] = int(fields[0])
                model.stats['lnL'] = float(fields[1])
            except ValueError:
                # the numeric pattern did not match: expect 'nan' instead
                fields = re.sub(r'.* np: *(\d+)\): +(nan).*',
                                r'\1 \2', line).split()
                model.stats['np'] = int(fields[0])
                model.stats['lnL'] = float('-inf')
            continue
        # get labels of internal branches
        if line.count('..') >= 2:
            labels = re.findall(r'\d+\.\.\d+', line + ' ')
            _check_paml_labels(model._tree, labels, pamout, model)
            continue
        # retrieve kappa
        if line.startswith('kappa '):
            # Take the last float on the line in full. The previous
            # greedy '.*(\d+\.\d+).*' kept only one digit of the integer
            # part, so 12.3 was read as 2.3.
            floats = re.findall(r'\d+\.\d+', line)
            if floats:
                model.stats['kappa'] = float(floats[-1])
            else:
                model.stats['kappa'] = 'nan'
        # retrieve dS dN t w N S and if present, errors. from summary table
        if line.count('..') == 1 and line.startswith(' '):
            if not re.match(r' +\d+\.\.\d+ +\d+\.\d+ ', line):
                # the values may sit on the following line; prepend the
                # branch label found on this one
                if re.match(r' +( +\d+\.\d+){8}', all_lines[i + 1]):
                    _get_values(model,
                                line.split()[0] + ' ' + all_lines[i + 1])
                    continue
            _get_values(model, line)
            continue
#MY_PATH = '/home/francisco/toolbox/ete2-codeml/doc/tutorial/examples/'
MY_PATH = ''

# Strip any './' from the configured paths and prefix them with MY_PATH.
TREE_PATH = MY_PATH + re.sub(r'\./', '', TREE_PATH)
ALG_PATH = MY_PATH + re.sub(r'\./', '', ALG_PATH )

###
# load tree
# Single-argument print() gives the same output under the Python 2
# print statement as the former "print msg," / "print TREE_PATH" pair.
print('\n ----> we create a EvolTree object, and give to him a topology, from'
      + ' ' + TREE_PATH)

# Keep asking for a directory until the tree can be loaded or the user
# quits by entering something starting with 'q' or 'Q'.
out = True
while out:
    try:
        T = EvolTree(TREE_PATH)
        out = False
    except Exception:
        # `except Exception` (not a bare except) so that Ctrl-C and
        # sys.exit() are not turned into another prompt.
        sys.stderr.write('Bad path for working directory. Enter new path or quit("Q"):\n')
        PATH = raw_input('')
        if PATH.startswith('q') or PATH.startswith('Q'):
            sys.exit()
        # Rebuild both file paths relative to the directory just entered.
        TREE_PATH = "./measuring_%s_tree.nw" % (typ)
        ALG_PATH = "./alignment_%s_measuring_evol.fasta" % (typ)
        TREE_PATH = PATH + re.sub(r'\./', '', TREE_PATH)
        ALG_PATH = PATH + re.sub(r'\./', '', ALG_PATH )

print(T)
print('\n ----> and an alignment from: \n'+ALG_PATH+'\n\n')
T.link_to_alignment(ALG_PATH)
from ete2.evol import EvolTree
import sys
import re

# Which example data set to use; the interactive choice below is disabled.
typ = 'S'
#while typ != 'L' and typ != 'S':
#    typ = raw_input (\
#        "choose kind of example [L]ong or [S]hort, hit [L] or [S]:\n")

TREE_PATH = "data/S_example/measuring_%s_tree.nw" % typ
ALG_PATH = "data/S_example/alignment_%s_measuring_evol.fasta" % typ
WORKING_PATH = "data/S_example/paml/"

# Strip any './' from the paths and prefix them with MY_PATH.
MY_PATH = ''
TREE_PATH = MY_PATH + re.sub(r'\./', '', TREE_PATH)
ALG_PATH = MY_PATH + re.sub(r'\./', '', ALG_PATH)

# Build the tree, attach the alignment and point it at the stored results.
T = EvolTree(TREE_PATH)
T.link_to_alignment(ALG_PATH)
T.workdir = WORKING_PATH

# Link the stored output of each model under its own name.
for model_name, out_file in (('fb', '/fb/out'),
                             ('M1', '/M1/out'),
                             ('M2', '/M2/out')):
    T.link_to_evol_model(T.workdir + out_file, model_name)

T.show(histfaces=['M2'])

sys.stderr.write('\n\nThe End.\n\n')