def get_btree(treestr):
    """Build a tree object from the tree string `treestr`.

    A string containing exactly one ':' is treated as a one-leaf tree of the
    form `name:length` (surrounding whitespace and trailing ';' are removed)
    and wrapped in a `OneLeafTree`. Any other string is handed to
    `baltic.make_tree`, which fills in a fresh `baltic.tree()`.

    `traverse_tree()` is called on the resulting object before it is returned.
    """
    has_single_leaf = treestr.count(':') == 1
    if not has_single_leaf:
        tree = baltic.tree()
        baltic.make_tree(treestr, tree, verbose=False)
    else:  # one-leaf tree
        # Exactly one ':' is present, so partition yields the same two pieces
        # that a split on ':' would.
        leaf_name, _, branch_length = treestr.strip().rstrip(';').partition(':')
        tree = OneLeafTree(leaf_name, float(branch_length))
    # NOTE(review): the traversal is applied to both kinds of tree here; this
    # assumes OneLeafTree also provides traverse_tree() -- confirm.
    tree.traverse_tree()
    return tree
), 'Expected number of tips: %s\nNumber of tips found: %s' % ( tipNum, len(tips) ) ## check that correct numbers of tips have been parsed ################################################################################### start analysing trees cerberus = re.match( 'tree\sSTATE\_([0-9]+).+\[\&R\]\s', line ) ## search for crud at the beginning of the line that's not a tree string if cerberus is not None: ## tree identified ################################################################# at state 0 - create the header for the output file and read the tree (in case the output log file requires information encoded in the tree) if treecount == 0: ## At tree state 0 insert header into output file ll = bt.tree() ## empty tree object start = len(cerberus.group() ) ## index of where tree string starts in the line treestring = str(line[start:]) ## grab tree string bt.make_tree(treestring, ll) ## read tree string if lower == 0 and upper == np.inf: ## only add a header if not doing a chunk outfile.write('state') ## begin the output log file ########################################### add header to output log file if 'treeLength' in analyses: outfile.write('\ttreeLength') ########################################### if 'RC' in analyses: outfile.write('\tN\tS\tuN\tuS\tdNdS') ########################################### if 'tmrcas' in analyses: tmrcas = { 'A': [], 'B': [], 'C': [] } ## dict of clade names
cerberus = re.search( 'dimensions ntax=([0-9]+);', l.lower()) ## check how many tips there are supposed to be if cerberus is not None: tipNum = int(cerberus.group(1)) ##################### cerberus = re.search( 'tree TREE([0-9]+) = \[&R\]', l) ## search for beginning of tree string in BEAST format if cerberus is not None: treeString_start = l.index( '(') ## tree string starts where the first '(' is in the line ll = bt.tree() ## new instance of tree bt.make_tree( l[treeString_start:], ll ) ## send tree string to make_tree function, provide an empty tree object ##################### if tipFlag == True: cerberus = re.search( '([0-9]+) ([A-Za-z\-\_\/\.\'0-9 \|?]+)', l ) ## look for tip name map, where each tip is given an integer to represent it in tree if cerberus is not None: tips[cerberus.group(1)] = cerberus.group(2).strip( "'" ) ## if you give tips an integer (in the form of a string), it will return the full name of the tip elif ';' not in l: ## something's wrong - nothing that matches the tip regex is being captured where it should be in the file print 'tip not captured by regex:', l.replace('\t', '') if 'translate' in l.lower(): ## start looking for tips
cerberus=re.search('([0-9]+) ([\'\"A-Za-z0-9\?\|\-\_\.\/]+)',line) tips[cerberus.group(1)]=cerberus.group(2).strip("'") if 'tree STATE_' in line and plate==True: ## starting actual analysis plate=False assert (tipNum == len(tips)),'Expected number of tips: %s\nNumber of tips found: %s'%(tipNum,len(tips)) ## check that correct numbers of tips have been parsed ################################################################################### start analysing trees cerberus=re.match('tree\sSTATE\_([0-9]+).+\[\&R\]\s',line) ## search for crud at the beginning of the line that's not a tree string if cerberus is not None: ## tree identified ################################################################# at state 0 - create the header for the output file and read the tree (in case the output log file requires information encoded in the tree) if treecount==0: ## At tree state 0 insert header into output file ll=bt.tree() ## empty tree object start=len(cerberus.group()) ## index of where tree string starts in the line treestring=str(line[start:]) ## grab tree string bt.make_tree(treestring,ll) ## read tree string if lower==0 and upper==np.inf: ## only add a header if not doing a chunk outfile.write('state') ## begin the output log file ########################################### add header to output log file if 'treeLength' in analyses: outfile.write('\ttreeLength') ########################################### if 'RC' in analyses: outfile.write('\tN\tS\tuN\tuS\tdNdS') ########################################### if 'tmrcas' in analyses: tmrcas={'A':[],'B':[],'C':[]} ## dict of clade names ll.renameTips(tips) for k in ll.Objects: ## iterate over branches if isinstance(k,bt.leaf): ## only interested in tips if 'A' in k.name: ## if name of tip satisfies condition
try: treefiles = sorted( [t for t in glob('*bestTree*') if t.split('_')[-1] in protein_list], key=lambda t: protein_list.index(t.split('_')[-1])) assert len(treefiles) == len(protein_list) except ValueError as e: # For now, require that we can match all tree files to a protein print 'Oops! Plotting without building trees? Make sure your trees are named like `whatever_protein` and you passed a list of matching `protein` names to `-p` to set the order of trees.\n\n' raise e except AssertionError as e: print 'ERROR: Missing tree files. Looked for trees for these proteins:\n', protein_list, '\n\n' raise e trees = {} for i, t in enumerate(treefiles): treestring, treeobject = open(t, 'r').readline().strip(), bt.tree() bt.make_tree(treestring, treeobject) treeobject.treeStats() ## initial traversal, checks for stats treeobject.sortBranches( ) ## traverses tree, sorts branches, draws tree (sets plotting coordinates) trees[i] = treeobject for i in range(1, len(treefiles)): print 'Untangling tree number %d' % i untangle(trees[i - 1], trees[i]) ################ ## Plot Genome Map ################ if proteins == None and reference != None: # If we didn't parse proteins earlier, but have the reference sequence, do so now. proteins, reference_seq = load_reference(reference) reference_seq = str(reference_seq.seq)