def _setupOutput(self):     ### Sets up output files self.str['MapFas','MissFas','MapRes']
    '''
    Sets up output files self.str['MapFas','MissFas','MapRes'].

    A non-empty StartFrom switches self.bool['Append'] on. Each output file name is the ResFile prefix plus a fixed
    suffix; every name is stored with setStr() and handed to rje.backup(). MissFas is left out when Combine is
    set. Finally the MapRes column headers are stored in self.list['Headers'] and written with
    rje.delimitedFileOutput().
    '''
    ### ~ [0] Setup ~ ###
    delimit = rje.getDelimit(self.cmd_list)
    if self.str['StartFrom'].lower() not in ['','none']:
        self.bool['Append'] = True
        self.printLog('#CMD','StartFrom = "%s" so Append=T' % self.str['StartFrom'])
    else:
        self.str['StartFrom'] = ''
    ### ~ [1] General ResFile ~ ###
    files = {'MapFas':'mapping.fas','MissFas':'missing.fas','MapRes':'mapping.%s' % rje.delimitExt(delimit)}
    if self.getBool('Combine'):
        files.pop('MissFas')
    if self.str['ResFile'].lower() in ['','none']:
        # Default prefix: <SeqIn base>.<MapDB base without path>
        seqbase = rje.baseFile(self.str['SeqIn'])
        dbbase = rje.baseFile(self.str['MapDB'],strip_path=True)
        self.str['ResFile'] = '%s.%s' % (seqbase,dbbase)
    for (outkey,suffix) in files.items():
        self.setStr({outkey: self.getStr('ResFile') + '.' + suffix})
        rje.backup(self,self.getStr(outkey))
    ### ~ [2] Headers for MapRes ~ ###
    #!# Consider replacing with rje_db object? #!#
    self.list['Headers'] = ['Query','Hit','Method','MapRank','BlastRank','EVal','Score']
    headers = self.list['Headers']      # Same list object: extended in place below
    for qh in ['Query','Hit']:
        headers.append('%s_Species' % qh)
        if self.bool['GablamOut']:
            headers.extend(['%s_%s' % (qh,st) for st in ['Len','Sim','ID']])
    rje.delimitedFileOutput(self,self.str['MapRes'],self.list['Headers'],delimit)
def convert(self,filelist=None,outfile=None):      ### Converts scansite output files in FileList to Outfile
    '''
    Converts scansite output files in FileList to Outfile.
    >> filelist:list = Scansite output files to convert. Falls back on self.list['FileList'] if empty/None.
    >> outfile:str = Delimited output file. Falls back on self.info['Name'] if not given.
    << True once all files have been processed; False if there were no files to convert.

    Every input line matched by rje.matchExp(re_scansite,line) is written to outfile as one delimited row. A header
    row is written unless appending to an existing file. Input files that do not exist are reported and skipped.
    Any exception is logged with the stage reached and then re-raised; open file handles are closed either way.
    '''
    _stage = 'Setup'
    try:
        ### Setup ###
        if not filelist: filelist = self.list['FileList']
        if not outfile: outfile = self.info['Name']
        if not filelist:
            self.log.errorLog('No scansite files to convert! %s unchanged/not made.' % outfile,printerror=False)
            return False
        delimit = rje.getDelimit(self.cmd_list)
        ext = rje.delimitExt(delimit)
        if ext != outfile[-3:]:     # Offer to swap the last three characters for the delimiter's extension
            newfile = outfile[:-3] + ext
            if rje.yesNo('Change file name from %s to %s?' % (outfile, newfile)): outfile = newfile
        self.log.printLog('#OUT','Converting %d file(s), output to %s.' % (len(filelist),outfile))

        ### Output File ###
        _stage = 'Output File'
        newtable = not self.opt['Append'] or not os.path.exists(outfile)
        if newtable: OUTFILE = open(outfile,'w')
        else: OUTFILE = open(outfile,'a')
        try:
            if newtable:    # Create with header
                headers = ['seq_id','enzyme','enz_group','aa','pos','score','percentile','matchseq','sa']
                rje.writeDelimit(OUTFILE,headers,delimit)

            ### Conversion ###
            _stage = 'Conversion'
            sx = 0      # Total results over all files
            for infile in filelist:
                if not os.path.exists(infile):
                    self.log.errorLog('Input file %s does not exist! :o(' % infile,False,False)
                    continue
                fx = 0  # Results from this file
                INFILE = open(infile,'r')
                try:
                    inline = rje.nextLine(INFILE)
                    while inline is not None:
                        scanlist = rje.matchExp(re_scansite,inline)
                        if scanlist:
                            rje.writeDelimit(OUTFILE,scanlist,delimit)
                            sx += 1
                            fx += 1
                            rje.progressPrint(self,sx)
                        inline = rje.nextLine(INFILE)
                finally:
                    INFILE.close()
                self.log.printLog('#OUT','%s scansite results from %s. (%s Total.)' % (rje.integerString(fx),infile,rje.integerString(sx)))
        finally:
            OUTFILE.close()

        ### End ###
        _stage = 'End'
        self.log.printLog('#OUT','%s scansite results output to %s.' % (rje.integerString(sx),outfile))
        return True
    except:
        self.log.errorLog('Error in convert(%s)' % _stage,printerror=True,quitchoice=False)
        raise
def ppiDisMatrix(self):     ### Converts PPI Table into distance matrix
    '''
    Converts PPI Table into distance matrix.

    Reads the table named by self.info['PPITab'] with rje.dataDict() and treats every column after the first as a
    protein. For each pair of proteins, rows in which at least one of the two has a true value (non-zero number or
    non-empty text) are counted as interactions (ppi); of those, rows in which only one of the two is true are
    counted as unique. The distance added is unique, or unique/ppi if self.opt['Scaled']. A pair with no
    interactions at all gets a scaled distance of 0.0 (the same as its unscaled distance) instead of failing with
    a division by zero. The matrix is saved under ppidis.info['Name'] with delimiter default ','.
    << False if the PPI table is missing or an error is caught; nothing is returned otherwise.
    '''
    try:
        ### Check File ###
        if not os.path.exists(self.info['PPITab']):
            self.log.errorLog('PPI Table file "%s" missing!' % self.info['PPITab'],printerror=False)
            return False
        ### Setup ###
        data = rje.dataDict(self,self.info['PPITab'],getheaders=True)
        headers = data.pop('Headers')
        proteins = headers[1:]      # First column is the row identifier
        ppidis = rje_dismatrix.DisMatrix(self.log,self.cmd_list)
        ppidis.opt['Symmetric'] = True
        ppidis.setInfo({'Name':'%s.ppi_dis.txt' % self.info['Basefile'],'Type':'PPI'})
        ### Make DisMatrix ###
        for (px,p1) in enumerate(proteins):
            ppidis.addDis(p1,p1,0)
            for p2 in proteins[px+1:]:  # Each unordered pair once; position-based so duplicate names are safe
                ppi = 0
                unique = 0
                for row in data.values():
                    # Numeric cells are compared as numbers ('0' is false); anything else by string truth
                    try: v1 = int(row[p1])
                    except (ValueError,TypeError): v1 = row[p1]
                    try: v2 = int(row[p2])
                    except (ValueError,TypeError): v2 = row[p2]
                    if v1 or v2:
                        ppi += 1
                        if not (v1 and v2): unique += 1
                if not self.opt['Scaled']: ppidis.addDis(p1,p2,unique)
                elif ppi: ppidis.addDis(p1,p2,float(unique)/float(ppi))
                else: ppidis.addDis(p1,p2,0.0)      # No interactions for either protein: avoid 0/0
        ### Output ###
        delimit = rje.getDelimit(self.cmd_list,default=',')
        ppidis.saveMatrix(proteins,ppidis.info['Name'],delimit)
    except:
        self.log.errorLog('Major problem with rje_ppi.ppiDisMatrix')
        return False
def ppiDisMatrix(self):     ### Converts PPI Table into distance matrix
    '''
    Converts PPI Table into distance matrix.

    Reads the table named by self.info['PPITab'] with rje.dataDict() and treats every column after the first as a
    protein. For each pair of proteins, rows in which at least one of the two has a true value (non-zero number or
    non-empty text) are counted as interactions (ppi); of those, rows in which only one of the two is true are
    counted as unique. The distance added is unique, or unique/ppi if self.opt['Scaled']. A pair with no
    interactions at all gets a scaled distance of 0.0 (the same as its unscaled distance) instead of failing with
    a division by zero. The matrix is saved under ppidis.info['Name'] with delimiter default ','.
    << False if the PPI table is missing or an error is caught; nothing is returned otherwise.
    '''
    try:
        ### Check File ###
        if not os.path.exists(self.info['PPITab']):
            self.log.errorLog('PPI Table file "%s" missing!' % self.info['PPITab'],printerror=False)
            return False
        ### Setup ###
        data = rje.dataDict(self,self.info['PPITab'],getheaders=True)
        headers = data.pop('Headers')
        proteins = headers[1:]      # First column is the row identifier
        ppidis = rje_dismatrix.DisMatrix(self.log,self.cmd_list)
        ppidis.opt['Symmetric'] = True
        ppidis.setInfo({'Name':'%s.ppi_dis.txt' % self.info['Basefile'],'Type':'PPI'})
        ### Make DisMatrix ###
        for (px,p1) in enumerate(proteins):
            ppidis.addDis(p1,p1,0)
            for p2 in proteins[px+1:]:  # Each unordered pair once; position-based so duplicate names are safe
                ppi = 0
                unique = 0
                for row in data.values():
                    # Numeric cells are compared as numbers ('0' is false); anything else by string truth
                    try: v1 = int(row[p1])
                    except (ValueError,TypeError): v1 = row[p1]
                    try: v2 = int(row[p2])
                    except (ValueError,TypeError): v2 = row[p2]
                    if v1 or v2:
                        ppi += 1
                        if not (v1 and v2): unique += 1
                if not self.opt['Scaled']: ppidis.addDis(p1,p2,unique)
                elif ppi: ppidis.addDis(p1,p2,float(unique)/float(ppi))
                else: ppidis.addDis(p1,p2,0.0)      # No interactions for either protein: avoid 0/0
        ### Output ###
        delimit = rje.getDelimit(self.cmd_list,default=',')
        ppidis.saveMatrix(proteins,ppidis.info['Name'],delimit)
    except:
        self.log.errorLog('Major problem with rje_ppi.ppiDisMatrix')
        return False
def scap(self):     ### Full SCAP method
    '''
    Full SCAP method.

    Writes one delimited row per sequence to '<Basefile>.<ext>', with columns seq, type, sorted and one X<n> column
    for each xmer length from markov.stat['MinXmer'] to markov.stat['MaxXmer']. Each X<n> value is the result of
    self.scapSeq(sequence,n), formatted to 4 d.p. if above 0.001 and in scientific notation otherwise. Sequences in
    self.obj['SeqList'] are output as type 'qry'; those in self.obj['ScapBack'] follow as type 'bg' unless it is
    the same object as the query list. Any exception is passed to self.errorLog() and not re-raised.
    '''
    try:### ~ [1] Setup ~ ###
        markov = self.obj['Markov']
        minx = markov.stat['MinXmer']
        maxx = markov.stat['MaxXmer']
        xmers = range(minx,maxx+1)
        headers = ['seq','type','sorted'] + ['X%d' % x for x in xmers]
        delimit = rje.getDelimit(self.cmd_list,'\t')
        scapfile = '%s.%s' % (self.info['Basefile'],rje.delimitExt(delimit))
        rje.delimitedFileOutput(self,scapfile,headers,delimit,rje_backup=True)
        ### ~ [2] SCAP: Query then Background ~ ###
        for (label,seqtype,objkey) in [('Query','qry','SeqList'),('Background','bg','ScapBack')]:
            # Background is only processed when it is a different dataset from the query
            if objkey == 'ScapBack' and not (self.obj['ScapBack'] != self.obj['SeqList']): continue
            (sx,stot) = (0.0,self.obj[objkey].seqNum())
            for seq in self.obj[objkey].seq:
                # sx goes up in steps of 100 so that sx/stot is a percentage
                self.progLog('\r#SCAP','SCAP processing %s to %s: %.2f%%' % (label,scapfile,(sx/stot))); sx += 100.0
                datadict = {'seq':seq.shortName(),'type':seqtype,'sorted':markov.opt['Sorted']}
                for x in xmers:
                    score = self.scapSeq(seq.info['Sequence'],x)
                    if score > 0.001: datadict['X%d' % x] = '%.4f' % score
                    else: datadict['X%d' % x] = '%.3e' % score
                rje.delimitedFileOutput(self,scapfile,headers,delimit,datadict)
            self.printLog('\r#SCAP','SCAP processed %s to %s for %s sequences.' % (label,scapfile,rje.integerString(stot)))
        if markov.opt['Sorted']: self.printLog('#SCAP','Sorted SCAP run complete')
        else: self.printLog('#SCAP','UnSorted SCAP run complete')
    except: self.errorLog(rje_zen.Zen().wisdom())
def hmmTable(self,outfile='',append=False,delimit=None):    ### Outputs results table
    '''
    Outputs results table.
    >> outfile:str = Name of output file. Falls back on self.info['HMMTab'], then '<SearchDB base>.hmmer.<ext>'
    >> append:boolean = whether to append file
    >> delimit:str = Delimiter to use [tab]
    << False if the output file name is "none" (no table made); nothing is returned otherwise.

    Calls self.readResults() and then writes one row per alignment of each hit of each search in self.search.
    Column names depend on self.opt['MySQL']. Errors are logged and re-raised.
    '''
    try:
        ### Setup ###
        outfile = outfile or self.info['HMMTab']
        if outfile.lower() == 'none':
            self.log.printLog('#TAB','HMMTab = "None": No table output')
            return False
        delimit = delimit or rje.getDelimit(self.cmd_list,'\t')
        if not outfile:
            outfile = '%s.hmmer.%s' % (rje.baseFile(self.info['SearchDB'],True),rje.delimitExt(delimit))
        self.readResults()
        self.log.printLog('#TAB','Tabulating results for %s searches into %s' % (len(self.search),outfile),log=False)

        ### Setup Resfile ###
        if not self.opt['MySQL']:
            headers = ['Type','Name','Start','End','Eval','Score']
        else:
            headers = ['HMM','Hit','Hit_Start','Hit_End','Eval','Score']
        if not (append and os.path.exists(outfile)):    # New file (with backup) unless appending to existing
            rje.delimitedFileOutput(self,outfile,headers,delimit,rje_backup=True)

        ### Output Search details ###
        for search in self.search:
            for hit in search.hit:
                for aln in hit.aln:
                    hmmname = search.info['Name']
                    hitname = hit.info['Name']
                    start = '%d' % aln.stat['SbjStart']
                    end = '%d' % aln.stat['SbjEnd']
                    # Both naming schemes are filled in; the headers select which keys are output
                    out = {'HMM':hmmname,'Type':hmmname,'Name':hitname,'Hit':hitname,
                           'Start':start,'End':end,'Hit_Start':start,'Hit_End':end,
                           'Eval':'%.2e' % aln.stat['Expect'],'Score':'%.1f' % aln.stat['BitScore']}
                    rje.delimitedFileOutput(self,outfile,headers,delimit,out)
        self.log.printLog('#OUT','Results for %s searches output to %s.' % (len(self.search),outfile))
    except:
        self.log.errorLog('Fatal Error during hmmTable(%s).' % outfile)
        raise
def run(self,imenu=False,outputmap=True,returndict=False):      ### Main controlling run Method
    '''
    Main controlling run Method.
    >> imenu:boolean = Whether to initiate interactive menu if appropriate [False].
    >> outputmap:boolean = Whether to output mapping into a file [True]
    >> returndict:boolean = Whether to return a dictionary of {searchname:mappedname} (no previous mapping) [False]
    << {} if no sequences were loaded; the mapping dictionary if returndict; nothing otherwise.

    BLASTs SeqIn against MapDB (blastx for nucleotide input) and calls self.mapSeq() for each query in turn.
    Queries before self.str['StartFrom'] are skipped and queries already in self.list['Mapped'] are not re-mapped.
    The BLAST results file is deleted at the end unless in debug/test mode. Errors are logged and re-raised.
    '''
    try:### ~ [0] Setup ~ ###
        if not self.setup(imenu): raise ValueError
        seqlist = rje_seqlist.SeqList(self.log,self.cmd_list+['autoload=T','seqmode=file'])
        if not seqlist.seqNum(): self.warnLog('No sequences loaded for mapping.'); return {}
        ## ~ [0a] Setup BLAST Search ~ ##
        blast = rje_blast.BLASTRun(self.log,['blaste=1e-4','blastv=20','blastf=F']+self.cmd_list+['v=-1'])
        blast.setStr({'DBase':self.getStr('MapDB'),'Type':'blastp','InFile':self.getStr('SeqIn'),
                      'Name':'%s-%s.blast' % (rje.baseFile(self.str['SeqIn'],True),rje.baseFile(self.str['MapDB'],True))})
        blast.setStat({'HitAln':blast.getStat('OneLine')})
        blast.list['ResTab'] = ['Search','Hit','GABLAM']
        if seqlist.nt(): blast.str['Type'] = 'blastx'
        ## ~ [0b] Setup Output ~ ##
        if outputmap: self._setupOutput()       ## Output Files ##
        if returndict: mapdict = {}
        else: self._setupMapped()               ## Previously Mapped Sequences ##
        seqx = seqlist.seqNum()                 ## Number of sequences ##
        ### ~ [1] BLAST Search Mapping ~ ###
        self.printLog('#BLAST','BLASTing %s vs %s.\n *** This could take some time if files are large. Please be patient! ***' % (self.str['SeqIn'],self.str['MapDB']),log=False)
        ## ~ [1a] Perform BLAST Unless it exists ~ ##
        blast.run(format=True)
        self.obj['DB'] = blast.obj['DB']
        ## ~ [1b] Mapping from searches ~ ##
        self.debug(self.getStr('MapDB'))
        self.obj['MapDB'] = rje_seqlist.SeqList(self.log,self.cmd_list+['autoload=F','seqmode=file','seqin=%s' % self.str['MapDB']])
        self.obj['MapDB'].loadSeq(self.getStr('MapDB'))
        self.debug('%s' % self.obj['MapDB'].list['Seq'])
        sx = 0
        while seqlist.nextSeq() != None:
            search = seqlist.getSeq(format='short')
            sx += 1
            ## Check StartFrom ##
            if self.str['StartFrom']:
                if self.str['StartFrom'] != search:
                    self.progLog('\r#SKIP','Looking for %s: skipping %d seqs' % (self.str['StartFrom'],sx))
                    continue
                # Report before clearing StartFrom, else the name in the message is blank.
                # The current sequence is the starting point, so only the sx-1 before it were skipped.
                self.printLog('\r#SKIP','Starting from %s: skipped %d seqs' % (self.str['StartFrom'],sx-1))
                self.str['StartFrom'] = ''
            ## Check if in Mapped ##
            if search in self.list['Mapped']:
                resdict = {'Query':search,'Hit':search,'Method':'Already Mapped!'}
                self.printLog('#FAS','%s already in output - not duplicating in %s' % (search,self.str['MapFas']))
                # MapRes and its headers are only set up by _setupOutput(), i.e. when outputmap is True
                if outputmap: rje.delimitedFileOutput(self,self.str['MapRes'],self.list['Headers'],rje.getDelimit(self.cmd_list),resdict)
                continue
            ### Map Sequence ###
            self.printLog('#MAP','Mapping %s seqs: %s of %s' % (self.str['SeqIn'],rje.integerString(sx),rje.integerString(seqx)))
            mapname = self.mapSeq(seqlist,blast,search,outputmap=outputmap)     # Honour outputmap=False
            if returndict: mapdict[search] = mapname
        ### ~ [2] Finish ~ ###
        self.printLog('#MAP','Mapping of %s (%s seqs) complete.' % (self.str['SeqIn'],rje.integerString(seqx)))
        if os.path.exists(blast.str['Name']) and not (self.getBool('DeBug') or self.test()): os.unlink(blast.str['Name'])     #!# Add option to keep BLAST! #!#
        if returndict: return mapdict
    except: self.errorLog('Error in SeqMapper.run()',printerror=True,quitchoice=True); raise
def mapPhosByBLAST( self, fasfile ): ### BLAST sequences against phosphoDB, align hits & mark sites (ID & Homology) '''BLAST sequences against phosphoDB, align hits and mark phosphosites (ID & Homology).''' try: ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ### ## ~ [1a] Setup fasfile ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## scmd = self.cmd_list + [ 'seqin=%s' % fasfile, 'autoload=T', 'autofilter=F' ] qseqlist = rje_seq.SeqList(self.log, scmd) qdict = qseqlist.seqNameDic() ## ~ [1b] Setup results files/directories ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## basefile = rje.baseFile(fasfile) if self.info['PhosRes'].lower() in ['', 'none']: self.info['PhosRes'] = '%s.phosres.tdt' % basefile headers = ['Name', 'Pos', 'AA', 'PELM', 'PELMPos', 'Evidence'] delimit = rje.getDelimit( self.cmd_list, rje.delimitFromExt(filename=self.info['PhosRes'])) rje.delimitedFileOutput(self, self.info['PhosRes'], headers, delimit, rje_backup=True) ppath = rje.makePath('PhosALN') rje.mkDir(self, ppath) ## ~ [1c] Setup BLAST ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## pblast = rje_blast.BLASTRun(self.log, self.cmd_list + ['formatdb=F']) pblast.setInfo({ 'Name': '%s.p.blast' % rje.baseFile(fasfile), 'DBase': self.info['PELMFas'], 'InFile': fasfile }) pblast.setStat({'HitAln': pblast.stat['OneLine']}) pblast.opt['Complexity Filter'] = False pblast.formatDB(force=False) ## ~ [1d] Setup GABLAM Stats ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## gkey = 'GABLAMO ID' #x# % self.info['GABLAMO Key'] for g in ['ID', 'Hom']: if self.stat['%sSim' % g] < 1.0: self.stat['%sSim' % g] *= 100.0 self.stat['%sSim' % g] = max(0.0, self.stat['%sSim' % g]) ### ~ [2] PhosphoBLAST ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ### pblast.blast(use_existing=True, log=True) # BLAST pblast.readBLAST(gablam=True) # Read in while pblast.search: ## ~ 
[2a] Align relevant hits from each BLAST ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## search = pblast.search.pop(0) qseq = qdict[search.info['Name']] idlist = [] qlen = qseq.aaLen() hitdict = search.hitSeq(self.obj['SeqList']) aln = rje_seq.SeqList( self.log, self.cmd_list + ['autoload=F', 'autofilter=F']) aln.seq = [qseq] pdict = {} # Dictionary of {hseq:[poslist]} rdict = {qseq: 0} # Dictionary of {hseq:res} for hit in search.hit[0:]: hseq = hitdict[hit] pdict[hseq] = [] for pos in rje.sortKeys( self.dict['PhosphoSites'][hseq.info['AccNum']]): pdict[hseq].append(pos) if hit.info['Name'] == search.info['Name']: if qseq.getSequence(case=False, gaps=False) != hseq.getSequence( case=False, gaps=False): self.log.errorLog( 'Major problem: Search/Hit sequence mismatch for same sequence "%s"' % hit.info['Name']) idlist.append(qseq) pdict[qseq] = pdict.pop(hseq) continue gdict = hit.globalFromLocal(qlen) qvh = float(100 * gdict['Query'][gkey]) / float(qlen) if qvh < self.stat['HomSim']: pdict.pop(hseq) continue aln.seq.append(hseq) if (qseq.sameSpec(hseq) or not self.opt['UseSpec'] ) and qvh >= self.stat['IDSim']: idlist.append(hseq) rdict[hseq] = 0
def mapSeq(self,seqlist,blast,search,outputmap=True):    ### Performs actual mapping of sequence
    '''
    Performs actual mapping of sequence.
    >> seqlist:SeqList object whose current sequence (read as a (name,sequence) tuple) is to be mapped
    >> blast:BLAST_Run object holding the BLAST 'Hit' table and the database path (blast.str['DBase'])
    >> search:str = Name of the query, used to look up its BLAST hits
    >> outputmap:boolean = Whether to output mapping into a file [True]
    << returns shortName() of mapped sequence (or None if none). False is returned if an error was caught.

    Each method in self.list['Mapping'] is tried on the ordered BLAST hits until one returns a hit. Failing that,
    and if 'grep' is a mapping method, the database file is searched for the query sequence with grep. Newly
    mapped sequences are added to self.list['Mapped'] and appended to MapFas. Unmapped queries go to MapFas if
    self.bool['Combine'], else to MissFas. One row per query is written to MapRes. File output only happens if
    outputmap is True.
    '''
    def appendFasta(filename,name,sequence):
        '''Appends one fasta entry to filename, closing the file afterwards.'''
        FAS = open(filename,'a')
        try: FAS.write('>%s\n%s\n' % (name,sequence))
        finally: FAS.close()

    seqname = search    # Used by the error message if the sequence itself cannot be read
    try:### ~ [0] Setup ~ ###
        seq = seqlist.getSeq(format='tuple')
        seqname = seq[0]
        mapseq = self.obj['MapDB']
        hits = blast.db('Hit').indexEntries('Query',search)
        self.printLog('#HITS','%s vs %s = %d hits' % (search,blast.str['DBase'],len(hits)))
        hitseq = {}; hitdata = {}
        for entry in hits:
            hitseq[entry['Hit']] = mapseq.getDictSeq(entry['Hit'],format='tuple')
            hitdata[entry['Hit']] = entry
        resdict = {'Query':search,'Hit':None,'Method':'Failed','Query_Species':rje_sequence.specCodeFromName(seq[0])}
        ### ~ [1] Order Hits and Check Species ~ ###
        (hits,hitdict) = self.orderHits(seq,hits,hitseq)
        self.debug(hits)
        self.debug(hitdict)
        ### ~ [2] Attempt mapping ~ ###
        for method in self.list['Mapping']:
            resdict['Hit'] = self.mapHit(seq,hits,hitdict,method.lower())
            if resdict['Hit']:
                resdict['Method'] = method[:1].upper() + method[1:].lower()
                break
            elif method == 'gablam' and (len(hits) > 0):
                resdict['Method'] = 'Rejected'
        self.debug(resdict)
        ### ~ [3] Output! ~ ###
        if resdict['Hit']:
            hit = resdict['Hit']['Hit']     # resdict['Hit'] is the BLAST table entry for Hit
            shortname = hitdict[hit]['Data']['ShortName']
            self.printLog('#MAP','%s mapped to %s (by %s)' % (seq[0].split()[0],shortname,resdict['Method']))
            ## Update Stats ##
            self.debug('')
            resdict['BlastRank'] = hitdata[hit]['Rank']
            for key in hitdict[hit]: resdict[key] = hitdict[hit][key]
            ## Fasta and Redundancy ##
            if shortname in self.list['Mapped']:
                self.printLog('#MAP','%s already mapped before - not duplicating in %s' % (shortname,self.str['MapFas']))
            else:
                self.list['Mapped'].append(shortname)
                if outputmap: appendFasta(self.str['MapFas'],hitseq[hit][0],hitseq[hit][1])
            resdict['Hit_Species'] = hitdict[hit]['Data']['SpecCode']
            resdict['Hit'] = shortname
        else:
            ### ~ [4] GREP-based search ~ ###
            if 'grep' in self.list['Mapping']:
                import subprocess
                greplist = []; grepseq = ''
                self.printLog('#GREP','grep %s %s -B 1' % (seq[1],blast.str['DBase']),log=False)
                # Argument list rather than a shell string: sequence and path are never shell-interpreted
                try:
                    grep = subprocess.Popen(['grep','-B','1','-e',seq[1],blast.str['DBase']],
                                            stdout=subprocess.PIPE,universal_newlines=True)
                    greplines = grep.communicate()[0].splitlines()
                except OSError:
                    self.printLog('#GREP','Unable to run grep: no grep-based mapping performed.')
                    greplines = []
                for line in greplines:
                    if line[:1] == '>': greplist.append(line[1:].split()[0])
                    elif not grepseq: grepseq = rje.chomp(line)     # First matching sequence line only
                if greplist:
                    shortname = greplist.pop(0)
                    resdict['Hit'] = shortname
                    resdict['Method'] = 'Grep'
                    # Original 'Qry_*' keys are kept; 'Query_*' keys match the MapRes headers
                    resdict['Qry_ID'] = '100.0'
                    resdict['Qry_Len'] = len(seq[1])
                    resdict['Query_ID'] = '100.0'
                    resdict['Query_Len'] = len(seq[1])
                    resdict['Hit_Len'] = len(grepseq)
                    # Query is contained in the hit, so the identical portion of the hit is query/hit length
                    if grepseq: resdict['Hit_ID'] = 100.0 * len(seq[1]) / len(grepseq)
                    try: resdict['Hit_Species'] = shortname.split('_')[1]
                    except IndexError: pass     # Name has no species code component
                    if shortname in self.list['Mapped']:
                        self.printLog('#MAP','%s already mapped before - not duplicating in %s' % (shortname,self.str['MapFas']))
                    else:
                        self.list['Mapped'].append(shortname)
                        if outputmap: appendFasta(self.str['MapFas'],shortname,grepseq)
                for extra in greplist:
                    self.printLog('#GREP','Warning! Query "%s" also hit "%s" with grep!' % (seq[0].split()[0],extra))
            ### ~ [5] Unmapped sequences ~ ###
            if resdict['Hit']:      # Rescued by grep: not a missing sequence
                self.printLog('#MAP','%s mapped to %s (by %s)' % (seq[0].split()[0],resdict['Hit'],resdict['Method']))
            else:
                if self.bool['Combine']:
                    ## Fasta and Redundancy ##
                    shortname = seq[0].split()[0]
                    if shortname in self.list['Mapped']:
                        self.printLog('#FAS','%s already in output - not duplicating in %s' % (shortname,self.str['MapFas']))
                    else:
                        self.list['Mapped'].append(shortname)
                        if outputmap: appendFasta(self.str['MapFas'],seq[0],seq[1])
                elif outputmap: appendFasta(self.str['MissFas'],seq[0],seq[1])
                self.printLog('#MISS','%s mapping %s' % (resdict['Query'],resdict['Method']))
        if outputmap:
            rje.delimitedFileOutput(self,self.str['MapRes'],self.list['Headers'],rje.getDelimit(self.cmd_list),resdict)
        return resdict['Hit']
    except:
        self.errorLog('Fudgesticks! SeqMapper.mapSeq(%s) has died!' % seqname,quitchoice=True)
        return False
def hmmTable(self, outfile='', append=False, delimit=None):    ### Outputs results table
    '''
    Outputs results table.
    >> outfile:str = Name of output file. Falls back on self.info['HMMTab'], then '<SearchDB base>.hmmer.<ext>'
    >> append:boolean = whether to append file
    >> delimit:str = Delimiter to use [tab]
    << False if the output file name is "none" (no table made); nothing is returned otherwise.

    Calls self.readResults() and then writes one row per alignment of each hit of each search in self.search.
    Column names depend on self.opt['MySQL']. Errors are logged and re-raised.
    '''
    try:
        ### Setup ###
        outfile = outfile or self.info['HMMTab']
        if outfile.lower() == 'none':
            self.log.printLog('#TAB', 'HMMTab = "None": No table output')
            return False
        delimit = delimit or rje.getDelimit(self.cmd_list, '\t')
        if not outfile:
            outfile = '%s.hmmer.%s' % (rje.baseFile(self.info['SearchDB'], True), rje.delimitExt(delimit))
        self.readResults()
        self.log.printLog('#TAB', 'Tabulating results for %s searches into %s' % (len(self.search), outfile), log=False)

        ### Setup Resfile ###
        if not self.opt['MySQL']:
            headers = ['Type', 'Name', 'Start', 'End', 'Eval', 'Score']
        else:
            headers = ['HMM', 'Hit', 'Hit_Start', 'Hit_End', 'Eval', 'Score']
        if not (append and os.path.exists(outfile)):    # New file (with backup) unless appending to existing
            rje.delimitedFileOutput(self, outfile, headers, delimit, rje_backup=True)

        ### Output Search details ###
        for search in self.search:
            for hit in search.hit:
                for aln in hit.aln:
                    hmmname = search.info['Name']
                    hitname = hit.info['Name']
                    start = '%d' % aln.stat['SbjStart']
                    end = '%d' % aln.stat['SbjEnd']
                    # Both naming schemes are filled in; the headers select which keys are output
                    out = {
                        'HMM': hmmname, 'Type': hmmname, 'Name': hitname, 'Hit': hitname,
                        'Start': start, 'End': end, 'Hit_Start': start, 'Hit_End': end,
                        'Eval': '%.2e' % aln.stat['Expect'], 'Score': '%.1f' % aln.stat['BitScore']
                    }
                    rje.delimitedFileOutput(self, outfile, headers, delimit, out)
        self.log.printLog('#OUT', 'Results for %s searches output to %s.' % (len(self.search), outfile))
    except:
        self.log.errorLog('Fatal Error during hmmTable(%s).' % outfile)
        raise
def _pepStats(self):    ### Peptide Distance
    '''
    Peptide Distance.

    Loads the input sequences as peptides and writes one delimited row per peptide to 'hrb.pepstats.<ext>':
    - the summed value of every property in aaprop.prop (converted to integers first, except for '-' and 'X');
    - net_charge = sum of Positive minus Negative property values;
    - charge_balance and hydrophobic_balance = sum over positions of value * (1/(r+1) - 1/(L-r)), where r is the
      zero-based position and L the peptide length, so residues near the start count positively and residues near
      the end negatively. L was previously fixed at 10, which gave the same values for 10-mers but divided by zero
      for longer peptides.
    Errors are logged and re-raised; the output file is closed either way.
    '''
    try:
        ### Setup ###
        seqlist = rje_seq.SeqList(self.log,self.cmd_list+['autoload=T'])
        aaprop = rje_aaprop.AAPropMatrix(self.log,self.cmd_list)
        aaprop.makePropDif()
        delimit = rje.getDelimit(self.cmd_list)
        proplist = rje.sortKeys(aaprop.prop)

        ### Output File Setup ###
        OUTFILE = open('hrb.pepstats.%s' % rje.delimitExt(delimit),'w')
        try:
            headlist = ['peptide']
            ## Peptide Property Output ##
            for prop in proplist:
                headlist.append(prop.lower())
                for aa in list(aaprop.prop[prop].keys()):
                    if aa in ['-','X']: continue
                    try: aaprop.prop[prop][aa] = int(aaprop.prop[prop][aa])
                    except:
                        # Show the offending value before passing the error on
                        print('%s %s %s %s' % (aaprop.prop,prop,aa,aaprop.prop[prop][aa]))
                        raise
            ## Additional Stats ##
            headlist += ['net_charge','charge_balance','hydrophobic_balance']
            ## Output
            rje.writeDelimit(OUTFILE,headlist,delimit)

            ### Calculate stats ###
            for pep in seqlist.seq:
                pepname = pep.shortName()
                trimmed = rje.matchExp(r'^(\S+_\d[CQ])',pepname)
                if trimmed: pepname = trimmed[0]
                outlist = [pepname]
                pepseq = pep.info['Sequence']
                peplen = len(pepseq)
                ## Peptide Property Output ##
                for prop in proplist:
                    px = 0
                    for aa in pepseq: px += aaprop.prop[prop][aa]
                    outlist.append('%d' % px)
                ## Additional Stats ##
                net_charge = 0
                for aa in pepseq:
                    net_charge += (aaprop.prop['Positive'][aa] - aaprop.prop['Negative'][aa])
                outlist.append('%d' % net_charge)
                charge_balance = 0
                hydrophobic_balance = 0
                for (r,aa) in enumerate(pepseq):
                    nweight = 1.0 / (r+1)           # Distance from the first residue
                    cweight = 1.0 / (peplen-r)      # Distance from the last residue
                    charge_balance += aaprop.prop['Charged'][aa] * nweight
                    charge_balance -= aaprop.prop['Charged'][aa] * cweight
                    hydrophobic_balance += aaprop.prop['Hydrophobic'][aa] * nweight
                    hydrophobic_balance -= aaprop.prop['Hydrophobic'][aa] * cweight
                outlist.append('%.3f' % charge_balance)
                outlist.append('%.3f' % hydrophobic_balance)
                rje.writeDelimit(OUTFILE,outlist,delimit)
        ### Finish ###
        finally:
            OUTFILE.close()
    except:
        self.log.errorLog('Error in _pepStats',printerror=True,quitchoice=False)
        raise   # Delete this if method error not terrible
def run(self):      ### Main Run method
    '''
    Main Run method.

    Runs one of three modes, checked in this order:
    - self.opt['SLiMDisc']: hands over to self.slimDisc() and returns its result.
    - self.opt['Teiresias']: saves the sequences to '<base>.teiresias.fas', runs the TEIRESIAS command (unless an
      up-to-date '<base>.teiresias.out' is being reused), reads the patterns back in, scores them and writes them
      to self.info['Name'], or to separate '.patterns' and '.occ' tables if self.opt['MySQL'].
    - self.info['Info'] set: loads motifs from that file and writes an information content score for each to
      '<base>.info.<ext>'. Returns False if the file is missing.
    Header rows are only written when not appending. Errors are logged and re-raised.
    '''
    try:
        ### SLiMDisc Run ###
        if self.opt['SLiMDisc']: return self.slimDisc()

        ### TEIRESIAS ###
        if self.opt['Teiresias']:
            ## Setup ##
            seqlist = rje_seq.SeqList(self.log,self.cmd_list)
            infile = '%s.teiresias.fas' % rje.baseFile(seqlist.info['Name'],True)
            outfile = '%s.teiresias.out' % rje.baseFile(seqlist.info['Name'],True)
            run_teiresias = True
            if rje.isYounger(outfile,infile) == outfile:
                # Existing results are reused unless the user interactively asks to regenerate them
                if self.stat['Interactive'] < 1 or not rje.yesNo('%s and %s exist already. Regenerate?' % (infile,outfile),'N'):
                    run_teiresias = False
            ## Run TEIRESIAS ##
            if run_teiresias:
                seqlist.saveFasta(seqfile=infile,name='Teiresias')  ### Saves sequences in fasta format
                command = rje.makePath(self.info['TeiresiasPath'],True)
                command += ' -i%s -o%s %s' % (infile,outfile,self.info['TeiresiasOpt'])
                self.log.printLog('#CMD',command)
                # NOTE(review): command is a shell string built from file names and user options
                os.system(command)
            ## Read Results ##
            self.verbose(0,2,'Reading TEIRESIAS output from %s...' % outfile,1)
            self.list['Pattern'] = []
            RESULTS = open(outfile,'r')
            line = RESULTS.readline()
            while line:
                patdata = rje.matchExp(r'^(\d+)\s+(\d+)\s+(\S+)\s+(\d.+\d)$',line)
                if patdata: self.addTeiresiasPattern(patdata)   # New pattern
                elif len(line) > 3 and line[0] != '#':
                    self.log.errorLog('Did not recognise line: %s' % line,False,False)
                line = RESULTS.readline()
            RESULTS.close()
            patx = len(self.list['Pattern'])
            self.log.printLog('#PAT','%s TEIRESIAS patterns read from %s.' % (rje.integerString(patx),outfile))
            ## Calculate Information Content ##
            aafreq = seqlist.aaFreq()
            self.verbose(0,3,'Calculating Information Content & Length stats...',0)
            occx = 0
            for pattern in self.list['Pattern']:
                pattern.stat['Info'] = self.calculateScore(pattern.info['Pattern'],aafreq)
                pattern._makeLength()
                occx += 1
                rje.progressPrint(self,occx,patx/100,patx/10)
            self.verbose(0,1,'...Done!',2)
            ## Prepare Results ##
            delimit = rje.getDelimit(self.cmd_list)
            if self.info['Name'] == 'None':
                self.info['Name'] = '%s.teiresias.%s' % (rje.baseFile(seqlist.info['Name'],True),rje.delimitExt(delimit))
            if self.opt['Append']: mode = 'a'
            else: mode = 'w'        # New tables replace any existing file and start with a header row
            if self.opt['MySQL']:   # Two tables
                patfile = os.path.splitext(self.info['Name'])
                occfile = '%s.occ%s' % (patfile[0],patfile[1])
                patfile = '%s.patterns%s' % (patfile[0],patfile[1])
                PATFILE = open(patfile,mode)
                OCCFILE = open(occfile,mode)
                if not self.opt['Append']:
                    rje.writeDelimit(PATFILE,['pattern','tot_occ','seq_occ','info','len','fix','wild'],delimit)
                    rje.writeDelimit(OCCFILE,['seq_id','pos','pattern','pat_match'],delimit)
            else:                   # Single table, named by self.info['Name']
                RESFILE = open(self.info['Name'],mode)
                if not self.opt['Append']:
                    rje.writeDelimit(RESFILE,['Sequence Name','Position','Pattern','Match','Total Occurrences','Num Sequences','Information Content','Length','Fixed','Wildcard'],delimit)
            ## Save Results ##
            occx = 0
            for pattern in self.list['Pattern']:
                patstats = []
                for stat in ['OccCount','SeqCount','Info','Length','Fixed','Wildcards']:
                    patstats.append('%d' % pattern.stat[stat])
                patstats[2] = '%.3f' % pattern.stat['Info']     # Info is the only non-integer stat
                if self.opt['MySQL']:   # Two tables
                    rje.writeDelimit(PATFILE,[pattern.info['Pattern']] + patstats,delimit)
                for occ in rje.sortKeys(pattern.occ):
                    seq = seqlist.seq[occ]
                    for pos in pattern.occ[occ]:
                        match = seq.info['Sequence'][pos:(pos+pattern.stat['Length'])]
                        outlist = [seq.shortName(),'%d' % pos,pattern.info['Pattern'],match]
                        if self.opt['MySQL']:   # Two tables
                            rje.writeDelimit(OCCFILE,outlist,delimit)
                        else:
                            rje.writeDelimit(RESFILE,outlist+patstats,delimit)
                        occx += 1
            if self.opt['MySQL']:   # Two tables
                PATFILE.close()
                OCCFILE.close()
                self.log.printLog('#OUT','%s patterns output to %s.' % (rje.integerString(patx),patfile))
                self.log.printLog('#OUT','%s pattern occurrences output to %s.' % (rje.integerString(occx),occfile))
            else:
                RESFILE.close()
                self.log.printLog('#OUT','%s occurrences of %s patterns output to %s.' % (rje.integerString(occx),rje.integerString(patx),self.info['Name']))

        ### InfoContent ###
        elif self.info['Info'] != 'None':
            ## Setup ##
            alphabet = rje_seq.alph_protx
            if not os.path.exists(self.info['Info']):
                self.log.errorLog('Input file %s missing!' % self.info['Info'],False,False)
                return False
            mypresto = presto.Presto(self.log,self.cmd_list)
            mypresto.loadMotifs(file=self.info['Info'],clear=True)
            seqlist = rje_seq.SeqList(self.log,self.cmd_list+['autoload=T'])
            if seqlist.seqNum() > 0:
                aafreq = seqlist.aaFreq(alphabet=None,fromfile=None,loadfile=None,total=False)  ### Returns dictionary of AA (& gap etc.) frequencies
            else:   # No sequences: equal frequency for every letter of the alphabet
                aafreq = {}
                for aa in alphabet: aafreq[aa] = 1.0 / len(alphabet)
            maxinfo = 0
            for aa in aafreq:
                # A zero frequency contributes nothing (and log of zero is undefined)
                if aafreq[aa] > 0: maxinfo += (aafreq[aa] * math.log(aafreq[aa],2))
            ## Output ##
            delimit = rje.getDelimit(self.cmd_list)
            ext = rje.delimitExt(delimit)
            outfile = '%s.info.%s' % (rje.baseFile(self.info['Info'],True,['.txt','.%s' % ext]),ext)
            if self.opt['Append']:
                OUTFILE = open(outfile,'a')
            else:
                OUTFILE = open(outfile,'w')
                rje.writeDelimit(OUTFILE,['motif','pattern','info'],delimit)
            ## Calculate Information Scores ##
            for motif in mypresto.motif:
                self.verbose(2,4,motif.info['Sequence'],0)
                elements = motif.info['Sequence'].replace('X','.').split('-')
                pattern = ''
                for el in elements:
                    if el.find('.{') == 0: pattern += '.'   # Ambiguous spacer length - compress
                    else: pattern += el
                self.verbose(2,2,'=> %s' % pattern,1)
                motif.stat['Info'] = self.calculateInformationContent(pattern,aafreq,maxinfo,self.stat['InfoGapPen'])
                self.verbose(0,3,'%s (%s) = %.2f' % (motif.info['Name'],pattern,motif.stat['Info']),1)
                ## Output ##
                rje.writeDelimit(OUTFILE,[motif.info['Name'],pattern,'%.2f' % motif.stat['Info']],delimit)
            ## Finish ##
            OUTFILE.close()
    except:
        self.log.errorLog('Error in run().',printerror=True,quitchoice=False)
        raise   # Delete this if method error not terrible
def mapPhosByBLAST(self,fasfile):   ### BLAST sequences against phosphoDB, align hits & mark sites (ID & Homology)
    '''
    BLAST sequences against phosphoDB, align hits and mark phosphosites (ID & Homology).

    fasfile = name of the fasta file of query sequences. It is loaded as the query SeqList, used as the BLAST
              input file and (via rje.baseFile) provides the default results file name.

    For every BLAST search the query is aligned (aln.muscleAln) with those hits whose GABLAM identity, as a
    percentage of the query length, reaches self.stat['HomSim']. Each aligned column where a hit carries a known
    phosphosite (self.dict['PhosphoSites']) and the query has the same residue is written as one row of
    self.info['PhosRes'] with Evidence 'Self', 'ID' or 'Hom', and is passed to self.addPhos().

    Returns None. Any exception is logged through self.log.errorLog and is not re-raised.
    '''
    try:
        ### ~ [1] Setup ~ ###
        ## ~ [1a] Setup fasfile ~ ##
        scmd = self.cmd_list + ['seqin=%s' % fasfile,'autoload=T','autofilter=F']
        qseqlist = rje_seq.SeqList(self.log,scmd)
        qdict = qseqlist.seqNameDic()
        ## ~ [1b] Setup results files/directories ~ ##
        basefile = rje.baseFile(fasfile)
        if self.info['PhosRes'].lower() in ['','none']: self.info['PhosRes'] = '%s.phosres.tdt' % basefile
        headers = ['Name','Pos','AA','PELM','PELMPos','Evidence']
        delimit = rje.getDelimit(self.cmd_list,rje.delimitFromExt(filename=self.info['PhosRes']))
        rje.delimitedFileOutput(self,self.info['PhosRes'],headers,delimit,rje_backup=True)
        ppath = rje.makePath('PhosALN')
        rje.mkDir(self,ppath)
        ## ~ [1c] Setup BLAST ~ ##
        pblast = rje_blast.BLASTRun(self.log,self.cmd_list+['formatdb=F'])
        pblast.setInfo({'Name':'%s.p.blast' % rje.baseFile(fasfile),'DBase':self.info['PELMFas'],'InFile':fasfile})
        pblast.setStat({'HitAln':pblast.stat['OneLine']})
        pblast.opt['Complexity Filter'] = False
        pblast.formatDB(force=False)
        ## ~ [1d] Setup GABLAM Stats ~ ##
        gkey = 'GABLAMO ID' #x# % self.info['GABLAMO Key']
        for g in ['ID','Hom']:
            simkey = '%sSim' % g
            # Cut-offs below 1.0 are treated as fractions and rescaled to percentages; negatives are floored at 0
            if self.stat[simkey] < 1.0: self.stat[simkey] *= 100.0
            self.stat[simkey] = max(0.0,self.stat[simkey])

        ### ~ [2] PhosphoBLAST ~ ###
        pblast.blast(use_existing=True,log=True)    # BLAST
        pblast.readBLAST(gablam=True)               # Read in
        while pblast.search:
            ## ~ [2a] Align relevant hits from each BLAST ~ ##
            search = pblast.search.pop(0)   # Searches are consumed as they are processed
            qseq = qdict[search.info['Name']]
            idlist = []         # Sequences whose sites count as 'Self'/'ID' evidence rather than 'Hom'
            qlen = qseq.aaLen()
            hitdict = search.hitSeq(self.obj['SeqList'])
            aln = rje_seq.SeqList(self.log,self.cmd_list+['autoload=F','autofilter=F'])
            aln.seq = [qseq]
            pdict = {}          # Dictionary of {hseq:[poslist]}
            rdict = {qseq:0}    # Dictionary of {hseq:res}
            for hit in search.hit[0:]:
                hseq = hitdict[hit]
                pdict[hseq] = list(rje.sortKeys(self.dict['PhosphoSites'][hseq.info['AccNum']]))
                if hit.info['Name'] == search.info['Name']:
                    # Query is itself in the database: file its sites under the query object, do not align it twice
                    if qseq.getSequence(case=False,gaps=False) != hseq.getSequence(case=False,gaps=False):
                        self.log.errorLog('Major problem: Search/Hit sequence mismatch for same sequence "%s"' % hit.info['Name'])
                    idlist.append(qseq)
                    pdict[qseq] = pdict.pop(hseq)
                    continue
                gdict = hit.globalFromLocal(qlen)
                qvh = float(100 * gdict['Query'][gkey]) / float(qlen)
                if qvh < self.stat['HomSim']:   # Too dissimilar to map sites by homology
                    pdict.pop(hseq)
                    continue
                aln.seq.append(hseq)
                if (qseq.sameSpec(hseq) or not self.opt['UseSpec']) and qvh >= self.stat['IDSim']: idlist.append(hseq)
                rdict[hseq] = 0
            aln.muscleAln()   #x#outfile='%s%s.phosaln.fas' % (ppath,qseq.info['AccNum']))
            aln._addSeq('PhosAln','-' * qseq.seqLen())
            aln.info['Name'] = '%s%s.phosaln.fas' % (ppath,qseq.info['AccNum'])
            ## ~ [2b] Map phosphorylations ~ ##
            for a in range(qseq.seqLen()):
                qres = qseq.info['Sequence'][a]
                if qres != '-': rdict[qseq] += 1
                for hseq in pdict:
                    hres = hseq.info['Sequence'][a]
                    if hres == '-': continue
                    if hseq != qseq: rdict[hseq] += 1   # The query's own counter was advanced above
                    if rdict[hseq] in pdict[hseq] and qres == hres:     # Phosphosite
                        pdata = {'Name':search.info['Name'],'Pos':rdict[qseq],'AA':qres,
                                 'PELM':hseq.shortName(),'PELMPos':rdict[hseq],'Evidence':'Hom'}
                        if hseq == qseq: pdata['Evidence'] = 'Self'
                        elif hseq in idlist: pdata['Evidence'] = 'ID'
                        rje.delimitedFileOutput(self,self.info['PhosRes'],headers,delimit,pdata)
                        self.addPhos(aln.seq[-1],a,pdata['Evidence'])
            ## ~ [2c] Add Scansite/NetPhos if made? ~ ##
            ## ~ [2d] Save alignment ~ ##
            aln.saveFasta()
        # Align hits for each > X %ID
        # Map phosphosites onto alignment and output
        return
    except: self.log.errorLog('Problem during PhosphoSeq.mapPhosByBLAST')
def _pepStats(self):    ### Peptide property statistics
    '''
    Peptide property statistics.

    Loads the input sequences (autoload=T) and an amino acid property matrix, then writes one delimited row per
    peptide to 'hrb.pepstats.<ext>' (extension from rje.delimitExt). Columns are: peptide name, the summed value of
    each property in aaprop.prop (sorted by property name), net_charge (Positive minus Negative), charge_balance
    and hydrophobic_balance.

    Property values other than those for '-' and 'X' are converted to integers in place in aaprop.prop.
    Returns None. Any exception is logged and re-raised; the output file is closed either way.
    '''
    try:
        ### Setup ###
        seqlist = rje_seq.SeqList(self.log, self.cmd_list + ['autoload=T'])
        aaprop = rje_aaprop.AAPropMatrix(self.log, self.cmd_list)
        aaprop.makePropDif()
        delimit = rje.getDelimit(self.cmd_list)
        proplist = list(rje.sortKeys(aaprop.prop))
        pepname_re = r'^(\S+_\d[CQ])'   # Peptide names matching this are trimmed to the matched prefix

        ### Output File Setup ###
        OUTFILE = open('hrb.pepstats.%s' % rje.delimitExt(delimit), 'w')
        try:    # try/finally so the handle is released even when a calculation fails
            headlist = ['peptide']
            ## 10 Dimensional Peptide Property Output ##
            for prop in proplist:
                headlist.append(prop.lower())
                for aa in list(aaprop.prop[prop].keys()):
                    if aa in ['-', 'X']: continue
                    try: aaprop.prop[prop][aa] = int(aaprop.prop[prop][aa])
                    except (TypeError, ValueError):
                        # Show the offending entry before the error propagates
                        print('%s %s %s %s' % (aaprop.prop, prop, aa, aaprop.prop[prop][aa]))
                        raise
            ## Additional Stats ##
            headlist.append('net_charge')
            #headlist.append('hydrophobicity')
            headlist.append('charge_balance')
            headlist.append('hydrophobic_balance')
            #headlist.append('hydrophobicity_balance')
            ## Output
            rje.writeDelimit(OUTFILE, headlist, delimit)

            ### Calculate stats ###
            for pep in seqlist.seq:
                pepname = pep.shortName()
                trimmed = rje.matchExp(pepname_re, pepname)
                if trimmed: pepname = trimmed[0]
                outlist = [pepname]
                pepseq = pep.info['Sequence']
                ## 10 Dimensional Peptide Property Output ##
                for prop in proplist:
                    outlist.append('%d' % sum([aaprop.prop[prop][aa] for aa in pepseq]))
                ## Additional Stats ##
                net_charge = sum([aaprop.prop['Positive'][aa] - aaprop.prop['Negative'][aa] for aa in pepseq])
                outlist.append('%d' % net_charge)
                charge_balance = 0
                hydrophobic_balance = 0
                # Each residue is weighted by 1/(r+1) and counter-weighted by 1/(10-r).
                # NOTE(review): the constant 10 looks like an assumed peptide length; a peptide of 11+ residues
                # would divide by zero at r == 10 - confirm that inputs are always 10-mers.
                for (r, aa) in enumerate(pepseq):
                    charge_balance += aaprop.prop['Charged'][aa] * (1.0 / (r + 1))
                    charge_balance -= aaprop.prop['Charged'][aa] * (1.0 / (10 - r))
                    hydrophobic_balance += aaprop.prop['Hydrophobic'][aa] * (1.0 / (r + 1))
                    hydrophobic_balance -= aaprop.prop['Hydrophobic'][aa] * (1.0 / (10 - r))
                outlist.append('%.3f' % charge_balance)
                outlist.append('%.3f' % hydrophobic_balance)
                rje.writeDelimit(OUTFILE, outlist, delimit)
        finally:
            ### Finish ###
            OUTFILE.close()
    except:
        self.log.errorLog('Error in _pepStats', printerror=True, quitchoice=False)
        raise   # Delete this if method error not terrible
def _pepDis(self):      ### Peptide Distance
    '''
    Peptide Distance.

    Loads the input sequences (autoload=T), fills a symmetric rje_dismatrix.DisMatrix with one distance per
    sequence pair and saves it to '<basefile>.<Method>.<ext>'. The distance depends on self.info['Method']:
      ds_prop / ds_id = position-weighted property difference / identity mismatch, minus the mean of the two
                        within-sequence scores;
      tot_prop        = sum over properties of the absolute difference in summed property values;
      pam             = pam.pamML() of the two sequences;
      best_prop       = smallest summed property difference over all circular shifts of the second sequence.
    Any other method gives a distance of 0, as does a sequence against itself when OutMatrix is 'phylip'.

    Returns None. Any exception is logged and re-raised.
    '''
    try:
        ### <0> ### Setup
        seqlist = rje_seq.SeqList(self.log,self.cmd_list+['autoload=T'])
        dismatrix = rje_dismatrix.DisMatrix(self.log,self.cmd_list)
        method = self.info['Method']
        phylip = self.info['OutMatrix'] == 'phylip'
        dismatrix.info['Name'] = method
        dismatrix.opt['Symmetric'] = True
        if method in ['ds_prop','tot_prop','best_prop']:
            aaprop = rje_aaprop.AAPropMatrix(self.log,self.cmd_list)
            #aaprop.readAAProp()
            aaprop.makePropDif()
        elif method == 'pam':
            pam = rje_pam.PamCtrl(log=self.log,cmd_list=self.cmd_list)

        ### <1> ### Make DisMatrix
        for (i1,seq1) in enumerate(seqlist.seq):
            for (i2,seq2) in enumerate(seqlist.seq):
                # No need to calculate - symmetrical! Positions come from enumerate, not list.index() scans
                if i1 > i2: continue
                pep1 = seq1.info['Sequence']
                pep2 = seq2.info['Sequence']
                len1 = seq1.seqLen()
                dis = 0
                if seq1 == seq2 and phylip:
                    dis = 0
                elif method in ['ds_prop','ds_id']:
                    (self_dis1,self_dis2) = (0,0)
                    for r1 in range(len1):
                        for r2 in range(r1,seq2.seqLen()):
                            (a1,a2) = (pep1[r1],pep2[r2])
                            (s1,s2) = (pep1[r2],pep2[r1])
                            weight = len1 - (r2 - r1)   # Closer positions weigh more
                            if method == 'ds_prop':
                                dis += (aaprop.pdif['%s%s' % (a1,a2)] * weight)
                                self_dis1 += (aaprop.pdif['%s%s' % (a1,s1)] * weight)
                                self_dis2 += (aaprop.pdif['%s%s' % (a2,s2)] * weight)
                            elif method == 'ds_id' and a1 != a2:
                                dis += weight
                            if method == 'ds_id' and a1 != s1:
                                self_dis1 += weight
                            if method == 'ds_id' and a2 != s2:
                                self_dis2 += weight
                    dis -= (self_dis1 + self_dis2) / 2.0
                elif method == 'tot_prop':
                    proptot = {}
                    for prop in aaprop.prop.keys():
                        proptot[prop] = {seq1:0.0,seq2:0.0}
                    for seq in [seq1,seq2]:
                        for r in range(seq.seqLen()):
                            aa = seq.info['Sequence'][r]
                            for prop in aaprop.prop.keys():
                                proptot[prop][seq] += float(aaprop.prop[prop][aa])
                    for prop in aaprop.prop.keys():
                        dis += abs(proptot[prop][seq1] - proptot[prop][seq2])
                elif method == 'pam':
                    dis = pam.pamML(ancseq=pep1,descseq=pep2)
                elif method == 'best_prop':
                    min_dis = len1 * len(aaprop.prop)
                    for c in range(len1):  # Circular start
                        dis = 0
                        shifted = pep2[c:] + pep2[:c]
                        for r in range(len1):
                            dis += aaprop.pdif['%s%s' % (pep1[r],shifted[r])]
                        if dis < min_dis: min_dis = dis
                    dis = min_dis
                dismatrix.addDis(seq1,seq2,dis)

        ### <2> ### Output
        if phylip:
            delimit = ' '
            outformat = 'phylip'
        else:
            delimit = rje.getDelimit(self.cmd_list,',')
            outformat = 'None'
        outfile = '%s.%s.%s' % (rje.baseFile(seqlist.info['Name'],True),method,rje.delimitExt(delimit))
        dismatrix.saveMatrix(seqlist.seq,outfile,delimit,format=outformat)
    except:
        self.log.errorLog('Error in _pepDis',printerror=True,quitchoice=False)
        raise   # Delete this if method error not terrible
def convert(self, filelist=None, outfile=None):  ### Converts scansite output files in FileList to Outfile
    '''
    Converts scansite output files in FileList to Outfile.

    filelist = list of scansite output file names; when empty or None, self.list['FileList'] is used.
    outfile  = name of the delimited output file; when not given, self.info['Name'] is used.

    Every input line matching re_scansite is written as one delimited row. A header row is written when the file
    is created, i.e. unless self.opt['Append'] is set and the file already exists. Missing input files are logged
    and skipped.

    Returns False if there are no files to convert and True once all files have been processed.
    Any exception is logged with the current stage and re-raised; open files are closed either way.
    '''
    _stage = 'Setup'
    try:
        ### Setup ###
        if not filelist: filelist = self.list['FileList']
        if not outfile: outfile = self.info['Name']
        if not filelist:
            self.log.errorLog('No scansite files to convert! %s unchanged/not made.' % outfile, printerror=False)
            return False
        delimit = rje.getDelimit(self.cmd_list)
        ext = rje.delimitExt(delimit)
        if ext != outfile[-3:]:     # Offer to swap the last three characters for the delimiter's extension
            newfile = outfile[:-3] + ext
            if rje.yesNo('Change file name from %s to %s?' % (outfile, newfile)): outfile = newfile
        self.log.printLog('#OUT', 'Converting %d file(s), output to %s.' % (len(filelist), outfile))

        ### Output File ###
        _stage = 'Output File'
        newfile = not self.opt['Append'] or not os.path.exists(outfile)
        if newfile: OUTFILE = open(outfile, 'w')    # Create with header
        else: OUTFILE = open(outfile, 'a')
        try:    # try/finally so the output handle is released even when conversion fails
            if newfile:
                headers = ['seq_id', 'enzyme', 'enz_group', 'aa', 'pos', 'score', 'percentile', 'matchseq', 'sa']
                rje.writeDelimit(OUTFILE, headers, delimit)

            ### Conversion ###
            _stage = 'Conversion'
            sx = 0      # Results written in total
            for infile in filelist:
                if not os.path.exists(infile):
                    self.log.errorLog('Input file %s does not exist! :o(' % infile, False, False)
                    continue
                fx = 0  # Results written from this file
                INFILE = open(infile, 'r')
                try:
                    inline = rje.nextLine(INFILE)
                    while inline is not None:
                        scanlist = rje.matchExp(re_scansite, inline)    # Match once and reuse the result
                        if scanlist:
                            rje.writeDelimit(OUTFILE, scanlist, delimit)
                            sx += 1
                            fx += 1
                            rje.progressPrint(self, sx)
                        inline = rje.nextLine(INFILE)
                finally:
                    INFILE.close()
                self.log.printLog('#OUT', '%s scansite results from %s. (%s Total.)' %
                                  (rje.integerString(fx), infile, rje.integerString(sx)))
        finally:
            OUTFILE.close()

        ### End ###
        _stage = 'End'
        self.log.printLog('#OUT', '%s scansite results output to %s.' % (rje.integerString(sx), outfile))
        return True
    except:
        self.log.errorLog('Error in convert(%s)' % _stage, printerror=True, quitchoice=False)
        raise
def _pepDis(self):      ### Peptide Distance
    '''
    Peptide Distance.

    Loads the input sequences (autoload=T), fills a symmetric rje_dismatrix.DisMatrix with one distance per
    sequence pair and saves it to '<basefile>.<Method>.<ext>'. The distance depends on self.info['Method']:
      ds_prop / ds_id = position-weighted property difference / identity mismatch, minus the mean of the two
                        within-sequence scores;
      tot_prop        = sum over properties of the absolute difference in summed property values;
      pam             = pam.pamML() of the two sequences;
      best_prop       = smallest summed property difference over all circular shifts of the second sequence.
    Any other method gives a distance of 0, as does a sequence against itself when OutMatrix is 'phylip'.

    Returns None. Any exception is logged and re-raised.
    '''
    try:
        ### <0> ### Setup
        seqlist = rje_seq.SeqList(self.log, self.cmd_list + ['autoload=T'])
        dismatrix = rje_dismatrix.DisMatrix(self.log, self.cmd_list)
        method = self.info['Method']
        phylip = self.info['OutMatrix'] == 'phylip'
        dismatrix.info['Name'] = method
        dismatrix.opt['Symmetric'] = True
        if method in ['ds_prop', 'tot_prop', 'best_prop']:
            aaprop = rje_aaprop.AAPropMatrix(self.log, self.cmd_list)
            #aaprop.readAAProp()
            aaprop.makePropDif()
        elif method == 'pam':
            pam = rje_pam.PamCtrl(log=self.log, cmd_list=self.cmd_list)

        ### <1> ### Make DisMatrix
        for (i1, seq1) in enumerate(seqlist.seq):
            for (i2, seq2) in enumerate(seqlist.seq):
                # No need to calculate - symmetrical! Positions come from enumerate, not list.index() scans
                if i1 > i2: continue
                pep1 = seq1.info['Sequence']
                pep2 = seq2.info['Sequence']
                len1 = seq1.seqLen()
                dis = 0
                if seq1 == seq2 and phylip:
                    dis = 0
                elif method in ['ds_prop', 'ds_id']:
                    (self_dis1, self_dis2) = (0, 0)
                    for r1 in range(len1):
                        for r2 in range(r1, seq2.seqLen()):
                            (a1, a2) = (pep1[r1], pep2[r2])
                            (s1, s2) = (pep1[r2], pep2[r1])
                            weight = len1 - (r2 - r1)   # Closer positions weigh more
                            if method == 'ds_prop':
                                dis += (aaprop.pdif['%s%s' % (a1, a2)] * weight)
                                self_dis1 += (aaprop.pdif['%s%s' % (a1, s1)] * weight)
                                self_dis2 += (aaprop.pdif['%s%s' % (a2, s2)] * weight)
                            elif method == 'ds_id' and a1 != a2:
                                dis += weight
                            if method == 'ds_id' and a1 != s1:
                                self_dis1 += weight
                            if method == 'ds_id' and a2 != s2:
                                self_dis2 += weight
                    dis -= (self_dis1 + self_dis2) / 2.0
                elif method == 'tot_prop':
                    proptot = {}
                    for prop in aaprop.prop.keys():
                        proptot[prop] = {seq1: 0.0, seq2: 0.0}
                    for seq in [seq1, seq2]:
                        for r in range(seq.seqLen()):
                            aa = seq.info['Sequence'][r]
                            for prop in aaprop.prop.keys():
                                proptot[prop][seq] += float(aaprop.prop[prop][aa])
                    for prop in aaprop.prop.keys():
                        dis += abs(proptot[prop][seq1] - proptot[prop][seq2])
                elif method == 'pam':
                    dis = pam.pamML(ancseq=pep1, descseq=pep2)
                elif method == 'best_prop':
                    min_dis = len1 * len(aaprop.prop)
                    for c in range(len1):  # Circular start
                        dis = 0
                        shifted = pep2[c:] + pep2[:c]
                        for r in range(len1):
                            dis += aaprop.pdif['%s%s' % (pep1[r], shifted[r])]
                        if dis < min_dis: min_dis = dis
                    dis = min_dis
                dismatrix.addDis(seq1, seq2, dis)

        ### <2> ### Output
        if phylip:
            delimit = ' '
            outformat = 'phylip'
        else:
            delimit = rje.getDelimit(self.cmd_list, ',')
            outformat = 'None'
        outfile = '%s.%s.%s' % (rje.baseFile(seqlist.info['Name'], True), method, rje.delimitExt(delimit))
        dismatrix.saveMatrix(seqlist.seq, outfile, delimit, format=outformat)
    except:
        self.log.errorLog('Error in _pepDis', printerror=True, quitchoice=False)
        raise   # Delete this if method error not terrible
def badasp(out,mainlog,cmd_list,tree=None): ### Main BADASP Method
    '''
    Main BADASP Method. Automated run if interactive < 1
    <1> Load Sequences and Tree
    <2> Define Subfamilies
    <3> GASP Ancestral Sequence Prediction
    <4> Perform Functional Specificity and Sequence Conservation Calculations
    <5> Output Results
    <6> Optional filtered (partial) output, offered only when out.stat['Interactive'] > 0

    out      = object providing stat['Interactive'] and verbose()
    mainlog  = log object providing errorLog() and verbose()
    cmd_list = list of 'option=value' command strings. NB. it is appended to in place ('group=', 'nsfin=' and
               'funcspec=' entries may be added).
    tree     = optional tree object; when None a rje_tree.Tree is built from cmd_list.

    Returns None. Each stage runs in its own try block: failures are reported through mainlog.errorLog and the
    method carries on to the next stage. SystemExit raised in stages <1> and <2> is passed on to the caller.
    '''
    try:
        ### <0> ### Setup
        _seqfile = None
        _treefile = None
        append_file = None
        basefile = None
        for cmd in cmd_list:
            if cmd.startswith('seqin='):
                _seqfile = cmd[len('seqin='):]
                # Slice rather than index, so names shorter than four characters cannot raise IndexError
                if _seqfile[-4:-3] == '.':
                    _seqfile = _seqfile[:-4]
            if cmd.startswith('useanc='):
                _seqfile = cmd[len('useanc='):]
                if _seqfile[-8:] == '.anc.fas':
                    _seqfile = _seqfile[:-8]
            if cmd.startswith('nsfin='):
                _treefile = cmd[len('nsfin='):]
            if cmd.startswith('append='):
                append_file = cmd[len('append='):]
            if cmd.startswith('basefile='):
                basefile = cmd[len('basefile='):]
        if _seqfile and os.path.exists('%s.grp' % _seqfile):
            cmd_list.append('group=%s.grp' % _seqfile)
        if _seqfile and _treefile is None:
            if rje.checkForFile('%s.nwk' % _seqfile): _treefile = '%s.nwk' % _seqfile
            else: _treefile = '%s.nsf' % _seqfile
            out.verbose(0,2,'Looking for treefile %s.' % _treefile,1)
            if rje.checkForFile(_treefile):
                cmd_list.append('nsfin=%s' % _treefile)
        if tree is None:
            mainlog.verbose(0,1,'Tree: %s' % cmd_list,2)
            tree = rje_tree.Tree(log=mainlog,cmd_list=cmd_list)
            #tree._setupFromCmd()
        if tree.stat['MinFamNum'] < 2:
            tree.stat['MinFamNum'] = 2

        ### <1> ### Load Sequences and Tree
        while out.stat['Interactive'] > 0 or tree.obj['SeqList'] is None:
            tree = rje_tree.treeMenu(out,mainlog,['root=yes']+cmd_list,tree)
            if tree.obj['SeqList'] and tree.opt['Rooted']:
                break
            else:
                print('\n ** Must have loaded sequences and a rooted tree. ** \n')
                if out.stat['Interactive'] < 0 or rje.yesNo('Quit BADASP?',default='N'):
                    sys.exit()
        basename = tree.obj['SeqList'].info['Name']
        if basename[-4:] == '.fas':
            basename = basename[:-4]
        if basename[-4:] == '.anc':
            basename = basename[:-4]
        if basefile:
            basename = basefile
    except SystemExit:
        raise
    except:
        mainlog.errorLog('Major Error in badasp loading sequences and tree',True)

    try:
        ### <2> ### Define Subfamilies
        while out.stat['Interactive'] > 0 or tree.groupNum() < 2:
            tree.treeGroup(callmenu=True)
            if tree.groupNum() >= 2:
                break
            else:
                mainlog.errorLog('Must have at least two subfamilies for specificity analyses.',printerror=False)
                if out.stat['Interactive'] < 0 or rje.yesNo('Continue without specificity analyses?'):
                    cmd_list.append('funcspec=')
                    break
                elif rje.yesNo('Abort BADASP?'):
                    sys.exit()
    except SystemExit:
        raise
    except:
        mainlog.errorLog('Major Error in BADASP subfamilies',True)

    try:
        ### <3> ### GASP Ancestral Sequence Prediction
        if tree.node[-1].obj['Sequence'] is None:   # No ancseq loaded
            while out.stat['Interactive'] > 0 and rje.yesNo('Use %s for output filenames?' % basename) == False:
                basename = rje.choice('FILEname (FILE.anc.fas, FILE.anc.nsf, FILE.txt)?: ', default=basename)
            mygasp = rje_ancseq.Gasp(tree=tree,ancfile=basename,cmd_list=cmd_list,log=mainlog)
            out.verbose(0,2,'%s' % mygasp.details(),1)
            if out.stat['Interactive'] > 0:
                if rje.yesNo('Use these parameters?') == False:
                    mygasp.edit()
            mygasp.gasp()
    except:
        mainlog.errorLog('Major Error in BADASP GASP',True)

    try:
        ### <4> ### Perform Functional Specificity and Sequence Conservation Calculations
        _stage = '<4> Specificity/Conservation Analyses'
        aaprop = rje_aaprop.AAPropMatrix(log=mainlog,cmd_list=cmd_list)
        query = tree.obj['SeqList'].obj['QuerySeq']

        ## <a> ## Chosen Methods
        _stage = '<4a> Specificity/Conservation Analyses - Chosen Methods'
        # Copies are taken because methods may be removed below: the module-level lists must stay intact
        funcspec = rje_specificity.methodlist[:]    # ['BAD','BADN','BADX']
        seqcon = rje_conseq.methodlist[:]           # ['info']
        for cmd in cmd_list:
            if cmd.startswith('funcspec='):
                funcspec = cmd[len('funcspec='):].split(',')
            if cmd.startswith('seqcon='):
                seqcon = cmd[len('seqcon='):].split(',')
        if 'all' in funcspec:
            funcspec = rje_specificity.methodlist[:]
        if 'all' in seqcon:
            seqcon = rje_conseq.methodlist[:]
        for method in ['BADX','BADN','QPCon_Mean','QPCon_Abs','QPCon_Mean_All']:
            for (methods,mtype) in [(funcspec,'specificity'),(seqcon,'conservation')]:
                # Keep asking until the method is dropped or a query sequence is chosen
                while method in methods and query is None:
                    if rje.yesNo('Method %s needs query but none given. Drop %s from %s methods?' % (method,method,mtype)):
                        methods.remove(method)
                        break
                    for seq in tree.obj['SeqList'].seq:
                        if rje.yesNo('Method %s needs query but none given. Use sequence 1 (%s)?' % (method,seq.shortName()),default='N'):
                            query = seq
                            # NOTE(review): the query is read from obj['QuerySeq'] above but stored under
                            # obj['Query'] here - confirm which key is intended.
                            tree.obj['SeqList'].obj['Query'] = seq
                            break
        qname = query
        if query:
            qname = query.info['Name']
        out.verbose(0,3,'\nQuery = %s' % qname,2)

        ## <b> ## Spec Calculations
        _stage = '<4b> Specificity Calculations'
        specmatrix = rje_specificity.FuncSpec(log=mainlog,cmd_list=cmd_list,tree=tree,aaprop=aaprop)
        specmatrix.calcScore(query=query,methods=funcspec)

        ## <c> ## Conservation Calculations
        _stage = '<4c> Specificity/Conservation Analyses - Conservation Calculations'
        conseq = rje_conseq.SeqStat(log=mainlog,cmd_list=cmd_list,tree=tree,aaprop=aaprop)
        conseq.calcScore(query=query,methods=seqcon)    ### Sends appropriate seqlist to self.calcScore()

        ## <d> ## Special Case: QPCon vs All seqs
        _stage = '<4d> Specificity/Conservation Analyses - QPCon vs All'
        qpconall = []
        #if 'QPCon_Abs_All' in seqcon and query:
        #    qpconall.append('QPCon_Abs')
        if 'QPCon_Mean_All' in seqcon and query:
            qpconall.append('QPCon_Mean')
        for qp in qpconall:
            # Store the all-sequence results under '<method>_All' before the subfamily rerun below
            conseq.score['%s_All' % qp] = conseq.score[qp]
            for results in [conseq.alnwin,conseq.qrywin,conseq.rank,conseq.alnrankwin,conseq.qryrankwin]:
                if qp in results:
                    results['%s_All' % qp] = results[qp]
        _stage = '<4d> Specificity/Conservation Analyses - FamQP'
        famqp = []
        if 'QPCon_Mean' in seqcon:
            famqp.append('QPCon_Mean')
        if 'QPCon_Abs' in seqcon:
            famqp.append('QPCon_Abs')
        if len(famqp) > 0 and query:    #!# And subfam option?
            qseq = []
            for fam in tree.subfam:
                for node in tree._nodeClade(fam):
                    if query == node.obj['Sequence']:
                        for qnode in tree._nodeClade(fam):
                            qseq.append(qnode.obj['Sequence'])
            conseq.calcScore(query=query,seqlist=qseq,methods=famqp)    ### Sends appropriate seqlist to self.calcScore()
    except:
        mainlog.errorLog('Major Error in BADASP Specificity Analysis (%s):' % _stage,True)

    BADASP = None   # Output handle: lets the error handler tell whether a file was ever opened
    line = []       # Output row being built: lets the error handler save a partly built row
    try:
        ### <5> ### Full Output Results
        _stage = '<5> Full Output'
        # This output is in a tab- or comma-delimited file for easy manipulation or viewing with other programs.
        # (1) statistics for a given residue;
        # (2) statistics for a given window size across
        #   - (a) the whole alignment, (node=None)
        #   - (b) the Query protein of interest (if given) and (node=QueryNode)
        #   - (c) the ancestral sequence of each subfamily; (node=ancnode)
        # (3) Predicted ancestral sequences at
        #   - (a) the root and
        #   - (b) the ancestor of each subfamily.
        delimit = rje.getDelimit(cmd_list)

        ## <a> ## Setup
        _stage = '<5a> Output - Setup'
        rankout = specmatrix.opt['Rank']
        #tree._regenerateSeqList(tree.obj['SeqList'],tree.node)
        root = tree.node[-1].obj['Sequence']    #!# At some point, make sure this is the most ancient duplication!
        out.verbose(0,3,'\nBADASP Results Output (%s.badasp) ...' % basename,0)

        ## <b> ## Header
        _stage = '<5b> Output - Header'
        _header = True
        if append_file:
            if rje.checkForFile(append_file):
                _header = False
            BADASP = open(append_file, 'a')
        else:
            BADASP = open('%s.badasp' % basename, 'w')
            BADASP.write("BADASP Output: %s\n" % (time.asctime(time.localtime(time.time()))))
            BADASP.write('%s\n\n' % cmd_list)
        header = ['aln_pos','anc_aa']   # Aln Pos and AA
        alnlen = 0
        statlist = funcspec + seqcon
        _stage = '<5b-i> Output - Header Query'
        if query:
            header += ['qry_pos','qry_aa']  # Qry Pos and AA
        _stage = '<5b-ii> Output - Header Subfam'
        for f in range(len(tree.subfam)):
            header += ['fam%d_pos' % (f+1),'fam%d_aa' % (f+1)]  # Subfam Pos and AA
        for func in statlist:
            _stage = '<5b-iii> Output - Header %s' % func
            statobj = statObj(method=func,objlist=[specmatrix,conseq])
            fs = func.lower()
            alnlen = len(statobj.score[func])
            header.append(fs)   # Score
            if rankout:
                header.append('%s_rank' % fs)   # Rank
            if statobj.stat['WinSize'] > 1:
                header.append('%s_alnwin' % fs)     # Full align window
                if rankout:
                    header.append('%s_alnrankwin' % fs)     # Rank
                if query:
                    header.append('%s_qrywin' % fs)     # Qry window
                    if rankout:
                        header.append('%s_qryrankwin' % fs)     # Rank
                if func in funcspec:
                    for f in range(len(tree.subfam)):
                        header.append('%s_fam%d_win' % (fs,f+1))    # Subfam windows
                        if rankout:
                            header.append('%s_fam%d_rankwin' % (fs,f+1))    # Subfam windows
        #if _header:
        BADASP.write('%s\n' % delimit.join(header))
        out.verbose(1,3,'%s...' % delimit.join(header),0)

        ## <c> ## Stats
        _stage = '<5c> Stats'
        qr = 0      # Qry pos
        fr = [0] * len(tree.subfam)     # List of subfam positions
        aa = ''     # Root aa
        qa = ''     # Qry aa
        fa = [''] * len(tree.subfam)    # List of subfam aas
        for r in range(alnlen):
            # <i> # Positions and aas
            _stage = '<5c-i> Output - Stats, positions & aas'
            aa = root.info['Sequence'][r]
            if query:
                qa = query.info['Sequence'][r]
                if qa != '-':
                    qr += 1
            for f in range(len(tree.subfam)):
                fa[f] = tree.subfam[f].obj['Sequence'].info['Sequence'][r]
                if fa[f] != '-':
                    fr[f] += 1
            # <ii> # Positions and AAs ii
            _stage = '<5c-ii> Output - Pos & AA ii'
            line = ['%d' % (r+1), aa]   # Aln Pos and AA
            if query:
                if qa == '-':
                    line += ['-',qa]    # Qry Pos and AA
                else:
                    line += ['%d' % qr,qa]  # Qry Pos and AA
            for f in range(len(tree.subfam)):
                if fa[f] == '-':
                    line += ['-',fa[f]]     # Subfam Pos and AA
                else:
                    line += ['%d' % fr[f],fa[f]]    # Subfam Pos and AA
            # <iii> # Stats
            _stage = '<5c-iii> Output - Stats'
            for func in statlist:
                statobj = statObj(method=func,objlist=[specmatrix,conseq])
                fs = func.lower()
                line.append(str(statobj.score[func][r]))    # Score
                if rankout:
                    line.append(str(statobj.rank[func][r]))     # Rank
                # NOTE(review): the header tests statobj.stat['WinSize'] but rows test specmatrix.stat['WinSize'];
                # columns only line up if the two agree - confirm.
                if specmatrix.stat['WinSize'] > 1:
                    line.append(str(statobj.alnwin[func][r]))   # Full align window
                    if rankout:
                        line.append(str(statobj.alnrankwin[func][r]))   # Rank
                    if query:
                        line.append(str(statobj.qrywin[func][r]))   # Qry window
                        if rankout:
                            line.append(str(statobj.qryrankwin[func][r]))   # Rank
                    if func in funcspec:
                        for f in range(len(tree.subfam)):
                            line.append(str(statobj.famwin[func][tree.subfam[f]][r]))   # Subfam windows
                            if rankout:
                                line.append(str(statobj.famrankwin[func][tree.subfam[f]][r]))   # Subfam windows
            # <iv> # Writing
            _stage = '<5c-iv> Output - Writing'
            BADASP.write('%s\n' % delimit.join(line))
        BADASP.close()
        out.verbose(0,2,'Done!',2)
    except:
        mainlog.errorLog('Fatal Error in BADASP Full output (%s):' % _stage,True)
        # Save the row in progress and release the file, but only if output had actually started
        if BADASP is not None and not BADASP.closed:
            try:
                if line: BADASP.write('%s\n' % delimit.join(line))
            finally:
                BADASP.close()

    BADASP = None   # Reset so that the stage <5> handle and row are never reused by the handler below
    line = []
    try:
        ### <6> ### Partial Results Output
        _stage = '<6> Partial Output'
        ## <a> ## Setup
        _stage = '<6a> Output - Setup'
        # statlist & alnlen from above
        _part_append = False
        if out.stat['Interactive'] > 0 and rje.yesNo('Output additional, filtered results?',default='N'):
            partfile = rje.choice('Name for partial results file?:','%s.partial.badasp' % basename,confirm=True)
            if rje.checkForFile(partfile) and rje.yesNo('File %s exists. Append file without headers?' % partfile):
                _part_append = True
        else:
            return
        if rje.yesNo('Filter output columns?',default='N'):
            if rje.yesNo('Output query details (pos,aa & win)?') == False:
                query = None
            f = 1
            for fam in tree.subfam[0:]:     # Iterate over a copy: entries may be removed
                if rje.yesNo('Output subfam %d (%s) details (pos,aa & win)?' % (f,fam.info['CladeName'])) == False:
                    tree.subfam.remove(fam)
                f += 1
            for func in statlist[0:]:       # Iterate over a copy: entries may be removed
                if rje.yesNo('Output %s results?' % func) == False:
                    statlist.remove(func)
        alnout = [True] * alnlen    # Whether each alignment position passes the row filters
        if rje.yesNo('Filter Rows by Results VALUES?'):
            out.verbose(0,0,'Initial Defaults are minmum values. Accept intital default for no filtering of given Stat.',1)
            for stat in statlist:
                ### Filter by value? ###
                statobj = statObj(method=stat,objlist=[specmatrix,conseq])
                scores = statobj.score[stat][0:]
                scores.sort()
                cutoff = rje.getFloat('Min. value for %s?:' % stat,default='%f' % scores[0],confirm=True)
                for r in range(alnlen):
                    if statobj.score[stat][r] < cutoff:
                        alnout[r] = False
        if rankout and rje.yesNo('Filter Rows by Results RANKS?'):
            out.verbose(0,0,'Ranks range from 0 (low) to 1 (high).',1)
            for stat in statlist:
                ### Filter by Rank? ###
                statobj = statObj(method=stat,objlist=[specmatrix,conseq])
                cutoff = rje.getFloat('Min. rank for %s?:' % stat,default='0.0',confirm=True)
                for r in range(alnlen):
                    if statobj.rank[stat][r] < cutoff:
                        alnout[r] = False
        out.verbose(0,3,'\nBADASP Partial Results Output (%s) ...' % partfile,0)

        ## <b> ## Header
        _stage = '<6b> Partial Output - Header'
        if _part_append:
            BADASP = open(partfile, 'a')
        else:
            BADASP = open(partfile, 'w')
            BADASP.write("Partial BADASP Output: %s\n" % (time.asctime(time.localtime(time.time()))))
            BADASP.write('%s\n\n' % cmd_list)
        header = ['aln_pos','anc_aa']   # Aln Pos and AA
        _stage = '<6b-i> Partial Output - Header Query'
        if query:
            header += ['qry_pos','qry_aa']  # Qry Pos and AA
        _stage = '<6b-ii> Partial Output - Header Subfam'
        for f in range(len(tree.subfam)):
            header += ['fam%d_pos' % (f+1),'fam%d_aa' % (f+1)]  # Subfam Pos and AA
        for func in statlist:
            _stage = '<6b-iii> Partial Output - Header %s' % func
            statobj = statObj(method=func,objlist=[specmatrix,conseq])
            fs = func.lower()
            header.append(fs)   # Score
            if rankout:
                header.append('%s_rank' % fs)   # Rank
            if statobj.stat['WinSize'] > 1:
                header.append('%s_alnwin' % fs)     # Full align window
                if rankout:
                    header.append('%s_alnrankwin' % fs)     # Rank
                if query:
                    header.append('%s_qrywin' % fs)     # Qry window
                    if rankout:
                        header.append('%s_qryrankwin' % fs)     # Rank
                if func in funcspec:
                    for f in range(len(tree.subfam)):
                        header.append('%s_fam%d_win' % (fs,f+1))    # Subfam windows
                        if rankout:
                            header.append('%s_fam%d_rankwin' % (fs,f+1))    # Subfam windows
        #if not _part_append:
        BADASP.write('%s\n' % delimit.join(header))
        out.verbose(1,3,'%s...' % delimit.join(header),0)

        ## <c> ## Stats
        _stage = '<6c> Stats'
        qr = 0      # Qry pos
        fr = [0] * len(tree.subfam)     # List of subfam positions
        aa = ''     # Root aa
        qa = ''     # Qry aa
        fa = [''] * len(tree.subfam)    # List of subfam aas
        for r in range(alnlen):
            # <i> # Positions and aas
            _stage = '<6c-i> Partial Output - Stats, positions & aas'
            aa = root.info['Sequence'][r]
            if query:
                qa = query.info['Sequence'][r]
                if qa != '-':
                    qr += 1
            for f in range(len(tree.subfam)):
                fa[f] = tree.subfam[f].obj['Sequence'].info['Sequence'][r]
                if fa[f] != '-':
                    fr[f] += 1
            # Skip filtered rows only after the counters above have advanced, so that the reported query and
            # subfamily positions stay correct in the rows that are written.
            if not alnout[r]:
                continue
            # <ii> # Positions and AAs ii
            _stage = '<6c-ii> Partial Output - Pos & AA ii'
            line = ['%d' % (r+1), aa]   # Aln Pos and AA
            if query:
                if qa == '-':
                    line += ['-',qa]    # Qry Pos and AA
                else:
                    line += ['%d' % qr,qa]  # Qry Pos and AA
            for f in range(len(tree.subfam)):
                if fa[f] == '-':
                    line += ['-',fa[f]]     # Subfam Pos and AA
                else:
                    line += ['%d' % fr[f],fa[f]]    # Subfam Pos and AA
            # <iii> # Stats
            _stage = '<6c-iii> Partial Output - Stats'
            for func in statlist:
                statobj = statObj(method=func,objlist=[specmatrix,conseq])
                fs = func.lower()
                line.append(str(statobj.score[func][r]))    # Score
                if rankout:
                    line.append(str(statobj.rank[func][r]))     # Rank
                if specmatrix.stat['WinSize'] > 1:
                    line.append(str(statobj.alnwin[func][r]))   # Full align window
                    if rankout:
                        line.append(str(statobj.alnrankwin[func][r]))   # Rank
                    if query:
                        line.append(str(statobj.qrywin[func][r]))   # Qry window
                        if rankout:
                            line.append(str(statobj.qryrankwin[func][r]))   # Rank
                    if func in funcspec:
                        for f in range(len(tree.subfam)):
                            line.append(str(statobj.famwin[func][tree.subfam[f]][r]))   # Subfam windows
                            if rankout:
                                line.append(str(statobj.famrankwin[func][tree.subfam[f]][r]))   # Subfam windows
            # <iv> # Writing
            _stage = '<6c-iv> Partial Output - Writing'
            BADASP.write('%s\n' % delimit.join(line))
        BADASP.close()
        out.verbose(0,2,'Done!',2)
    except:
        mainlog.errorLog('Fatal Error in BADASP Partial output (%s):' % _stage,True)
        # Save the row in progress and release the file, but only if output had actually started
        if BADASP is not None and not BADASP.closed:
            try:
                if line: BADASP.write('%s\n' % delimit.join(line))
            finally:
                BADASP.close()