def convert(self, filelist=None, outfile=None):  ### Converts scansite output files in FileList to Outfile
    '''
    Converts scansite output files in FileList to Outfile.
    >> filelist:list = Scansite output files to convert [self.list['FileList'] if empty or None]
    >> outfile:str = Name of delimited output file [self.info['Name'] if empty or None]
    << Returns True once all files have been processed, or False if there were no files to convert.
    Lines matching re_scansite are written to outfile as one delimited row per match. Input files that do not
    exist are reported and skipped. Any exception is logged with the stage it occurred in and then re-raised.
    '''
    _stage = 'Setup'  # Bound before the try block so the error handler can always report a stage
    try:
        ### Setup ###
        if not filelist:
            filelist = self.list['FileList']
        if not outfile:
            outfile = self.info['Name']
        if len(filelist) < 1:
            self.log.errorLog('No scansite files to convert! %s unchanged/not made.' % outfile, printerror=False)
            return False
        delimit = rje.getDelimit(self.cmd_list)
        ext = rje.delimitExt(delimit)
        if ext != outfile[-3:]:  # Offer to swap the 3-letter extension for the one matching the delimiter
            newfile = outfile[:-3] + ext
            if rje.yesNo('Change file name from %s to %s?' % (outfile, newfile)):
                outfile = newfile
        self.log.printLog('#OUT', 'Converting %d file(s), output to %s.' % (len(filelist), outfile))

        ### Output File ###
        _stage = 'Output File'
        appending = self.opt['Append'] and os.path.exists(outfile)
        if appending:
            OUTFILE = open(outfile, 'a')
        else:
            OUTFILE = open(outfile, 'w')
        try:  # Guarantees OUTFILE is closed even if conversion fails part-way
            if not appending:  # Create with header
                headers = ['seq_id', 'enzyme', 'enz_group', 'aa', 'pos', 'score', 'percentile', 'matchseq', 'sa']
                rje.writeDelimit(OUTFILE, headers, delimit)

            ### Conversion ###
            _stage = 'Conversion'
            sx = 0  # Total results over all files
            for infile in filelist:
                if not os.path.exists(infile):
                    self.log.errorLog('Input file %s does not exist! :o(' % infile, False, False)
                    continue
                fx = 0  # Results from this file
                INFILE = open(infile, 'r')
                try:
                    inline = rje.nextLine(INFILE)
                    while inline is not None:
                        scanlist = rje.matchExp(re_scansite, inline)  # Matched once and reused
                        if scanlist:
                            rje.writeDelimit(OUTFILE, scanlist, delimit)
                            sx += 1
                            fx += 1
                            rje.progressPrint(self, sx)
                        inline = rje.nextLine(INFILE)
                finally:
                    INFILE.close()
                self.log.printLog('#OUT', '%s scansite results from %s. (%s Total.)'
                                  % (rje.integerString(fx), infile, rje.integerString(sx)))

            ### End ###
            _stage = 'End'
        finally:
            OUTFILE.close()
        self.log.printLog('#OUT', '%s scansite results output to %s.' % (rje.integerString(sx), outfile))
        return True
    except:  # Deliberately broad: everything is logged with its stage, then re-raised unchanged
        self.log.errorLog('Error in convert(%s)' % _stage, printerror=True, quitchoice=False)
        raise
def scap(self):  ### Full SCAP method
    '''
    Full SCAP method.
    Scores each sequence of self.obj['SeqList'] (type "qry") with self.scapSeq() for every X-mer length from the
    Markov object's MinXmer to MaxXmer and writes one delimited row per sequence to <Basefile>.<ext>. When
    self.obj['ScapBack'] differs from self.obj['SeqList'], its sequences are processed the same way as type "bg".
    Scores above 0.001 are written to 4 decimal places; smaller scores use exponent notation.
    Errors are reported through self.errorLog() and are not re-raised.
    '''
    try:
        ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        markov = self.obj['Markov']
        minx = markov.stat['MinXmer']
        maxx = markov.stat['MaxXmer']
        headers = ['seq', 'type', 'sorted']
        for x in range(minx, maxx + 1):
            headers.append('X%d' % x)
        delimit = rje.getDelimit(self.cmd_list, '\t')
        scapfile = '%s.%s' % (self.info['Basefile'], rje.delimitExt(delimit))
        rje.delimitedFileOutput(self, scapfile, headers, delimit, rje_backup=True)

        ### ~ [2] SCAP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        # Query and Background are processed identically, so both run through one loop: (object key, type, label)
        for (objkey, seqtype, label) in [('SeqList', 'qry', 'Query'), ('ScapBack', 'bg', 'Background')]:
            seqlist = self.obj[objkey]
            # Background is only processed when it is a different dataset from the query
            if seqtype == 'bg' and not (seqlist != self.obj['SeqList']):
                continue
            (sx, stot) = (0.0, seqlist.seqNum())
            for seq in seqlist.seq:
                # sx increases in steps of 100, so sx/stot is already a percentage
                self.progLog('\r#SCAP', 'SCAP processing %s to %s: %.2f%%' % (label, scapfile, (sx / stot)))
                sx += 100.0
                datadict = {'seq': seq.shortName(), 'type': seqtype, 'sorted': markov.opt['Sorted']}
                for x in range(minx, maxx + 1):
                    score = self.scapSeq(seq.info['Sequence'], x)
                    if score > 0.001:
                        datadict['X%d' % x] = '%.4f' % score
                    else:
                        datadict['X%d' % x] = '%.3e' % score
                rje.delimitedFileOutput(self, scapfile, headers, delimit, datadict)
            self.printLog('\r#SCAP', 'SCAP processed %s to %s for %s sequences.'
                          % (label, scapfile, rje.integerString(stot)))
        if markov.opt['Sorted']:
            self.printLog('#SCAP', 'Sorted SCAP run complete')
        else:
            self.printLog('#SCAP', 'UnSorted SCAP run complete')
    except:  # Best-effort method: failures are logged, not propagated
        self.errorLog(rje_zen.Zen().wisdom())
def _pepDis(self):  ### Peptide Distance
    '''
    Peptide Distance.
    Loads sequences (autoload=T), calculates a distance for every pair of sequences (including each sequence with
    itself) using self.info['Method'], and saves the resulting symmetric matrix to
    <sequence file base>.<Method>.<ext>. Methods handled here:
    - ds_prop / ds_id : position-weighted property difference / identity mismatch, minus the mean of the two
      sequences' self-distances. Indexes both sequences with the same positions - presumably expects peptides of
      equal length (TODO confirm).
    - tot_prop : summed absolute difference of per-property totals.
    - pam : pam.pamML() of the two sequences.
    - best_prop : minimum summed property difference over all circular rotations of the second peptide.
    Any other Method leaves all distances at 0. With OutMatrix "phylip", self-distances are fixed at 0.
    Errors are logged and re-raised.
    '''
    try:
        ### <0> ### Setup
        seqlist = rje_seq.SeqList(self.log, self.cmd_list + ['autoload=T'])
        dismatrix = rje_dismatrix.DisMatrix(self.log, self.cmd_list)
        method = self.info['Method']
        phylip = self.info['OutMatrix'] == 'phylip'
        dismatrix.info['Name'] = method
        dismatrix.opt['Symmetric'] = True
        if method in ['ds_prop', 'tot_prop', 'best_prop']:
            aaprop = rje_aaprop.AAPropMatrix(self.log, self.cmd_list)
            aaprop.makePropDif()
        elif method == 'pam':
            pam = rje_pam.PamCtrl(log=self.log, cmd_list=self.cmd_list)

        ### <1> ### Make DisMatrix
        # Matrix is symmetrical, so each sequence is only paired with itself and the sequences after it.
        # Pairing by list position avoids a list.index() search for every pair.
        for (i1, seq1) in enumerate(seqlist.seq):
            pepseq1 = seq1.info['Sequence']
            len1 = seq1.seqLen()  # assumes seqLen() simply reports the sequence length - TODO confirm
            for seq2 in seqlist.seq[i1:]:
                pepseq2 = seq2.info['Sequence']
                dis = 0
                if seq1 == seq2 and phylip:
                    dis = 0
                elif method in ['ds_prop', 'ds_id']:
                    (self_dis1, self_dis2) = (0, 0)
                    len2 = seq2.seqLen()
                    for r1 in range(len1):
                        for r2 in range(r1, len2):
                            (a1, a2) = (pepseq1[r1], pepseq2[r2])  # Between-sequence residue pair
                            (s1, s2) = (pepseq1[r2], pepseq2[r1])  # Within-sequence partners of a1 and a2
                            weight = len1 - (r2 - r1)  # Pairs that are closer together count for more
                            if method == 'ds_prop':
                                dis += (aaprop.pdif['%s%s' % (a1, a2)] * weight)
                                self_dis1 += (aaprop.pdif['%s%s' % (a1, s1)] * weight)
                                self_dis2 += (aaprop.pdif['%s%s' % (a2, s2)] * weight)
                            else:  # ds_id: count weighted mismatches
                                if a1 != a2:
                                    dis += weight
                                if a1 != s1:
                                    self_dis1 += weight
                                if a2 != s2:
                                    self_dis2 += weight
                    dis -= (self_dis1 + self_dis2) / 2.0
                elif method == 'tot_prop':
                    for prop in aaprop.prop.keys():
                        propval = aaprop.prop[prop]
                        tot1 = 0.0
                        for aa in pepseq1:
                            tot1 += float(propval[aa])
                        tot2 = 0.0
                        for aa in pepseq2:
                            tot2 += float(propval[aa])
                        dis += abs(tot1 - tot2)
                elif method == 'pam':
                    dis = pam.pamML(ancseq=pepseq1, descseq=pepseq2)
                elif method == 'best_prop':
                    min_dis = len1 * len(aaprop.prop)
                    for c in range(len1):  # Circular start
                        rotated = pepseq2[c:] + pepseq2[:c]
                        dis = 0
                        for r in range(len1):
                            dis += aaprop.pdif['%s%s' % (pepseq1[r], rotated[r])]
                        if dis < min_dis:
                            min_dis = dis
                    dis = min_dis
                dismatrix.addDis(seq1, seq2, dis)

        ### <2> ### Output
        if phylip:
            delimit = ' '
            matrix_format = 'phylip'
        else:
            delimit = rje.getDelimit(self.cmd_list, ',')
            matrix_format = 'None'  # String 'None', as passed on to saveMatrix()
        outfile = '%s.%s.%s' % (rje.baseFile(seqlist.info['Name'], True), method, rje.delimitExt(delimit))
        dismatrix.saveMatrix(seqlist.seq, outfile, delimit, format=matrix_format)
    except:  # Deliberately broad: logged, then re-raised unchanged
        self.log.errorLog('Error in _pepDis', printerror=True, quitchoice=False)
        raise  # Delete this if method error not terrible
def convert(self, filelist=None, outfile=None):  ### Converts scansite output files in FileList to Outfile
    '''
    Converts scansite output files in FileList to Outfile.
    >> filelist:list = Scansite output files to convert [self.list['FileList'] if empty or None]
    >> outfile:str = Name of delimited output file [self.info['Name'] if empty or None]
    << Returns True once all files have been processed, or False if there were no files to convert.
    Lines matching re_scansite are written to outfile as one delimited row per match. Input files that do not
    exist are reported and skipped. Any exception is logged with the stage it occurred in and then re-raised.
    '''
    _stage = 'Setup'  # Bound before the try block so the error handler can always report a stage
    try:
        ### Setup ###
        if not filelist:
            filelist = self.list['FileList']
        if not outfile:
            outfile = self.info['Name']
        if len(filelist) < 1:
            self.log.errorLog('No scansite files to convert! %s unchanged/not made.' % outfile, printerror=False)
            return False
        delimit = rje.getDelimit(self.cmd_list)
        ext = rje.delimitExt(delimit)
        if ext != outfile[-3:]:  # Offer to swap the 3-letter extension for the one matching the delimiter
            newfile = outfile[:-3] + ext
            if rje.yesNo('Change file name from %s to %s?' % (outfile, newfile)):
                outfile = newfile
        self.log.printLog('#OUT', 'Converting %d file(s), output to %s.' % (len(filelist), outfile))

        ### Output File ###
        _stage = 'Output File'
        appending = self.opt['Append'] and os.path.exists(outfile)
        if appending:
            OUTFILE = open(outfile, 'a')
        else:
            OUTFILE = open(outfile, 'w')
        try:  # Guarantees OUTFILE is closed even if conversion fails part-way
            if not appending:  # Create with header
                headers = ['seq_id', 'enzyme', 'enz_group', 'aa', 'pos', 'score', 'percentile', 'matchseq', 'sa']
                rje.writeDelimit(OUTFILE, headers, delimit)

            ### Conversion ###
            _stage = 'Conversion'
            sx = 0  # Total results over all files
            for infile in filelist:
                if not os.path.exists(infile):
                    self.log.errorLog('Input file %s does not exist! :o(' % infile, False, False)
                    continue
                fx = 0  # Results from this file
                INFILE = open(infile, 'r')
                try:
                    inline = rje.nextLine(INFILE)
                    while inline is not None:
                        scanlist = rje.matchExp(re_scansite, inline)  # Matched once and reused
                        if scanlist:
                            rje.writeDelimit(OUTFILE, scanlist, delimit)
                            sx += 1
                            fx += 1
                            rje.progressPrint(self, sx)
                        inline = rje.nextLine(INFILE)
                finally:
                    INFILE.close()
                self.log.printLog('#OUT', '%s scansite results from %s. (%s Total.)'
                                  % (rje.integerString(fx), infile, rje.integerString(sx)))

            ### End ###
            _stage = 'End'
        finally:
            OUTFILE.close()
        self.log.printLog('#OUT', '%s scansite results output to %s.' % (rje.integerString(sx), outfile))
        return True
    except:  # Deliberately broad: everything is logged with its stage, then re-raised unchanged
        self.log.errorLog('Error in convert(%s)' % _stage, printerror=True, quitchoice=False)
        raise
def hmmTable(self, outfile='', append=False, delimit=None):  ### Outputs results table
    '''
    Outputs one delimited row per alignment of every hit of every search in self.search.
    >> outfile:str = Name of output file. If empty, self.info['HMMTab'] is used; if that is empty too, the name
                     is built as <SearchDB base>.hmmer.<ext>. The name "None" (any case) switches output off.
    >> append:boolean = whether to append to outfile; headers are only output if this is False or outfile is missing
    >> delimit:str = Delimiter to use [rje.getDelimit(self.cmd_list) with a tab as the fallback argument]
    << Returns False when output is switched off; otherwise nothing is returned.
    Column names depend on self.opt['MySQL']. Errors are logged and re-raised.
    '''
    try:
        ### Setup ###
        if not outfile:
            outfile = self.info['HMMTab']
        if outfile.lower() == 'none':
            self.log.printLog('#TAB', 'HMMTab = "None": No table output')
            return False
        if not delimit:
            delimit = rje.getDelimit(self.cmd_list, '\t')
        if not outfile:
            dbbase = rje.baseFile(self.info['SearchDB'], True)
            outfile = '%s.hmmer.%s' % (dbbase, rje.delimitExt(delimit))
        self.readResults()
        self.log.printLog('#TAB', 'Tabulating results for %s searches into %s' % (len(self.search), outfile),
                          log=False)

        ### Setup Resfile ###
        headers = ['Type', 'Name', 'Start', 'End', 'Eval', 'Score']
        if self.opt['MySQL']:
            headers = ['HMM', 'Hit', 'Hit_Start', 'Hit_End', 'Eval', 'Score']
        if not (append and os.path.exists(outfile)):
            rje.delimitedFileOutput(self, outfile, headers, delimit, rje_backup=True)

        ### Output Search details ###
        for search in self.search:
            for hit in search.hit:
                for aln in hit.aln:
                    # Row carries the values under both naming schemes; headers selects which are output
                    (hmm, sbj) = (search.info['Name'], hit.info['Name'])
                    (start, end) = ('%d' % aln.stat['SbjStart'], '%d' % aln.stat['SbjEnd'])
                    row = dict(HMM=hmm, Type=hmm, Name=sbj, Hit=sbj,
                               Start=start, End=end, Hit_Start=start, Hit_End=end,
                               Eval='%.2e' % aln.stat['Expect'], Score='%.1f' % aln.stat['BitScore'])
                    rje.delimitedFileOutput(self, outfile, headers, delimit, row)
        self.log.printLog('#OUT', 'Results for %s searches output to %s.' % (len(self.search), outfile))
    except:
        self.log.errorLog('Fatal Error during hmmTable(%s).' % outfile)
        raise
def _setupOutput(self):  ### Sets up output files self.str['MapFas','MissFas','MapRes']
    '''
    Sets up output files self.str['MapFas','MissFas','MapRes'].
    - A StartFrom value other than ''/'none' switches self.bool['Append'] on; otherwise StartFrom is blanked.
    - A blank/'none' ResFile is replaced by <SeqIn base>.<MapDB base>.
    - Each output name is set to <ResFile>.<suffix> and passed to rje.backup(). MissFas is left out when the
      Combine option is on.
    - self.list['Headers'] is built and output to the MapRes file via rje.delimitedFileOutput().
    '''
    ### ~ [0] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    delimit = rje.getDelimit(self.cmd_list)
    if self.str['StartFrom'].lower() not in ['', 'none']:
        self.bool['Append'] = True
        self.printLog('#CMD', 'StartFrom = "%s" so Append=T' % self.str['StartFrom'])
    else:
        self.str['StartFrom'] = ''

    ### ~ [1] General ResFile ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    suffixes = {'MapFas': 'mapping.fas', 'MissFas': 'missing.fas',
                'MapRes': 'mapping.%s' % rje.delimitExt(delimit)}
    if self.getBool('Combine'):
        suffixes.pop('MissFas')
    if self.str['ResFile'].lower() in ['', 'none']:
        seqbase = rje.baseFile(self.str['SeqIn'])
        dbbase = rje.baseFile(self.str['MapDB'], strip_path=True)
        self.str['ResFile'] = '%s.%s' % (seqbase, dbbase)
    for (outkey, suffix) in suffixes.items():
        self.setStr({outkey: '.'.join([self.getStr('ResFile'), suffix])})
        rje.backup(self, self.getStr(outkey))

    ### ~ [2] Headers for MapRes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    #!# Consider replacing with rje_db object? #!#
    headers = self.list['Headers'] = ['Query', 'Hit', 'Method', 'MapRank', 'BlastRank', 'EVal', 'Score']
    for qh in ['Query', 'Hit']:
        headers.append('%s_Species' % qh)
        if self.bool['GablamOut']:
            headers.extend(['%s_%s' % (qh, st) for st in ['Len', 'Sim', 'ID']])
    rje.delimitedFileOutput(self, self.str['MapRes'], self.list['Headers'], delimit)
def hmmTable(self, outfile='', append=False, delimit=None):  ### Outputs results table
    '''
    Outputs one delimited row per alignment of every hit of every search in self.search.
    >> outfile:str = Name of output file. If empty, self.info['HMMTab'] is used; if that is empty too, the name
                     is built as <SearchDB base>.hmmer.<ext>. The name "None" (any case) switches output off.
    >> append:boolean = whether to append to outfile; headers are only output if this is False or outfile is missing
    >> delimit:str = Delimiter to use [rje.getDelimit(self.cmd_list) with a tab as the fallback argument]
    << Returns False when output is switched off; otherwise nothing is returned.
    Column names depend on self.opt['MySQL']. Errors are logged and re-raised.
    '''
    try:
        ### Setup ###
        if not outfile:
            outfile = self.info['HMMTab']
        if outfile.lower() == 'none':
            self.log.printLog('#TAB', 'HMMTab = "None": No table output')
            return False
        if not delimit:
            delimit = rje.getDelimit(self.cmd_list, '\t')
        if not outfile:
            dbbase = rje.baseFile(self.info['SearchDB'], True)
            outfile = '%s.hmmer.%s' % (dbbase, rje.delimitExt(delimit))
        self.readResults()
        self.log.printLog('#TAB', 'Tabulating results for %s searches into %s' % (len(self.search), outfile),
                          log=False)

        ### Setup Resfile ###
        headers = ['Type', 'Name', 'Start', 'End', 'Eval', 'Score']
        if self.opt['MySQL']:
            headers = ['HMM', 'Hit', 'Hit_Start', 'Hit_End', 'Eval', 'Score']
        if not (append and os.path.exists(outfile)):
            rje.delimitedFileOutput(self, outfile, headers, delimit, rje_backup=True)

        ### Output Search details ###
        for search in self.search:
            for hit in search.hit:
                for aln in hit.aln:
                    # Row carries the values under both naming schemes; headers selects which are output
                    (hmm, sbj) = (search.info['Name'], hit.info['Name'])
                    (start, end) = ('%d' % aln.stat['SbjStart'], '%d' % aln.stat['SbjEnd'])
                    row = dict(HMM=hmm, Type=hmm, Name=sbj, Hit=sbj,
                               Start=start, End=end, Hit_Start=start, Hit_End=end,
                               Eval='%.2e' % aln.stat['Expect'], Score='%.1f' % aln.stat['BitScore'])
                    rje.delimitedFileOutput(self, outfile, headers, delimit, row)
        self.log.printLog('#OUT', 'Results for %s searches output to %s.' % (len(self.search), outfile))
    except:
        self.log.errorLog('Fatal Error during hmmTable(%s).' % outfile)
        raise
def _pepDis(self):  ### Peptide Distance
    '''
    Peptide Distance.
    Loads sequences (autoload=T), calculates a distance for every pair of sequences (including each sequence with
    itself) using self.info['Method'], and saves the resulting symmetric matrix to
    <sequence file base>.<Method>.<ext>. Methods handled here:
    - ds_prop / ds_id : position-weighted property difference / identity mismatch, minus the mean of the two
      sequences' self-distances. Indexes both sequences with the same positions - presumably expects peptides of
      equal length (TODO confirm).
    - tot_prop : summed absolute difference of per-property totals.
    - pam : pam.pamML() of the two sequences.
    - best_prop : minimum summed property difference over all circular rotations of the second peptide.
    Any other Method leaves all distances at 0. With OutMatrix "phylip", self-distances are fixed at 0.
    Errors are logged and re-raised.
    '''
    try:
        ### <0> ### Setup
        seqlist = rje_seq.SeqList(self.log, self.cmd_list + ['autoload=T'])
        dismatrix = rje_dismatrix.DisMatrix(self.log, self.cmd_list)
        method = self.info['Method']
        phylip = self.info['OutMatrix'] == 'phylip'
        dismatrix.info['Name'] = method
        dismatrix.opt['Symmetric'] = True
        if method in ['ds_prop', 'tot_prop', 'best_prop']:
            aaprop = rje_aaprop.AAPropMatrix(self.log, self.cmd_list)
            aaprop.makePropDif()
        elif method == 'pam':
            pam = rje_pam.PamCtrl(log=self.log, cmd_list=self.cmd_list)

        ### <1> ### Make DisMatrix
        # Matrix is symmetrical, so each sequence is only paired with itself and the sequences after it.
        # Pairing by list position avoids a list.index() search for every pair.
        for (i1, seq1) in enumerate(seqlist.seq):
            pepseq1 = seq1.info['Sequence']
            len1 = seq1.seqLen()  # assumes seqLen() simply reports the sequence length - TODO confirm
            for seq2 in seqlist.seq[i1:]:
                pepseq2 = seq2.info['Sequence']
                dis = 0
                if seq1 == seq2 and phylip:
                    dis = 0
                elif method in ['ds_prop', 'ds_id']:
                    (self_dis1, self_dis2) = (0, 0)
                    len2 = seq2.seqLen()
                    for r1 in range(len1):
                        for r2 in range(r1, len2):
                            (a1, a2) = (pepseq1[r1], pepseq2[r2])  # Between-sequence residue pair
                            (s1, s2) = (pepseq1[r2], pepseq2[r1])  # Within-sequence partners of a1 and a2
                            weight = len1 - (r2 - r1)  # Pairs that are closer together count for more
                            if method == 'ds_prop':
                                dis += (aaprop.pdif['%s%s' % (a1, a2)] * weight)
                                self_dis1 += (aaprop.pdif['%s%s' % (a1, s1)] * weight)
                                self_dis2 += (aaprop.pdif['%s%s' % (a2, s2)] * weight)
                            else:  # ds_id: count weighted mismatches
                                if a1 != a2:
                                    dis += weight
                                if a1 != s1:
                                    self_dis1 += weight
                                if a2 != s2:
                                    self_dis2 += weight
                    dis -= (self_dis1 + self_dis2) / 2.0
                elif method == 'tot_prop':
                    for prop in aaprop.prop.keys():
                        propval = aaprop.prop[prop]
                        tot1 = 0.0
                        for aa in pepseq1:
                            tot1 += float(propval[aa])
                        tot2 = 0.0
                        for aa in pepseq2:
                            tot2 += float(propval[aa])
                        dis += abs(tot1 - tot2)
                elif method == 'pam':
                    dis = pam.pamML(ancseq=pepseq1, descseq=pepseq2)
                elif method == 'best_prop':
                    min_dis = len1 * len(aaprop.prop)
                    for c in range(len1):  # Circular start
                        rotated = pepseq2[c:] + pepseq2[:c]
                        dis = 0
                        for r in range(len1):
                            dis += aaprop.pdif['%s%s' % (pepseq1[r], rotated[r])]
                        if dis < min_dis:
                            min_dis = dis
                    dis = min_dis
                dismatrix.addDis(seq1, seq2, dis)

        ### <2> ### Output
        if phylip:
            delimit = ' '
            matrix_format = 'phylip'
        else:
            delimit = rje.getDelimit(self.cmd_list, ',')
            matrix_format = 'None'  # String 'None', as passed on to saveMatrix()
        outfile = '%s.%s.%s' % (rje.baseFile(seqlist.info['Name'], True), method, rje.delimitExt(delimit))
        dismatrix.saveMatrix(seqlist.seq, outfile, delimit, format=matrix_format)
    except:  # Deliberately broad: logged, then re-raised unchanged
        self.log.errorLog('Error in _pepDis', printerror=True, quitchoice=False)
        raise  # Delete this if method error not terrible
def _pepStats(self):  ### Peptide Statistics
    '''
    Peptide Statistics.
    Loads sequences (autoload=T) and writes one delimited row per peptide to hrb.pepstats.<ext>, containing:
    - the peptide name (cut back to the part matching NAME_\\d[CQ] where the short name has that form);
    - the summed value of every amino acid property in aaprop.prop (sorted by property name);
    - net_charge: sum of Positive minus Negative property values;
    - charge_balance / hydrophobic_balance: property values weighted by 1/(distance from peptide start) minus
      the same values weighted by 1/(distance from peptide end).
    Property values are converted to integers in place (except for '-' and 'X'); a value that cannot be
    converted is reported and the error re-raised. Errors are logged and re-raised.
    '''
    try:
        ### Setup ###
        seqlist = rje_seq.SeqList(self.log, self.cmd_list + ['autoload=T'])
        aaprop = rje_aaprop.AAPropMatrix(self.log, self.cmd_list)
        aaprop.makePropDif()
        delimit = rje.getDelimit(self.cmd_list)
        proplist = rje.sortKeys(aaprop.prop)

        ### Output File Setup ###
        OUTFILE = open('hrb.pepstats.%s' % rje.delimitExt(delimit), 'w')
        try:  # Guarantees OUTFILE is closed even if a calculation fails
            headlist = ['peptide']
            ## Peptide Property Output ##
            for prop in proplist:
                headlist.append(prop.lower())
                for aa in aaprop.prop[prop].keys():
                    if aa in ['-', 'X']:
                        continue
                    try:
                        aaprop.prop[prop][aa] = int(aaprop.prop[prop][aa])
                    except (TypeError, ValueError):
                        self.log.errorLog('Non-integer %s property value for "%s": %s'
                                          % (prop, aa, aaprop.prop[prop][aa]), printerror=False, quitchoice=False)
                        raise
            ## Additional Stats ##
            headlist += ['net_charge', 'charge_balance', 'hydrophobic_balance']
            ## Output
            rje.writeDelimit(OUTFILE, headlist, delimit)

            ### Calculate stats ###
            name_re = r'^(\S+_\d[CQ])'
            for pep in seqlist.seq:
                pepname = pep.shortName()
                trimmed = rje.matchExp(name_re, pepname)  # Matched once and reused
                if trimmed:
                    pepname = trimmed[0]
                outlist = [pepname]
                pepseq = pep.info['Sequence']
                ## Peptide Property Output ##
                for prop in proplist:
                    px = 0
                    for aa in pepseq:
                        px += aaprop.prop[prop][aa]
                    outlist.append('%d' % px)
                ## Additional Stats ##
                net_charge = 0
                for aa in pepseq:
                    net_charge += (aaprop.prop['Positive'][aa] - aaprop.prop['Negative'][aa])
                outlist.append('%d' % net_charge)
                charge_balance = 0
                hydrophobic_balance = 0
                # End weight uses the actual peptide length: a 10-mer gets 1/(10-r), and longer peptides no
                # longer divide by zero at position 10.
                peplen = len(pepseq)
                for (r, aa) in enumerate(pepseq):
                    (start_weight, end_weight) = (1.0 / (r + 1), 1.0 / (peplen - r))
                    charge_balance += aaprop.prop['Charged'][aa] * start_weight
                    charge_balance -= aaprop.prop['Charged'][aa] * end_weight
                    hydrophobic_balance += aaprop.prop['Hydrophobic'][aa] * start_weight
                    hydrophobic_balance -= aaprop.prop['Hydrophobic'][aa] * end_weight
                outlist.append('%.3f' % charge_balance)
                outlist.append('%.3f' % hydrophobic_balance)
                rje.writeDelimit(OUTFILE, outlist, delimit)
        ### Finish ###
        finally:
            OUTFILE.close()
    except:  # Deliberately broad: logged, then re-raised unchanged
        self.log.errorLog('Error in _pepStats', printerror=True, quitchoice=False)
        raise  # Delete this if method error not terrible
def run(self):  ### Main Run method
    '''
    Main Run method. Executes the first of these modes whose condition is met:
    1. self.opt['SLiMDisc']: hands over to self.slimDisc().
    2. self.opt['Teiresias']: runs the TEIRESIAS command (unless its output is already newer than its input and
       regeneration is not requested), reads the patterns, scores them with self.calculateScore() and outputs
       them as either one combined table or, with self.opt['MySQL'], separate pattern and occurrence tables.
    3. self.info['Info'] != 'None': loads motifs from that file and outputs self.calculateInformationContent()
       for each to <Info base>.info.<ext>.
    << Returns the result of self.slimDisc() in mode 1 and False if the Info file is missing; otherwise nothing.
    Errors are logged and re-raised.
    '''
    try:
        ### SLiMDisc Run ###
        if self.opt['SLiMDisc']:
            return self.slimDisc()

        ### TEIRESIAS ###
        if self.opt['Teiresias']:
            ## Setup ##
            seqlist = rje_seq.SeqList(self.log, self.cmd_list)
            basefile = rje.baseFile(seqlist.info['Name'], True)
            infile = '%s.teiresias.fas' % basefile
            outfile = '%s.teiresias.out' % basefile
            run_teiresias = True
            if rje.isYounger(outfile, infile) == outfile:
                if self.stat['Interactive'] < 1 or not rje.yesNo(
                        '%s and %s exist already. Regenerate?' % (infile, outfile), 'N'):
                    run_teiresias = False

            ## Run TEIRESIAS ##
            if run_teiresias:
                seqlist.saveFasta(seqfile=infile, name='Teiresias')  ### Saves sequences in fasta format
                command = rje.makePath(self.info['TeiresiasPath'], True)
                command += ' -i%s -o%s %s' % (infile, outfile, self.info['TeiresiasOpt'])
                self.log.printLog('#CMD', command)
                # NOTE(review): command is a shell string built from configured paths/options
                os.system(command)

            ## Read Results ##
            self.verbose(0, 2, 'Reading TEIRESIAS output from %s...' % outfile, 1)
            self.list['Pattern'] = []
            pattern_re = r'^(\d+)\s+(\d+)\s+(\S+)\s+(\d.+\d)$'
            RESULTS = open(outfile, 'r')
            try:
                for line in RESULTS:
                    patmatch = rje.matchExp(pattern_re, line)  # Matched once and reused
                    if patmatch:  # New pattern
                        self.addTeiresiasPattern(patmatch)
                    elif len(line) > 3 and line[0] != '#':
                        self.log.errorLog('Did not recognise line: %s' % line, False, False)
            finally:
                RESULTS.close()
            patx = len(self.list['Pattern'])
            self.log.printLog('#PAT', '%s TEIRESIAS patterns read from %s.' % (rje.integerString(patx), outfile))

            ## Calculate Information Content ##
            aafreq = seqlist.aaFreq()
            self.verbose(0, 3, 'Calculating Information Content & Length stats...', 0)
            occx = 0
            for pattern in self.list['Pattern']:
                pattern.stat['Info'] = self.calculateScore(pattern.info['Pattern'], aafreq)
                pattern._makeLength()
                occx += 1
                rje.progressPrint(self, occx, patx // 100, patx // 10)
            self.verbose(0, 1, '...Done!', 2)

            ## Prepare Results ##
            delimit = rje.getDelimit(self.cmd_list)
            if self.info['Name'] == 'None':
                self.info['Name'] = '%s.teiresias.%s' % (rje.baseFile(seqlist.info['Name'], True),
                                                         rje.delimitExt(delimit))
            mysql = self.opt['MySQL']
            append = self.opt['Append']
            if append:
                mode = 'a'
            else:
                mode = 'w'  # Fresh tables (with headers) replace any existing files
            handles = []  # Every opened output file, so all are closed whatever happens
            try:
                if mysql:  # Two tables
                    (fileroot, fileext) = os.path.splitext(self.info['Name'])
                    occfile = '%s.occ%s' % (fileroot, fileext)
                    patfile = '%s.patterns%s' % (fileroot, fileext)
                    PATFILE = open(patfile, mode)
                    handles.append(PATFILE)
                    OCCFILE = open(occfile, mode)
                    handles.append(OCCFILE)
                    if not append:
                        rje.writeDelimit(PATFILE, ['pattern', 'tot_occ', 'seq_occ', 'info', 'len', 'fix', 'wild'],
                                         delimit)
                        rje.writeDelimit(OCCFILE, ['seq_id', 'pos', 'pattern', 'pat_match'], delimit)
                else:  # Single combined table, always named by self.info['Name']
                    RESFILE = open(self.info['Name'], mode)
                    handles.append(RESFILE)
                    if not append:
                        rje.writeDelimit(RESFILE, ['Sequence Name', 'Position', 'Pattern', 'Match',
                                                   'Total Occurrences', 'Num Sequences', 'Information Content',
                                                   'Length', 'Fixed', 'Wildcard'], delimit)

                ## Save Results ##
                occx = 0
                for pattern in self.list['Pattern']:
                    patstats = []
                    for stat in ['OccCount', 'SeqCount', 'Info', 'Length', 'Fixed', 'Wildcards']:
                        patstats.append('%d' % pattern.stat[stat])
                    patstats[2] = '%.3f' % pattern.stat['Info']  # Info is output to 3 d.p. rather than as integer
                    if mysql:
                        rje.writeDelimit(PATFILE, [pattern.info['Pattern']] + patstats, delimit)
                    for occ in rje.sortKeys(pattern.occ):
                        seq = seqlist.seq[occ]
                        for pos in pattern.occ[occ]:
                            match = seq.info['Sequence'][pos:(pos + pattern.stat['Length'])]
                            outlist = [seq.shortName(), '%d' % pos, pattern.info['Pattern'], match]
                            if mysql:
                                rje.writeDelimit(OCCFILE, outlist, delimit)
                            else:
                                rje.writeDelimit(RESFILE, outlist + patstats, delimit)
                            occx += 1
            finally:
                for handle in handles:
                    handle.close()
            if mysql:
                self.log.printLog('#OUT', '%s patterns output to %s.' % (rje.integerString(patx), patfile))
                self.log.printLog('#OUT', '%s pattern occurrences output to %s.'
                                  % (rje.integerString(occx), occfile))
            else:
                self.log.printLog('#OUT', '%s occurrences of %s patterns output to %s.'
                                  % (rje.integerString(occx), rje.integerString(patx), self.info['Name']))

        ### InfoContent ###
        elif self.info['Info'] != 'None':
            ## Setup ##
            alphabet = rje_seq.alph_protx
            if not os.path.exists(self.info['Info']):
                self.log.errorLog('Input file %s missing!' % self.info['Info'], False, False)
                return False
            mypresto = presto.Presto(self.log, self.cmd_list)
            mypresto.loadMotifs(file=self.info['Info'], clear=True)
            seqlist = rje_seq.SeqList(self.log, self.cmd_list + ['autoload=T'])
            if seqlist.seqNum() > 0:
                aafreq = seqlist.aaFreq(alphabet=None, fromfile=None, loadfile=None, total=False)
            else:  # No sequences: use equal frequencies across the alphabet
                aafreq = {}
                for aa in alphabet:
                    aafreq[aa] = 1.0 / len(alphabet)
            maxinfo = 0  # Sum of f * log2(f) over the frequencies
            for aa in aafreq.keys():
                if aafreq[aa] > 0:  # A zero frequency adds nothing and would be a math domain error
                    maxinfo += (aafreq[aa] * math.log(aafreq[aa], 2))

            ## Output ##
            delimit = rje.getDelimit(self.cmd_list)
            ext = rje.delimitExt(delimit)
            outfile = '%s.info.%s' % (rje.baseFile(self.info['Info'], True, ['.txt', '.%s' % ext]), ext)
            if self.opt['Append']:
                OUTFILE = open(outfile, 'a')
            else:
                OUTFILE = open(outfile, 'w')
            try:
                if not self.opt['Append']:
                    rje.writeDelimit(OUTFILE, ['motif', 'pattern', 'info'], delimit)

                ## Calculate Information Scores ##
                for motif in mypresto.motif:
                    self.verbose(2, 4, motif.info['Sequence'], 0)
                    pattern = ''
                    for el in motif.info['Sequence'].replace('X', '.').split('-'):
                        if el.startswith('.{'):  # Ambiguous spacer length - compress
                            pattern += '.'
                        else:
                            pattern += el
                    self.verbose(2, 2, '=> %s' % pattern, 1)
                    motif.stat['Info'] = self.calculateInformationContent(pattern, aafreq, maxinfo,
                                                                          self.stat['InfoGapPen'])
                    self.verbose(0, 3, '%s (%s) = %.2f' % (motif.info['Name'], pattern, motif.stat['Info']), 1)
                    ## Output ##
                    rje.writeDelimit(OUTFILE, [motif.info['Name'], pattern, '%.2f' % motif.stat['Info']], delimit)
            ## Finish ##
            finally:
                OUTFILE.close()
    except:  # Deliberately broad: logged, then re-raised unchanged
        self.log.errorLog('Error in run().', printerror=True, quitchoice=False)
        raise  # Delete this if method error not terrible
def _pepStats(self):  ### Peptide Statistics
    '''
    Peptide Statistics.
    Loads sequences (autoload=T) and writes one delimited row per peptide to hrb.pepstats.<ext>, containing:
    - the peptide name (cut back to the part matching NAME_\\d[CQ] where the short name has that form);
    - the summed value of every amino acid property in aaprop.prop (sorted by property name);
    - net_charge: sum of Positive minus Negative property values;
    - charge_balance / hydrophobic_balance: property values weighted by 1/(distance from peptide start) minus
      the same values weighted by 1/(distance from peptide end).
    Property values are converted to integers in place (except for '-' and 'X'); a value that cannot be
    converted is reported and the error re-raised. Errors are logged and re-raised.
    '''
    try:
        ### Setup ###
        seqlist = rje_seq.SeqList(self.log, self.cmd_list + ['autoload=T'])
        aaprop = rje_aaprop.AAPropMatrix(self.log, self.cmd_list)
        aaprop.makePropDif()
        delimit = rje.getDelimit(self.cmd_list)
        proplist = rje.sortKeys(aaprop.prop)

        ### Output File Setup ###
        OUTFILE = open('hrb.pepstats.%s' % rje.delimitExt(delimit), 'w')
        try:  # Guarantees OUTFILE is closed even if a calculation fails
            headlist = ['peptide']
            ## Peptide Property Output ##
            for prop in proplist:
                headlist.append(prop.lower())
                for aa in aaprop.prop[prop].keys():
                    if aa in ['-', 'X']:
                        continue
                    try:
                        aaprop.prop[prop][aa] = int(aaprop.prop[prop][aa])
                    except (TypeError, ValueError):
                        self.log.errorLog('Non-integer %s property value for "%s": %s'
                                          % (prop, aa, aaprop.prop[prop][aa]), printerror=False, quitchoice=False)
                        raise
            ## Additional Stats ##
            headlist += ['net_charge', 'charge_balance', 'hydrophobic_balance']
            ## Output
            rje.writeDelimit(OUTFILE, headlist, delimit)

            ### Calculate stats ###
            name_re = r'^(\S+_\d[CQ])'
            for pep in seqlist.seq:
                pepname = pep.shortName()
                trimmed = rje.matchExp(name_re, pepname)  # Matched once and reused
                if trimmed:
                    pepname = trimmed[0]
                outlist = [pepname]
                pepseq = pep.info['Sequence']
                ## Peptide Property Output ##
                for prop in proplist:
                    px = 0
                    for aa in pepseq:
                        px += aaprop.prop[prop][aa]
                    outlist.append('%d' % px)
                ## Additional Stats ##
                net_charge = 0
                for aa in pepseq:
                    net_charge += (aaprop.prop['Positive'][aa] - aaprop.prop['Negative'][aa])
                outlist.append('%d' % net_charge)
                charge_balance = 0
                hydrophobic_balance = 0
                # End weight uses the actual peptide length: a 10-mer gets 1/(10-r), and longer peptides no
                # longer divide by zero at position 10.
                peplen = len(pepseq)
                for (r, aa) in enumerate(pepseq):
                    (start_weight, end_weight) = (1.0 / (r + 1), 1.0 / (peplen - r))
                    charge_balance += aaprop.prop['Charged'][aa] * start_weight
                    charge_balance -= aaprop.prop['Charged'][aa] * end_weight
                    hydrophobic_balance += aaprop.prop['Hydrophobic'][aa] * start_weight
                    hydrophobic_balance -= aaprop.prop['Hydrophobic'][aa] * end_weight
                outlist.append('%.3f' % charge_balance)
                outlist.append('%.3f' % hydrophobic_balance)
                rje.writeDelimit(OUTFILE, outlist, delimit)
        ### Finish ###
        finally:
            OUTFILE.close()
    except:  # Deliberately broad: logged, then re-raised unchanged
        self.log.errorLog('Error in _pepStats', printerror=True, quitchoice=False)
        raise  # Delete this if method error not terrible