def convert(self,filelist=[],outfile=None):      ### Converts scansite output files in FileList to Outfile
    '''
    Converts scansite output files in FileList to Outfile.

    filelist: scansite output files to read. If empty (or None), self.list['FileList'] is used instead.
    outfile:  name of the delimited file to write. If not given, self.info['Name'] is used instead.

    Returns True once all files have been processed, or False (after logging an error) if there are
    no input files. Input files that do not exist are logged and skipped. Any exception is logged
    together with the stage in which it occurred and is then re-raised.
    '''
    _stage = 'Setup'    # Bound before the try so that the error handler can always report it
    try:
        ### Setup ###
        if not filelist:    # The shared default list is only ever replaced, never mutated
            filelist = self.list['FileList']
        if not outfile:
            outfile = self.info['Name']
        if not filelist:
            self.log.errorLog('No scansite files to convert! %s unchanged/not made.' % outfile,printerror=False)
            return False
        delimit = rje.getDelimit(self.cmd_list)
        ext = rje.delimitExt(delimit)
        if ext != outfile[-3:]:     # Offer to swap the last three characters for the delimiter's extension
            newfile = outfile[:-3] + ext
            if rje.yesNo('Change file name from %s to %s?' % (outfile, newfile)):
                outfile = newfile
        self.log.printLog('#OUT','Converting %d file(s), output to %s.' % (len(filelist),outfile))

        ### Output File ###
        _stage = 'Output File'
        create = not self.opt['Append'] or not os.path.exists(outfile)  # Create with header
        if create:
            OUTFILE = open(outfile,'w')
        else:
            OUTFILE = open(outfile,'a')
        try:    # Guarantees that the output file is closed even if conversion fails part-way
            if create:
                headers = ['seq_id','enzyme','enz_group','aa','pos','score','percentile','matchseq','sa']
                rje.writeDelimit(OUTFILE,headers,delimit)

            ### Conversion ###
            _stage = 'Conversion'
            sx = 0      # Total results written
            for infile in filelist:
                if not os.path.exists(infile):
                    self.log.errorLog('Input file %s does not exist! :o(' % infile,False,False)
                    continue
                fx = 0  # Results written from this file
                INFILE = open(infile,'r')
                try:
                    inline = rje.nextLine(INFILE)
                    while inline is not None:
                        scanlist = rje.matchExp(re_scansite,inline)     # Match once and reuse the result
                        if scanlist:
                            rje.writeDelimit(OUTFILE,scanlist,delimit)
                            sx += 1
                            fx += 1
                            rje.progressPrint(self,sx)
                        inline = rje.nextLine(INFILE)
                finally:
                    INFILE.close()
                self.log.printLog('#OUT','%s scansite results from %s. (%s Total.)' % (rje.integerString(fx),infile,rje.integerString(sx)))
        finally:
            OUTFILE.close()

        ### End ###
        _stage = 'End'
        self.log.printLog('#OUT','%s scansite results output to %s.' % (rje.integerString(sx),outfile))
        return True
    except:     # Deliberately broad: the error is logged with its stage and always re-raised
        self.log.errorLog('Error in convert(%s)' % _stage,printerror=True,quitchoice=False)
        raise
def convert(self, filelist=[], outfile=None):  ### Converts scansite output files in FileList to Outfile
    '''
    Converts scansite output files in FileList to Outfile.

    filelist: scansite output files to read. If empty (or None), self.list['FileList'] is used instead.
    outfile:  name of the delimited file to write. If not given, self.info['Name'] is used instead.

    Returns True once all files have been processed, or False (after logging an error) if there are
    no input files. Input files that do not exist are logged and skipped. Any exception is logged
    together with the stage in which it occurred and is then re-raised.
    '''
    _stage = 'Setup'  # Bound before the try so that the error handler can always report it
    try:
        ### Setup ###
        if not filelist:  # The shared default list is only ever replaced, never mutated
            filelist = self.list['FileList']
        if not outfile:
            outfile = self.info['Name']
        if not filelist:
            self.log.errorLog(
                'No scansite files to convert! %s unchanged/not made.' % outfile,
                printerror=False)
            return False
        delimit = rje.getDelimit(self.cmd_list)
        ext = rje.delimitExt(delimit)
        if ext != outfile[-3:]:  # Offer to swap the last three characters for the delimiter's extension
            newfile = outfile[:-3] + ext
            if rje.yesNo('Change file name from %s to %s?' % (outfile, newfile)):
                outfile = newfile
        self.log.printLog(
            '#OUT',
            'Converting %d file(s), output to %s.' % (len(filelist), outfile))

        ### Output File ###
        _stage = 'Output File'
        create = not self.opt['Append'] or not os.path.exists(outfile)  # Create with header
        if create:
            OUTFILE = open(outfile, 'w')
        else:
            OUTFILE = open(outfile, 'a')
        try:  # Guarantees that the output file is closed even if conversion fails part-way
            if create:
                headers = [
                    'seq_id', 'enzyme', 'enz_group', 'aa', 'pos', 'score',
                    'percentile', 'matchseq', 'sa'
                ]
                rje.writeDelimit(OUTFILE, headers, delimit)

            ### Conversion ###
            _stage = 'Conversion'
            sx = 0  # Total results written
            for infile in filelist:
                if not os.path.exists(infile):
                    self.log.errorLog(
                        'Input file %s does not exist! :o(' % infile, False, False)
                    continue
                fx = 0  # Results written from this file
                INFILE = open(infile, 'r')
                try:
                    inline = rje.nextLine(INFILE)
                    while inline is not None:
                        scanlist = rje.matchExp(re_scansite, inline)  # Match once and reuse the result
                        if scanlist:
                            rje.writeDelimit(OUTFILE, scanlist, delimit)
                            sx += 1
                            fx += 1
                            rje.progressPrint(self, sx)
                        inline = rje.nextLine(INFILE)
                finally:
                    INFILE.close()
                self.log.printLog(
                    '#OUT', '%s scansite results from %s. (%s Total.)' %
                    (rje.integerString(fx), infile, rje.integerString(sx)))
        finally:
            OUTFILE.close()

        ### End ###
        _stage = 'End'
        self.log.printLog(
            '#OUT', '%s scansite results output to %s.' %
            (rje.integerString(sx), outfile))
        return True
    except:  # Deliberately broad: the error is logged with its stage and always re-raised
        self.log.errorLog('Error in convert(%s)' % _stage,
                          printerror=True,
                          quitchoice=False)
        raise
def run(self):      ### Main Run method
    '''
    Main Run method. Performs the first of the following tasks that is switched on:

    1. SLiMDisc (self.opt['SLiMDisc']): hands over to self.slimDisc() and returns its result.
    2. TEIRESIAS (self.opt['Teiresias']): optionally runs the TEIRESIAS executable on the input
       sequences, reads the patterns from its output file, scores them and writes the results to
       delimited file(s) - a patterns table plus an occurrences table if self.opt['MySQL'], else a
       single combined table named by self.info['Name'].
    3. Information content (self.info['Info'] is not 'None'): loads the motifs from that file, scores
       each one and writes the scores to a delimited '*.info.*' file.

    Returns the result of self.slimDisc() for a SLiMDisc run, False if the Info input file is missing,
    and None otherwise. Any exception is logged and then re-raised.
    '''
    try:
        ### SLiMDisc Run ###
        if self.opt['SLiMDisc']:
            return self.slimDisc()

        ### TEIRESIAS ###
        if self.opt['Teiresias']:
            ## Setup ##
            seqlist = rje_seq.SeqList(self.log,self.cmd_list)
            infile = '%s.teiresias.fas' % rje.baseFile(seqlist.info['Name'],True)
            outfile = '%s.teiresias.out' % rje.baseFile(seqlist.info['Name'],True)
            run_teiresias = True
            # NOTE(review): presumably rje.isYounger() returns the more recent of the two files, making
            # this "results are newer than the input, so they may be reused" - confirm against rje.
            if rje.isYounger(outfile,infile) == outfile:
                # Existing results are kept without asking when not interactive; otherwise default is 'N'
                if self.stat['Interactive'] < 1 or not rje.yesNo('%s and %s exist already. Regenerate?' % (infile,outfile),'N'):
                    run_teiresias = False

            ## Run TEIRESIAS ##
            if run_teiresias:
                seqlist.saveFasta(seqfile=infile,name='Teiresias')  ### Saves sequences in fasta format
                command = rje.makePath(self.info['TeiresiasPath'],True)
                command += ' -i%s -o%s %s' % (infile,outfile,self.info['TeiresiasOpt'])
                self.log.printLog('#CMD',command)
                # NOTE(review): the command is a plain string handed to the shell, so file names holding
                # spaces or shell metacharacters will not survive. Left as is to preserve behaviour.
                os.system(command)

            ## Read Results ##
            self.verbose(0,2,'Reading TEIRESIAS output from %s...' % outfile,1)
            self.list['Pattern'] = []
            re_pattern = r'^(\d+)\s+(\d+)\s+(\S+)\s+(\d.+\d)$'
            RESULTS = open(outfile,'r')
            try:
                for line in RESULTS:
                    patmatch = rje.matchExp(re_pattern,line)    # Match once and reuse the result
                    if patmatch:    # New pattern
                        self.addTeiresiasPattern(patmatch)
                    elif len(line) > 3 and line[0] != '#':
                        self.log.errorLog('Did not recognise line: %s' % line,False,False)
            finally:
                RESULTS.close()
            patx = len(self.list['Pattern'])
            self.log.printLog('#PAT','%s TEIRESIAS patterns read from %s.' % (rje.integerString(patx),outfile))

            ## Calculate Information Content ##
            aafreq = seqlist.aaFreq()
            self.verbose(0,3,'Calculating Information Content & Length stats...',0)
            occx = 0
            for pattern in self.list['Pattern']:
                pattern.stat['Info'] = self.calculateScore(pattern.info['Pattern'],aafreq)
                pattern._makeLength()
                occx += 1
                rje.progressPrint(self,occx,patx/100,patx/10)
            self.verbose(0,1,'...Done!',2)

            ## Prepare Results ##
            delimit = rje.getDelimit(self.cmd_list)
            if self.info['Name'] == 'None':
                self.info['Name'] = '%s.teiresias.%s' % (rje.baseFile(seqlist.info['Name'],True),rje.delimitExt(delimit))
            PATFILE = OCCFILE = RESFILE = None  # Only the handles that get opened are closed in the finally
            try:
                if self.opt['MySQL']:   # Two tables
                    patfile = os.path.splitext(self.info['Name'])
                    occfile = '%s.occ%s' % (patfile[0],patfile[1])
                    patfile = '%s.patterns%s' % (patfile[0],patfile[1])
                    if self.opt['Append']:
                        PATFILE = open(patfile,'a')
                        OCCFILE = open(occfile,'a')
                    else:
                        PATFILE = open(patfile,'w')
                        rje.writeDelimit(PATFILE,['pattern','tot_occ','seq_occ','info','len','fix','wild'],delimit)
                        # Opened with 'w' (was 'a') so that a fresh run replaces the table, matching the
                        # patterns file, instead of appending a second header to any existing file
                        OCCFILE = open(occfile,'w')
                        rje.writeDelimit(OCCFILE,['seq_id','pos','pattern','pat_match'],delimit)
                else:
                    if self.opt['Append']:
                        RESFILE = open(self.info['Name'],'a')
                    else:
                        # Was open(patfile,'w'): patfile only exists in the MySQL branch (NameError here)
                        RESFILE = open(self.info['Name'],'w')
                        rje.writeDelimit(RESFILE,['Sequence Name','Position','Pattern','Match','Total Occurrences','Num Sequences','Information Content','Length','Fixed','Wildcard'],delimit)

                ## Save Results ##
                occx = 0
                for pattern in self.list['Pattern']:
                    patstats = ['%d' % pattern.stat[stat] for stat in ['OccCount','SeqCount','Info','Length','Fixed','Wildcards']]
                    patstats[2] = '%.3f' % pattern.stat['Info']     # Info is reported to 3 d.p., not as an integer
                    if self.opt['MySQL']:   # Two tables
                        rje.writeDelimit(PATFILE,[pattern.info['Pattern']] + patstats,delimit)
                    for occ in rje.sortKeys(pattern.occ):
                        seq = seqlist.seq[occ]
                        for pos in pattern.occ[occ]:
                            match = seq.info['Sequence'][pos:(pos+pattern.stat['Length'])]
                            outlist = [seq.shortName(),'%d' % pos,pattern.info['Pattern'],match]
                            if self.opt['MySQL']:   # Two tables
                                rje.writeDelimit(OCCFILE,outlist,delimit)
                            else:
                                rje.writeDelimit(RESFILE,outlist+patstats,delimit)
                            occx += 1
            finally:
                for handle in (PATFILE,OCCFILE,RESFILE):
                    if handle is not None:
                        handle.close()
            if self.opt['MySQL']:   # Two tables
                self.log.printLog('#OUT','%s patterns output to %s.' % (rje.integerString(patx),patfile))
                self.log.printLog('#OUT','%s pattern occurrences output to %s.' % (rje.integerString(occx),occfile))
            else:
                self.log.printLog('#OUT','%s occurrences of %s patterns output to %s.' % (rje.integerString(occx),rje.integerString(patx),self.info['Name']))

        ### InfoContent ###
        elif self.info['Info'] != 'None':
            ## Setup ##
            alphabet = rje_seq.alph_protx
            if not os.path.exists(self.info['Info']):
                self.log.errorLog('Input file %s missing!' % self.info['Info'],False,False)
                return False
            mypresto = presto.Presto(self.log,self.cmd_list)
            mypresto.loadMotifs(file=self.info['Info'],clear=True)
            seqlist = rje_seq.SeqList(self.log,self.cmd_list+['autoload=T'])
            if seqlist.seqNum() > 0:
                aafreq = seqlist.aaFreq(alphabet=None,fromfile=None,loadfile=None,total=False)  ### Returns dictionary of AA (& gap etc.) frequencies
            else:   # No sequences: every letter of the alphabet is given the same frequency
                aafreq = {}
                for aa in alphabet:
                    aafreq[aa] = 1.0 / len(alphabet)
            alphabet = aafreq.keys()
            maxinfo = 0     # Sum of f * log2(f) over all the frequencies
            for aa in alphabet:
                if aafreq[aa] > 0:  # A zero frequency adds nothing; math.log(0) would raise ValueError
                    maxinfo += (aafreq[aa] * math.log(aafreq[aa],2))

            ## Output ##
            delimit = rje.getDelimit(self.cmd_list)
            ext = rje.delimitExt(delimit)
            outfile = '%s.info.%s' % (rje.baseFile(self.info['Info'],True,['.txt','.%s' % ext]),ext)
            if self.opt['Append']:
                OUTFILE = open(outfile,'a')
            else:
                OUTFILE = open(outfile,'w')
            try:
                if not self.opt['Append']:  # Header for new files only
                    rje.writeDelimit(OUTFILE,['motif','pattern','info'],delimit)

                ## Calculate Information Scores ##
                for motif in mypresto.motif:
                    self.verbose(2,4,motif.info['Sequence'],0)
                    elements = motif.info['Sequence'].replace('X','.').split('-')
                    parts = []
                    for el in elements:
                        if el.startswith('.{'):     # Ambiguous spacer length - compress
                            parts.append('.')
                        else:
                            parts.append(el)
                    pattern = ''.join(parts)
                    self.verbose(2,2,'=> %s' % pattern,1)
                    motif.stat['Info'] = self.calculateInformationContent(pattern,aafreq,maxinfo,self.stat['InfoGapPen'])
                    self.verbose(0,3,'%s (%s) = %.2f' % (motif.info['Name'],pattern,motif.stat['Info']),1)
                    ## Output ##
                    rje.writeDelimit(OUTFILE,[motif.info['Name'],pattern,'%.2f' % motif.stat['Info']],delimit)
            ## Finish ##
            finally:
                OUTFILE.close()
    except:     # Deliberately broad: the error is logged and always re-raised
        self.log.errorLog('Error in run().',printerror=True,quitchoice=False)
        raise   # Delete this if method error not terrible