コード例 #1
0
ファイル: rje_scansite.py プロジェクト: kwikwag/SLiMSuite
    def convert(self,filelist=None,outfile=None):      ### Converts scansite output files in FileList to Outfile
        '''
        Converts scansite output files in FileList to a single delimited Outfile.
        >> filelist:list = Scansite output files to convert [self.list['FileList'] if empty or None]
        >> outfile:str = Name of delimited output file [self.info['Name'] if empty or None]
        << True once conversion has run; False if there are no input files to convert.
        Only lines matching re_scansite are written and counted. Missing input files are logged and skipped.
        Any other error is logged with the stage reached and re-raised; open files are closed first.
        '''
        _stage = 'Setup'
        try:
            ### Setup ###
            if not filelist:
                filelist = self.list['FileList']
            if not outfile:
                outfile = self.info['Name']
            if not filelist:
                self.log.errorLog('No scansite files to convert! %s unchanged/not made.' % outfile,printerror=False)
                return False
            delimit = rje.getDelimit(self.cmd_list)
            ext = rje.delimitExt(delimit)
            if ext != outfile[-3:]:     # Offer to swap the last three characters for the delimiter's extension
                newfile = outfile[:-3] + ext
                if rje.yesNo('Change file name from %s to %s?' % (outfile, newfile)):
                    outfile = newfile
            self.log.printLog('#OUT','Converting %d file(s), output to %s.' % (len(filelist),outfile))

            ### Output File ###
            _stage = 'Output File'
            makenew = not self.opt['Append'] or not os.path.exists(outfile)
            if makenew:
                OUTFILE = open(outfile,'w')
            else:
                OUTFILE = open(outfile,'a')
            try:
                if makenew:     # Fresh file: start with the header row
                    headers = ['seq_id','enzyme','enz_group','aa','pos','score','percentile','matchseq','sa']
                    rje.writeDelimit(OUTFILE,headers,delimit)

                ### Conversion ###
                _stage = 'Conversion'
                sx = 0      # Total results written
                for infile in filelist:
                    if not os.path.exists(infile):
                        self.log.errorLog('Input file %s does not exist! :o(' % infile,False,False)
                        continue
                    fx = 0  # Results written from this file
                    INFILE = open(infile,'r')
                    try:
                        inline = rje.nextLine(INFILE)
                        while inline is not None:
                            scanlist = rje.matchExp(re_scansite,inline)
                            if scanlist:    # Only matching lines are results: never re-write a previous match
                                rje.writeDelimit(OUTFILE,scanlist,delimit)
                                sx += 1
                                fx += 1
                                rje.progressPrint(self,sx)
                            inline = rje.nextLine(INFILE)
                    finally:
                        INFILE.close()
                    self.log.printLog('#OUT','%s scansite results from %s. (%s Total.)' % (rje.integerString(fx),infile,rje.integerString(sx)))
            finally:
                OUTFILE.close()

            ### End ###
            _stage = 'End'
            self.log.printLog('#OUT','%s scansite results output to %s.' % (rje.integerString(sx),outfile))
            return True
        except:
            self.log.errorLog('Error in convert(%s)' % _stage,printerror=True,quitchoice=False)
            raise
コード例 #2
0
 def scap(self):     ### Full SCAP method
     '''
     Full SCAP method.
     Writes one delimited row per sequence to <Basefile>.<ext>, holding the sequence short name, its type
     ('qry' or 'bg'), the Markov 'Sorted' option and one self.scapSeq() score per X-mer length from MinXmer to
     MaxXmer. Query sequences (self.obj['SeqList']) are always processed; self.obj['ScapBack'] is processed as
     background only when it differs from the query list. Errors are logged, not re-raised.
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         markov = self.obj['Markov']
         (minx,maxx) = (markov.stat['MinXmer'],markov.stat['MaxXmer'])
         xmers = range(minx,maxx+1)
         headers = ['seq','type','sorted'] + ['X%d' % x for x in xmers]
         delimit = rje.getDelimit(self.cmd_list,'\t')
         scapfile = '%s.%s' % (self.info['Basefile'],rje.delimitExt(delimit))
         rje.delimitedFileOutput(self,scapfile,headers,delimit,rje_backup=True)

         def _scapSet(seqlist,seqtype,progtext,donetext):   ### Scores and outputs every sequence of one set
             total = seqlist.seqNum()
             done = 0.0     # Climbs in steps of 100 so that done/total is a percentage
             for seq in seqlist.seq:
                 self.progLog('\r#SCAP',progtext % (scapfile,(done/total)))
                 done += 100.0
                 row = {'seq':seq.shortName(),'type':seqtype,'sorted':markov.opt['Sorted']}
                 for x in xmers:
                     score = self.scapSeq(seq.info['Sequence'],x)
                     # Fixed-point for larger values, scientific notation for very small ones
                     if score > 0.001: row['X%d' % x] = '%.4f' % score
                     else: row['X%d' % x] = '%.3e' % score
                 rje.delimitedFileOutput(self,scapfile,headers,delimit,row)
             self.printLog('\r#SCAP',donetext % (scapfile,rje.integerString(total)))

         ### ~ [2] SCAP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         ## ~ [2a] Query ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         _scapSet(self.obj['SeqList'],'qry','SCAP processing Query to %s: %.2f%%','SCAP processed Query to %s for %s sequences.')
         ## ~ [2b] Background ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         if self.obj['ScapBack'] != self.obj['SeqList']:
             _scapSet(self.obj['ScapBack'],'bg','SCAP processing Background to %s: %.2f%%','SCAP processed Background to %s for %s sequences.')
         ## ~ [2c] Summary ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         if not markov.opt['Sorted']: self.printLog('#SCAP','UnSorted SCAP run complete')
         else: self.printLog('#SCAP','Sorted SCAP run complete')
     except: self.errorLog(rje_zen.Zen().wisdom())
コード例 #3
0
    def _pepDis(self):  ### Peptide Distance
        '''
        Builds a pairwise peptide distance matrix and saves it to file.

        Sequences are loaded with rje_seq.SeqList (autoload=T) and each pair is scored according to
        self.info['Method']:
        - ds_prop / ds_id : position-weighted property-difference / mismatch score over residue pairs, minus
          the mean of the two within-sequence ("self") scores.
        - tot_prop : sum over properties of the absolute difference between the two sequences' property totals.
        - pam : value returned by pam.pamML(ancseq, descseq).
        - best_prop : lowest summed property difference over the circular rotations of the second sequence.
        Distances are stored with dismatrix.addDis() and written with dismatrix.saveMatrix() to
        '<basefile>.<Method>.<ext>', in phylip format when self.info['OutMatrix'] == 'phylip'.
        Any error is logged and then re-raised.
        '''
        try:
            ### <0> ### Setup
            seqlist = rje_seq.SeqList(self.log, self.cmd_list + ['autoload=T'])
            dismatrix = rje_dismatrix.DisMatrix(self.log, self.cmd_list)
            dismatrix.info['Name'] = self.info['Method']
            dismatrix.opt['Symmetric'] = True
            # Only build the helper object that the chosen method needs
            if self.info['Method'] in ['ds_prop', 'tot_prop', 'best_prop']:
                aaprop = rje_aaprop.AAPropMatrix(self.log, self.cmd_list)
                #aaprop.readAAProp()
                aaprop.makePropDif()
            elif self.info['Method'] == 'pam':
                pam = rje_pam.PamCtrl(log=self.log, cmd_list=self.cmd_list)
            ### <1> ### Make DisMatrix
            for seq1 in seqlist.seq:
                for seq2 in seqlist.seq:
                    # Skip pairs where seq1 comes after seq2 in the list: only one triangle is calculated
                    if seqlist.seq.index(seq1) > seqlist.seq.index(
                            seq2):  # No need to calculate - symmetrical!
                        continue
                    dis = 0
                    if seq1 == seq2 and self.info['OutMatrix'] == 'phylip':
                        # Self-comparison for phylip output is fixed at zero without scoring
                        dis = 0
                    elif self.info['Method'] in ['ds_prop', 'ds_id']:
                        (self_dis1, self_dis2) = (0, 0)
                        # NOTE(review): seq1 is indexed with r2 (bounded by seq2's length) and seq2 with r1
                        # (bounded by seq1's length) - presumably all peptides share one length; confirm.
                        for r1 in range(seq1.seqLen()):
                            for r2 in range(r1, seq2.seqLen()):
                                # a1/a2 = cross-sequence pair; s1/s2 = the mirrored positions within each pair
                                (a1, a2) = (seq1.info['Sequence'][r1],
                                            seq2.info['Sequence'][r2])
                                (s1, s2) = (seq1.info['Sequence'][r2],
                                            seq2.info['Sequence'][r1])
                                # Pairs of positions further apart get a smaller weight (seqLen - separation)
                                phys_dis = r2 - r1
                                if self.info['Method'] == 'ds_prop':
                                    dis += (aaprop.pdif['%s%s' % (a1, a2)] *
                                            (seq1.seqLen() - phys_dis))
                                    self_dis1 += (aaprop.pdif['%s%s' %
                                                              (a1, s1)] *
                                                  (seq1.seqLen() - phys_dis))
                                    self_dis2 += (aaprop.pdif['%s%s' %
                                                              (a2, s2)] *
                                                  (seq1.seqLen() - phys_dis))
                                elif self.info[
                                        'Method'] == 'ds_id' and a1 != a2:
                                    dis += (seq1.seqLen() - phys_dis)
                                if self.info['Method'] == 'ds_id' and a1 != s1:
                                    self_dis1 += (seq1.seqLen() - phys_dis)
                                if self.info['Method'] == 'ds_id' and a2 != s2:
                                    self_dis2 += (seq1.seqLen() - phys_dis)
                        # Subtract the mean of the two self scores from the cross-sequence score
                        dis -= (self_dis1 + self_dis2) / 2.0
                    elif self.info['Method'] == 'tot_prop':
                        # Total each property over each sequence, then sum the absolute differences
                        proptot = {}
                        for property in aaprop.prop.keys():
                            proptot[property] = {seq1: 0.0, seq2: 0.0}
                        for seq in [seq1, seq2]:
                            for r in range(seq.seqLen()):
                                aa = seq.info['Sequence'][r]
                                for property in aaprop.prop.keys():
                                    proptot[property][seq] += string.atof(
                                        aaprop.prop[property][aa])
                        for property in aaprop.prop.keys():
                            if proptot[property][seq1] > proptot[property][
                                    seq2]:
                                dis += (proptot[property][seq1] -
                                        proptot[property][seq2])
                            else:
                                dis += (proptot[property][seq2] -
                                        proptot[property][seq1])
                    elif self.info['Method'] == 'pam':
                        dis = pam.pamML(ancseq=seq1.info['Sequence'],
                                        descseq=seq2.info['Sequence'])
                    elif self.info['Method'] == 'best_prop':
                        # Starting minimum = sequence length * number of properties
                        min_dis = seq1.seqLen() * len(aaprop.prop)
                        pepseq1 = seq1.info['Sequence']
                        # NOTE(review): pepseq2 is indexed up to seq1's length - presumably seq2 is at least
                        # as long as seq1; confirm.
                        for c in range(seq1.seqLen()):  # Circular start
                            dis = 0
                            # Rotate seq2 so that it starts at position c
                            pepseq2 = seq2.info['Sequence'][c:] + seq2.info[
                                'Sequence'][:c]
                            for r in range(seq1.seqLen()):
                                (a1, a2) = (pepseq1[r], pepseq2[r])
                                dis += aaprop.pdif['%s%s' % (a1, a2)]
                            if dis < min_dis:
                                min_dis = dis
                        dis = min_dis
                    dismatrix.addDis(seq1, seq2, dis)
            ### <2> ### Output
            if self.info['OutMatrix'] == 'phylip':
                delimit = ' '
                format = 'phylip'
            else:
                delimit = rje.getDelimit(self.cmd_list, ',')
                # The string 'None' (not the None object) is passed on as the format
                format = 'None'
            outfile = '%s.%s.%s' % (rje.baseFile(
                seqlist.info['Name'],
                True), self.info['Method'], rje.delimitExt(delimit))
            dismatrix.saveMatrix(seqlist.seq, outfile, delimit, format=format)

        except:
            self.log.errorLog('Error in _pepDis',
                              printerror=True,
                              quitchoice=False)
            raise  # Delete this if method error not terrible
コード例 #4
0
ファイル: rje_scansite.py プロジェクト: lyhniupi1/SLiMSuite
    def convert(self,
                filelist=None,
                outfile=None
                ):  ### Converts scansite output files in FileList to Outfile
        '''
        Converts scansite output files in FileList to a single delimited Outfile.
        >> filelist:list = Scansite output files to convert [self.list['FileList'] if empty or None]
        >> outfile:str = Name of delimited output file [self.info['Name'] if empty or None]
        << True once conversion has run; False if there are no input files to convert.
        Only lines matching re_scansite are written and counted. Missing input files are logged and skipped.
        Any other error is logged with the stage reached and re-raised; open files are closed first.
        '''
        _stage = 'Setup'
        try:
            ### Setup ###
            if not filelist:
                filelist = self.list['FileList']
            if not outfile:
                outfile = self.info['Name']
            if not filelist:
                self.log.errorLog(
                    'No scansite files to convert! %s unchanged/not made.' %
                    outfile,
                    printerror=False)
                return False
            delimit = rje.getDelimit(self.cmd_list)
            ext = rje.delimitExt(delimit)
            # Offer to swap the last three characters for the delimiter's extension
            if ext != outfile[-3:]:
                newfile = outfile[:-3] + ext
                if rje.yesNo('Change file name from %s to %s?' %
                             (outfile, newfile)):
                    outfile = newfile
            self.log.printLog(
                '#OUT', 'Converting %d file(s), output to %s.' %
                (len(filelist), outfile))

            ### Output File ###
            _stage = 'Output File'
            makenew = not self.opt['Append'] or not os.path.exists(outfile)
            if makenew:
                OUTFILE = open(outfile, 'w')
            else:
                OUTFILE = open(outfile, 'a')
            try:
                if makenew:  # Fresh file: start with the header row
                    headers = [
                        'seq_id', 'enzyme', 'enz_group', 'aa', 'pos', 'score',
                        'percentile', 'matchseq', 'sa'
                    ]
                    rje.writeDelimit(OUTFILE, headers, delimit)

                ### Conversion ###
                _stage = 'Conversion'
                sx = 0  # Total results written
                for infile in filelist:
                    if not os.path.exists(infile):
                        self.log.errorLog(
                            'Input file %s does not exist! :o(' % infile,
                            False, False)
                        continue
                    fx = 0  # Results written from this file
                    INFILE = open(infile, 'r')
                    try:
                        inline = rje.nextLine(INFILE)
                        while inline is not None:
                            scanlist = rje.matchExp(re_scansite, inline)
                            # Only matching lines are results: never re-write a previous match
                            if scanlist:
                                rje.writeDelimit(OUTFILE, scanlist, delimit)
                                sx += 1
                                fx += 1
                                rje.progressPrint(self, sx)
                            inline = rje.nextLine(INFILE)
                    finally:
                        INFILE.close()
                    self.log.printLog(
                        '#OUT', '%s scansite results from %s. (%s Total.)' %
                        (rje.integerString(fx), infile,
                         rje.integerString(sx)))
            finally:
                OUTFILE.close()

            ### End ###
            _stage = 'End'
            self.log.printLog(
                '#OUT', '%s scansite results output to %s.' %
                (rje.integerString(sx), outfile))
            return True
        except:
            self.log.errorLog('Error in convert(%s)' % _stage,
                              printerror=True,
                              quitchoice=False)
            raise
コード例 #5
0
ファイル: rje_hmm_V1.py プロジェクト: kwikwag/SLiMSuite
    def hmmTable(self,outfile='',append=False,delimit=None):    ### Outputs results table
        '''
        Tabulates HMM search results, one row per hit alignment.
        >> outfile:str = Name of output file [self.info['HMMTab'], else <SearchDB basefile>.hmmer.<ext>]
        >> append:boolean = Whether to append to an existing file rather than start a new one with headers
        >> delimit:str = Delimiter to use [taken from the command list, defaulting to tab]
        << False if the output file name is 'none' (any case), in which case nothing is written.
        '''
        try:
            ### Setup ###
            outfile = outfile or self.info['HMMTab']
            if outfile.lower() == 'none':
                self.log.printLog('#TAB','HMMTab = "None": No table output')
                return False
            delimit = delimit or rje.getDelimit(self.cmd_list,'\t')
            outfile = outfile or '%s.hmmer.%s' % (rje.baseFile(self.info['SearchDB'],True),rje.delimitExt(delimit))
            self.readResults()
            self.log.printLog('#TAB','Tabulating results for %s searches into %s' % (len(self.search),outfile),log=False)

            ### Setup Resfile ###
            headers = ['Type','Name','Start','End','Eval','Score']
            if self.opt['MySQL']:   # MySQL-style field names
                headers = ['HMM','Hit','Hit_Start','Hit_End','Eval','Score']
            startnew = not (append and os.path.exists(outfile))
            if startnew:
                rje.delimitedFileOutput(self,outfile,headers,delimit,rje_backup=True)

            ### Output Search details ###
            for search in self.search:
                for hit in search.hit:
                    for aln in hit.aln:
                        hmmname = search.info['Name']
                        hitname = hit.info['Name']
                        sbjstart = '%d' % aln.stat['SbjStart']
                        sbjend = '%d' % aln.stat['SbjEnd']
                        # Both naming schemes are filled in; headers decides which fields are output
                        out = {'HMM':hmmname,'Hit':hitname,'Hit_Start':sbjstart,'Hit_End':sbjend,
                               'Type':hmmname,'Name':hitname,'Start':sbjstart,'End':sbjend}
                        out['Eval'] = '%.2e' % aln.stat['Expect']
                        out['Score'] = '%.1f' % aln.stat['BitScore']
                        rje.delimitedFileOutput(self,outfile,headers,delimit,out)
            self.log.printLog('#OUT','Results for %s searches output to %s.' % (len(self.search),outfile))
        except:
            self.log.errorLog('Fatal Error during hmmTable(%s).' % outfile)
            raise
コード例 #6
0
ファイル: seqmapper.py プロジェクト: slimsuite/SLiMSuite
 def _setupOutput(self): ### Sets up output files self.str['MapFas','MissFas','MapRes']
     '''
     Names and backs up the mapping output files, then writes the MapRes header row.
     Sets self.str['MapFas'], self.str['MissFas'] (unless Combine is on) and self.str['MapRes'] to
     <ResFile>.<suffix>, and fills self.list['Headers']. A StartFrom value other than ''/'none' switches
     self.bool['Append'] on; a ResFile of ''/'none' is replaced by <SeqIn basefile>.<MapDB basefile>.
     '''
     ### ~ [0] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     delimit = rje.getDelimit(self.cmd_list)
     startfrom = self.str['StartFrom']
     if startfrom.lower() not in ['','none']:
         self.bool['Append'] = True
         self.printLog('#CMD','StartFrom = "%s" so Append=T' % startfrom)
     else: self.str['StartFrom'] = ''
     ### ~ [1] General ResFile ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     suffix = {'MapFas':'mapping.fas','MissFas':'missing.fas','MapRes':'mapping.%s' % rje.delimitExt(delimit)}
     if self.getBool('Combine'): del suffix['MissFas']
     if self.str['ResFile'].lower() in ['','none']:
         seqbase = rje.baseFile(self.str['SeqIn'])
         dbbase = rje.baseFile(self.str['MapDB'],strip_path=True)
         self.str['ResFile'] = '%s.%s' % (seqbase,dbbase)
     for fkey in suffix.keys():
         fname = self.getStr('ResFile') + '.' + suffix[fkey]
         self.setStr({fkey: fname})
         rje.backup(self,self.getStr(fkey))
     ### ~ [2] Headers for MapRes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     #!# Consider replacing with rje_db object? #!#
     headers = ['Query','Hit','Method','MapRank','BlastRank','EVal','Score']
     self.list['Headers'] = headers     # Same list object: additions below are made in place
     for qh in ['Query','Hit']:
         headers.append('%s_Species' % qh)
         if self.bool['GablamOut']:
             headers.extend(['%s_%s' % (qh,st) for st in ['Len','Sim','ID']])
     rje.delimitedFileOutput(self,self.str['MapRes'],self.list['Headers'],delimit)
コード例 #7
0
ファイル: rje_hmm_V1.py プロジェクト: lyhniupi1/SLiMSuite
    def hmmTable(self,
                 outfile='',
                 append=False,
                 delimit=None):  ### Outputs results table
        '''
        Tabulates HMM search results, one row per hit alignment.
        >> outfile:str = Name of output file [self.info['HMMTab'], else <SearchDB basefile>.hmmer.<ext>]
        >> append:boolean = Whether to append to an existing file rather than start a new one with headers
        >> delimit:str = Delimiter to use [taken from the command list, defaulting to tab]
        << False if the output file name is 'none' (any case), in which case nothing is written.
        '''
        try:
            ### Setup ###
            outfile = outfile or self.info['HMMTab']
            if outfile.lower() == 'none':
                self.log.printLog('#TAB', 'HMMTab = "None": No table output')
                return False
            delimit = delimit or rje.getDelimit(self.cmd_list, '\t')
            outfile = outfile or '%s.hmmer.%s' % (rje.baseFile(
                self.info['SearchDB'], True), rje.delimitExt(delimit))
            self.readResults()
            self.log.printLog('#TAB',
                              'Tabulating results for %s searches into %s' %
                              (len(self.search), outfile),
                              log=False)

            ### Setup Resfile ###
            headers = ['Type', 'Name', 'Start', 'End', 'Eval', 'Score']
            if self.opt['MySQL']:  # MySQL-style field names
                headers = [
                    'HMM', 'Hit', 'Hit_Start', 'Hit_End', 'Eval', 'Score'
                ]
            startnew = not (append and os.path.exists(outfile))
            if startnew:
                rje.delimitedFileOutput(self,
                                        outfile,
                                        headers,
                                        delimit,
                                        rje_backup=True)

            ### Output Search details ###
            for search in self.search:
                for hit in search.hit:
                    for aln in hit.aln:
                        hmmname = search.info['Name']
                        hitname = hit.info['Name']
                        sbjstart = '%d' % aln.stat['SbjStart']
                        sbjend = '%d' % aln.stat['SbjEnd']
                        # Both naming schemes are filled in; headers decides which fields are output
                        out = {
                            'HMM': hmmname,
                            'Hit': hitname,
                            'Hit_Start': sbjstart,
                            'Hit_End': sbjend,
                            'Type': hmmname,
                            'Name': hitname,
                            'Start': sbjstart,
                            'End': sbjend,
                        }
                        out['Eval'] = '%.2e' % aln.stat['Expect']
                        out['Score'] = '%.1f' % aln.stat['BitScore']
                        rje.delimitedFileOutput(self, outfile, headers,
                                                delimit, out)
            self.log.printLog(
                '#OUT', 'Results for %s searches output to %s.' %
                (len(self.search), outfile))
        except:
            self.log.errorLog('Fatal Error during hmmTable(%s).' % outfile)
            raise
コード例 #8
0
ファイル: seqmapper.py プロジェクト: kwikwag/SLiMSuite
 def _setupOutput(self): ### Sets up output files self.str['MapFas','MissFas','MapRes']
     '''
     Names and backs up the mapping output files, then writes the MapRes header row.
     Sets self.str['MapFas'], self.str['MissFas'] (unless Combine is on) and self.str['MapRes'] to
     <ResFile>.<suffix>, and fills self.list['Headers']. A StartFrom value other than ''/'none' switches
     self.bool['Append'] on; a ResFile of ''/'none' is replaced by <SeqIn basefile>.<MapDB basefile>.
     '''
     ### ~ [0] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     delimit = rje.getDelimit(self.cmd_list)
     startfrom = self.str['StartFrom']
     if startfrom.lower() not in ['','none']:
         self.bool['Append'] = True
         self.printLog('#CMD','StartFrom = "%s" so Append=T' % startfrom)
     else: self.str['StartFrom'] = ''
     ### ~ [1] General ResFile ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     suffix = {'MapFas':'mapping.fas','MissFas':'missing.fas','MapRes':'mapping.%s' % rje.delimitExt(delimit)}
     if self.getBool('Combine'): del suffix['MissFas']
     if self.str['ResFile'].lower() in ['','none']:
         seqbase = rje.baseFile(self.str['SeqIn'])
         dbbase = rje.baseFile(self.str['MapDB'],strip_path=True)
         self.str['ResFile'] = '%s.%s' % (seqbase,dbbase)
     for fkey in suffix.keys():
         fname = self.getStr('ResFile') + '.' + suffix[fkey]
         self.setStr({fkey: fname})
         rje.backup(self,self.getStr(fkey))
     ### ~ [2] Headers for MapRes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     #!# Consider replacing with rje_db object? #!#
     headers = ['Query','Hit','Method','MapRank','BlastRank','EVal','Score']
     self.list['Headers'] = headers     # Same list object: additions below are made in place
     for qh in ['Query','Hit']:
         headers.append('%s_Species' % qh)
         if self.bool['GablamOut']:
             headers.extend(['%s_%s' % (qh,st) for st in ['Len','Sim','ID']])
     rje.delimitedFileOutput(self,self.str['MapRes'],self.list['Headers'],delimit)
コード例 #9
0
 def _pepDis(self):      ### Peptide Distance
     '''
     Builds a pairwise peptide distance matrix and saves it to file.

     Sequences are loaded with rje_seq.SeqList (autoload=T) and each pair is scored according to
     self.info['Method']:
     - ds_prop / ds_id : position-weighted property-difference / mismatch score over residue pairs, minus
       the mean of the two within-sequence ("self") scores.
     - tot_prop : sum over properties of the absolute difference between the two sequences' property totals.
     - pam : value returned by pam.pamML(ancseq, descseq).
     - best_prop : lowest summed property difference over the circular rotations of the second sequence.
     Distances are stored with dismatrix.addDis() and written with dismatrix.saveMatrix() to
     '<basefile>.<Method>.<ext>', in phylip format when self.info['OutMatrix'] == 'phylip'.
     Any error is logged and then re-raised.
     '''
     try:
         ### <0> ### Setup
         seqlist = rje_seq.SeqList(self.log,self.cmd_list+['autoload=T'])
         dismatrix = rje_dismatrix.DisMatrix(self.log,self.cmd_list)
         dismatrix.info['Name'] = self.info['Method']
         dismatrix.opt['Symmetric'] = True
         # Only build the helper object that the chosen method needs
         if self.info['Method'] in ['ds_prop','tot_prop','best_prop']:
             aaprop = rje_aaprop.AAPropMatrix(self.log,self.cmd_list)
             #aaprop.readAAProp()
             aaprop.makePropDif()
         elif self.info['Method'] == 'pam':
             pam = rje_pam.PamCtrl(log=self.log,cmd_list=self.cmd_list)
         ### <1> ### Make DisMatrix
         for seq1 in seqlist.seq:
             for seq2 in seqlist.seq:
                 # Skip pairs where seq1 comes after seq2 in the list: only one triangle is calculated
                 if seqlist.seq.index(seq1) > seqlist.seq.index(seq2):   # No need to calculate - symmetrical!
                     continue
                 dis = 0
                 if seq1 == seq2 and self.info['OutMatrix'] == 'phylip':
                     # Self-comparison for phylip output is fixed at zero without scoring
                     dis = 0
                 elif self.info['Method'] in ['ds_prop','ds_id']:
                     (self_dis1,self_dis2) = (0,0)
                     # NOTE(review): seq1 is indexed with r2 (bounded by seq2's length) and seq2 with r1
                     # (bounded by seq1's length) - presumably all peptides share one length; confirm.
                     for r1 in range(seq1.seqLen()):
                         for r2 in range(r1,seq2.seqLen()):
                             # a1/a2 = cross-sequence pair; s1/s2 = the mirrored positions within each pair
                             (a1,a2) = (seq1.info['Sequence'][r1],seq2.info['Sequence'][r2])
                             (s1,s2) = (seq1.info['Sequence'][r2],seq2.info['Sequence'][r1])
                             # Pairs of positions further apart get a smaller weight (seqLen - separation)
                             phys_dis = r2 - r1
                             if self.info['Method'] == 'ds_prop':
                                 dis += (aaprop.pdif['%s%s' % (a1,a2)] * (seq1.seqLen() - phys_dis))
                                 self_dis1 += (aaprop.pdif['%s%s' % (a1,s1)] * (seq1.seqLen() - phys_dis))
                                 self_dis2 += (aaprop.pdif['%s%s' % (a2,s2)] * (seq1.seqLen() - phys_dis))
                             elif self.info['Method'] == 'ds_id' and a1 != a2:
                                 dis += (seq1.seqLen() - phys_dis)
                             if self.info['Method'] == 'ds_id' and a1 != s1:
                                 self_dis1 += (seq1.seqLen() - phys_dis)
                             if self.info['Method'] == 'ds_id' and a2 != s2:
                                 self_dis2 += (seq1.seqLen() - phys_dis)
                     # Subtract the mean of the two self scores from the cross-sequence score
                     dis -= (self_dis1 + self_dis2) / 2.0
                 elif self.info['Method'] == 'tot_prop':
                     # Total each property over each sequence, then sum the absolute differences
                     proptot = {}
                     for property in aaprop.prop.keys():
                         proptot[property] = {seq1:0.0,seq2:0.0}
                     for seq in [seq1,seq2]:
                         for r in range(seq.seqLen()):
                             aa = seq.info['Sequence'][r]
                             for property in aaprop.prop.keys():
                                 proptot[property][seq] += string.atof(aaprop.prop[property][aa])
                     for property in aaprop.prop.keys():
                         if proptot[property][seq1] > proptot[property][seq2]:
                             dis += (proptot[property][seq1] - proptot[property][seq2])
                         else:
                             dis += (proptot[property][seq2] - proptot[property][seq1])
                 elif self.info['Method'] == 'pam':
                     dis = pam.pamML(ancseq=seq1.info['Sequence'],descseq=seq2.info['Sequence'])
                 elif self.info['Method'] == 'best_prop':
                     # Starting minimum = sequence length * number of properties
                     min_dis = seq1.seqLen() * len(aaprop.prop)
                     pepseq1 = seq1.info['Sequence']
                     # NOTE(review): pepseq2 is indexed up to seq1's length - presumably seq2 is at least
                     # as long as seq1; confirm.
                     for c in range(seq1.seqLen()):  # Circular start
                         dis = 0
                         # Rotate seq2 so that it starts at position c
                         pepseq2 = seq2.info['Sequence'][c:] + seq2.info['Sequence'][:c]
                         for r in range(seq1.seqLen()):
                             (a1,a2) = (pepseq1[r],pepseq2[r])
                             dis += aaprop.pdif['%s%s' % (a1,a2)]
                         if dis < min_dis:
                             min_dis = dis
                     dis = min_dis
                 dismatrix.addDis(seq1,seq2,dis)
         ### <2> ### Output
         if self.info['OutMatrix'] == 'phylip':
             delimit = ' '
             format = 'phylip'
         else:
             delimit = rje.getDelimit(self.cmd_list,',')
             # The string 'None' (not the None object) is passed on as the format
             format = 'None'
         outfile = '%s.%s.%s' % (rje.baseFile(seqlist.info['Name'],True),self.info['Method'],rje.delimitExt(delimit))
         dismatrix.saveMatrix(seqlist.seq,outfile,delimit,format=format)

     except:
         self.log.errorLog('Error in _pepDis',printerror=True,quitchoice=False)
         raise   # Delete this if method error not terrible
コード例 #10
0
ファイル: peptide_stats.py プロジェクト: lyhniupi1/SLiMSuite
    def _pepStats(self):  ### Peptide Statistics
        '''
        Peptide statistics table.
        Loads sequences (rje_seq.SeqList, autoload=T) and the amino acid property matrix, then writes one row
        per peptide to 'hrb.pepstats.<ext>': the peptide name, the summed value of each property (in sorted
        property order), net_charge (Positive - Negative), and the position-weighted charge_balance and
        hydrophobic_balance.
        The output file is closed even if an error interrupts the run. Errors are logged and re-raised.
        '''
        try:
            ### Setup ###
            seqlist = rje_seq.SeqList(self.log, self.cmd_list + ['autoload=T'])
            aaprop = rje_aaprop.AAPropMatrix(self.log, self.cmd_list)
            aaprop.makePropDif()
            delimit = rje.getDelimit(self.cmd_list)

            ### Output File Setup ###
            OUTFILE = open('hrb.pepstats.%s' % rje.delimitExt(delimit), 'w')
            try:
                headlist = ['peptide']
                ## 10 Dimensional Peptide Property Output ##
                for prop in rje.sortKeys(aaprop.prop):
                    headlist.append(prop.lower())
                    # Convert the stored property values to integers in place ('-' and 'X' are left alone)
                    for aa in aaprop.prop[prop].keys():
                        try:
                            if aa not in ['-', 'X']:
                                aaprop.prop[prop][aa] = string.atoi(
                                    aaprop.prop[prop][aa])
                        except:
                            # Show the offending entry before the error propagates
                            print('%s %s %s %s' % (aaprop.prop, prop, aa,
                                                   aaprop.prop[prop][aa]))
                            raise
                ## Additional Stats ##
                headlist.append('net_charge')
                #headlist.append('hydrophobicity')
                headlist.append('charge_balance')
                headlist.append('hydrophobic_balance')
                #headlist.append('hydrophobicity_balance')
                ## Output
                rje.writeDelimit(OUTFILE, headlist, delimit)

                ### Calculate stats ###
                for pep in seqlist.seq:
                    pepname = pep.shortName()
                    # Trim the name to its '<id>_<digit><C|Q>' prefix when it has one
                    matched = rje.matchExp(r'^(\S+_\d[CQ])', pepname)
                    if matched:
                        pepname = matched[0]
                    outlist = [pepname]
                    pepseq = pep.info['Sequence']
                    ## 10 Dimensional Peptide Property Output ##
                    for prop in rje.sortKeys(aaprop.prop):
                        px = 0
                        for aa in pepseq:
                            px += aaprop.prop[prop][aa]
                        outlist.append('%d' % px)
                    ## Additional Stats ##
                    net_charge = 0
                    for aa in pepseq:
                        net_charge += (aaprop.prop['Positive'][aa] -
                                       aaprop.prop['Negative'][aa])
                    outlist.append('%d' % net_charge)
                    charge_balance = 0
                    hydrophobic_balance = 0
                    # Each residue adds 1/(r+1) of its value and subtracts 1/(10-r) of it.
                    # NOTE(review): the constant 10 presumably assumes 10-residue peptides; a sequence longer
                    # than 10 reaches r == 10 and divides by zero - confirm the intended peptide length.
                    for r, aa in enumerate(pepseq):
                        charge_balance += aaprop.prop['Charged'][aa] * (
                            1.0 / (r + 1))
                        charge_balance -= aaprop.prop['Charged'][aa] * (
                            1.0 / (10 - r))
                        hydrophobic_balance += aaprop.prop['Hydrophobic'][
                            aa] * (1.0 / (r + 1))
                        hydrophobic_balance -= aaprop.prop['Hydrophobic'][
                            aa] * (1.0 / (10 - r))
                    outlist.append('%.3f' % charge_balance)
                    outlist.append('%.3f' % hydrophobic_balance)
                    rje.writeDelimit(OUTFILE, outlist, delimit)
            finally:
                ### Finish ###
                OUTFILE.close()

        except:
            self.log.errorLog('Error in _pepStats',
                              printerror=True,
                              quitchoice=False)
            raise  # Delete this if method error not terrible
コード例 #11
0
	def run(self):		### Main Run method
		'''
		Main Run method.

		Performs the first applicable task:
		- opt['SLiMDisc']: returns the result of self.slimDisc().
		- opt['Teiresias']: saves the sequences as fasta, runs TEIRESIAS on them (unless existing output is
		  reused), reads the patterns back in, scores them and writes delimited results - one combined table,
		  or separate pattern and occurrence tables if opt['MySQL'].
		- info['Info'] other than 'None': loads motifs from that file and writes an information score per motif.

		Returns the slimDisc() result, False if the info['Info'] file does not exist, otherwise None.
		Any exception is logged through self.log.errorLog and then re-raised.
		'''
		try:
			### SLiMDisc Run ###
			if self.opt['SLiMDisc']:
				return self.slimDisc()

			### TEIRESIAS ###
			if self.opt['Teiresias']:
				## Setup ##
				seqlist = rje_seq.SeqList(self.log,self.cmd_list)
				basefile = rje.baseFile(seqlist.info['Name'],True)
				infile = '%s.teiresias.fas' % basefile
				outfile = '%s.teiresias.out' % basefile
				run_teiresias = True
				if rje.isYounger(outfile,infile) == outfile:
					# Existing output is reused when non-interactive or when the user declines to regenerate
					if self.stat['Interactive'] < 1 or not rje.yesNo('%s and %s exist already. Regenerate?' % (infile,outfile),'N'):
						run_teiresias = False
				## Run TEIRESIAS ##
				if run_teiresias:
					seqlist.saveFasta(seqfile=infile,name='Teiresias')	### Saves sequences in fasta format
					command = rje.makePath(self.info['TeiresiasPath'],True)
					command += ' -i%s -o%s %s' % (infile,outfile,self.info['TeiresiasOpt'])
					self.log.printLog('#CMD',command)
					os.system(command)
				## Read Results ##
				self.verbose(0,2,'Reading TEIRESIAS output from %s...' % outfile,1)
				self.list['Pattern'] = []
				teiresias_re = r'^(\d+)\s+(\d+)\s+(\S+)\s+(\d.+\d)$'
				RESULTS = open(outfile,'r')
				try:
					for line in RESULTS:
						patdata = rje.matchExp(teiresias_re,line)	# Matched once and reused
						if patdata:	# New pattern
							self.addTeiresiasPattern(patdata)
						elif len(line) > 3 and line[0] != '#':
							self.log.errorLog('Did not recognise line: %s' % line,False,False)
				finally:
					RESULTS.close()
				patx = len(self.list['Pattern'])
				self.log.printLog('#PAT','%s TEIRESIAS patterns read from %s.' % (rje.integerString(patx),outfile))
				## Calculate Information Content ##
				aafreq = seqlist.aaFreq()
				self.verbose(0,3,'Calculating Information Content & Length stats...',0)
				occx = 0
				for pattern in self.list['Pattern']:
					pattern.stat['Info'] = self.calculateScore(pattern.info['Pattern'],aafreq)
					pattern._makeLength()
					occx += 1
					rje.progressPrint(self,occx,patx//100,patx//10)	# Explicit integer division
				self.verbose(0,1,'...Done!',2)
				## Prepare Results ##
				delimit = rje.getDelimit(self.cmd_list)
				if self.info['Name'] == 'None':
					self.info['Name'] = '%s.teiresias.%s' % (rje.baseFile(seqlist.info['Name'],True),rje.delimitExt(delimit))
				mysql = self.opt['MySQL']	# True = two tables (patterns & occurrences)
				append = self.opt['Append']
				if append:
					filemode = 'a'
				else:
					filemode = 'w'	# Fresh files are overwritten and given a header row
				PATFILE = OCCFILE = RESFILE = None
				try:
					if mysql:	# Two tables
						patfile = os.path.splitext(self.info['Name'])
						occfile = '%s.occ%s' % (patfile[0],patfile[1])
						patfile = '%s.patterns%s' % (patfile[0],patfile[1])
						PATFILE = open(patfile,filemode)
						OCCFILE = open(occfile,filemode)
						if not append:
							rje.writeDelimit(PATFILE,['pattern','tot_occ','seq_occ','info','len','fix','wild'],delimit)
							rje.writeDelimit(OCCFILE,['seq_id','pos','pattern','pat_match'],delimit)
					else:	# Single combined table, written to info['Name']
						RESFILE = open(self.info['Name'],filemode)
						if not append:
							rje.writeDelimit(RESFILE,['Sequence Name','Position','Pattern','Match','Total Occurrences','Num Sequences','Information Content','Length','Fixed','Wildcard'],delimit)
					## Save Results ##
					occx = 0
					for pattern in self.list['Pattern']:
						patstats = []
						for stat in ['OccCount','SeqCount','Info','Length','Fixed','Wildcards']:
							patstats.append('%d' % pattern.stat[stat])
						patstats[2] = '%.3f' % pattern.stat['Info']	# Info is reported to 3 d.p. rather than as an integer
						if mysql:	# Two tables
							rje.writeDelimit(PATFILE,[pattern.info['Pattern']] + patstats,delimit)
						for occ in rje.sortKeys(pattern.occ):
							seq = seqlist.seq[occ]
							for pos in pattern.occ[occ]:
								match = seq.info['Sequence'][pos:(pos+pattern.stat['Length'])]
								outlist = [seq.shortName(),'%d' % pos,pattern.info['Pattern'],match]
								if mysql:	# Two tables
									rje.writeDelimit(OCCFILE,outlist,delimit)
								else:
									rje.writeDelimit(RESFILE,outlist+patstats,delimit)
								occx += 1
				finally:
					# Close whichever output files were opened, even if writing failed
					for handle in (PATFILE,OCCFILE,RESFILE):
						if handle is not None:
							handle.close()
				if mysql:	# Two tables
					self.log.printLog('#OUT','%s patterns output to %s.' % (rje.integerString(patx),patfile))
					self.log.printLog('#OUT','%s pattern occurrences output to %s.' % (rje.integerString(occx),occfile))
				else:
					self.log.printLog('#OUT','%s occurrences of %s patterns output to %s.' %
									  (rje.integerString(occx),rje.integerString(patx),self.info['Name']))

			### InfoContent ###
			elif self.info['Info'] != 'None':
				## Setup ##
				alphabet = rje_seq.alph_protx
				if not os.path.exists(self.info['Info']):
					self.log.errorLog('Input file %s missing!' % self.info['Info'],False,False)
					return False
				mypresto = presto.Presto(self.log,self.cmd_list)
				mypresto.loadMotifs(file=self.info['Info'],clear=True)
				seqlist = rje_seq.SeqList(self.log,self.cmd_list+['autoload=T'])
				if seqlist.seqNum() > 0:
					aafreq = seqlist.aaFreq(alphabet=None,fromfile=None,loadfile=None,total=False)  ### Returns dictionary of AA (& gap etc.) frequencies
				else:	# No sequences loaded: use equal frequencies across the default alphabet
					aafreq = {}
					for aa in alphabet:
						aafreq[aa] = 1.0 / len(alphabet)
				alphabet = aafreq.keys()
				maxinfo = 0
				for aa in alphabet:
					maxinfo += (aafreq[aa] * math.log(aafreq[aa],2))
				## Output ##
				delimit = rje.getDelimit(self.cmd_list)
				ext = rje.delimitExt(delimit)
				outfile = '%s.info.%s' % (rje.baseFile(self.info['Info'],True,['.txt','.%s' % ext]),ext)
				if self.opt['Append']:
					OUTFILE = open(outfile,'a')
				else:
					OUTFILE = open(outfile,'w')
				try:
					if not self.opt['Append']:
						rje.writeDelimit(OUTFILE,['motif','pattern','info'],delimit)

					## Calculate Information Scores ##
					for motif in mypresto.motif:
						self.verbose(2,4,motif.info['Sequence'],0)
						elements = motif.info['Sequence'].replace('X','.').split('-')
						pattern = ''
						for el in elements:
							if el.startswith('.{'):	# Ambiguous spacer length - compress
								pattern += '.'
							else:
								pattern += el
						self.verbose(2,2,'=> %s' % pattern,1)
						motif.stat['Info'] = self.calculateInformationContent(pattern,aafreq,maxinfo,self.stat['InfoGapPen'])
						self.verbose(0,3,'%s (%s) = %.2f' % (motif.info['Name'],pattern,motif.stat['Info']),1)
						## Output ##
						rje.writeDelimit(OUTFILE,[motif.info['Name'],pattern,'%.2f' % motif.stat['Info']],delimit)
				finally:
					## Finish ##
					OUTFILE.close()
		except:
			self.log.errorLog('Error in run().',printerror=True,quitchoice=False)
			raise	# Delete this if method error not terrible
コード例 #12
0
ファイル: peptide_stats.py プロジェクト: kwikwag/SLiMSuite
    def _pepStats(self):      ### Peptide Distance
        '''
        Writes per-peptide amino acid property statistics to a delimited file.

        Loads sequences (autoload=T) and an amino acid property matrix from the command list, then writes
        'hrb.pepstats.<ext>' with a header and one row per sequence containing: the peptide name, the summed
        value of every property in aaprop.prop (sorted by property name), the net charge (Positive - Negative)
        and position-weighted charge and hydrophobic balances.

        Any exception is logged through self.log.errorLog and then re-raised.
        '''
        try:
            ### Setup ###
            seqlist = rje_seq.SeqList(self.log,self.cmd_list+['autoload=T'])
            aaprop = rje_aaprop.AAPropMatrix(self.log,self.cmd_list)
            aaprop.makePropDif()
            delimit = rje.getDelimit(self.cmd_list)

            ### Output File Setup ###
            OUTFILE = open('hrb.pepstats.%s' % rje.delimitExt(delimit),'w')
            try:
                headlist = ['peptide']
                ## 10 Dimensional Peptide Property Output ##
                for prop in rje.sortKeys(aaprop.prop):
                    headlist.append(prop.lower())
                    # Property values are converted to integers in place (except for '-' and 'X')
                    for aa in aaprop.prop[prop].keys():
                        try:
                            if aa not in ['-','X']:
                                aaprop.prop[prop][aa] = string.atoi(aaprop.prop[prop][aa])
                        except:
                            # Dump the offending value before re-raising to aid debugging
                            print('%s %s %s %s' % (aaprop.prop, prop, aa, aaprop.prop[prop][aa]))
                            raise
                ## Additional Stats ##
                headlist.append('net_charge')
                headlist.append('charge_balance')
                headlist.append('hydrophobic_balance')
                ## Output
                rje.writeDelimit(OUTFILE,headlist,delimit)

                ### Calculate stats ###
                for pep in seqlist.seq:
                    pepname = pep.shortName()
                    shortmatch = rje.matchExp(r'^(\S+_\d[CQ])',pepname)
                    if shortmatch:  # Trim the name down to the matched prefix
                        pepname = shortmatch[0]
                    outlist = [pepname]
                    pepseq = pep.info['Sequence']
                    ## 10 Dimensional Peptide Property Output ##
                    for prop in rje.sortKeys(aaprop.prop):
                        px = 0
                        for aa in pepseq:
                            px += aaprop.prop[prop][aa]
                        outlist.append('%d' % px)
                    ## Additional Stats ##
                    net_charge = 0
                    for aa in pepseq:
                        net_charge += (aaprop.prop['Positive'][aa] - aaprop.prop['Negative'][aa])
                    outlist.append('%d' % net_charge)
                    charge_balance = 0
                    hydrophobic_balance = 0
                    for r, aa in enumerate(pepseq):
                        # Each residue is weighted towards the start (1/(r+1)) and against the end (1/(10-r)).
                        # NOTE(review): the hard-coded 10 presumably is the expected peptide length; a sequence
                        # longer than 10 raises ZeroDivisionError at r == 10 - confirm intended handling.
                        head_weight = 1.0 / (r+1)
                        tail_weight = 1.0 / (10-r)
                        charge_balance += aaprop.prop['Charged'][aa] * head_weight
                        charge_balance -= aaprop.prop['Charged'][aa] * tail_weight
                        hydrophobic_balance += aaprop.prop['Hydrophobic'][aa] * head_weight
                        hydrophobic_balance -= aaprop.prop['Hydrophobic'][aa] * tail_weight
                    outlist.append('%.3f' % charge_balance)
                    outlist.append('%.3f' % hydrophobic_balance)
                    rje.writeDelimit(OUTFILE,outlist,delimit)
            finally:
                ### Finish ###
                OUTFILE.close()     # Closed even if an error interrupts the output

        except:
            self.log.errorLog('Error in _pepStats',printerror=True,quitchoice=False)
            raise   # Delete this if method error not terrible