예제 #1
0
 def startFork(self, fdict):  ### Sets a new fork going using the data in fdict.
     '''
     Sets a new fork going using the data in fdict.
     >> fdict:dict = fork record found in self.list['Forked']. 'cmd', 'ID' and 'FID' are added in place, plus 'Log'
        and 'ResFile' if RjePy is set, and 'PID' (in the parent) once forked. fdict['Mem'] is read for the log message.
     << None, or the return value of self.endJob(fdict) if the log file could not be created.
     '''
     try:  ### ~ [0] ~ Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         fdict['cmd'] = self.list['ToFork'].pop(0)
         fdict['ID'] = 'Fork %d' % self.list['Forked'].index(fdict)
         fdict['FID'] = 'f_%s' % rje.randomString(6)
         if self.getBool('RjePy'):
             fdict['Log'] = '%s%s.log' % (self.getStr('RunPath'), fdict['FID'])
             fdict['cmd'] += ' basefile=%s' % (fdict['Log'])
             fdict['ResFile'] = self.list['ResFile'][0:]  # Copy, so the fork has its own list
             try:
                 # Create/truncate the log and release the handle straight away, so that the open file
                 # is not left dangling and is not inherited by the forked child.
                 open(fdict['Log'], 'w').close()
             except:
                 self.errorLog('Log problem. Aborting fork.')
                 return self.endJob(fdict)
         ### ~ [2] ~ Add Fork ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.setNum({'KillTime': time.time()})
         cpid = os.fork()  # Fork child process
         if cpid:  # parent process records pid of child process
             fdict['PID'] = cpid
             self.printLog('#FORK', 'Forking cmd as %s: %d remain; %.1f%% mem free' %
                           (cpid, len(self.list['ToFork']), fdict['Mem']))
             self.printLog('#FORK', '%s cmd: %s' % (cpid, fdict['cmd']))
         else:  # child process
             try:
                 os.system(fdict['cmd'])
             except SystemExit:
                 raise  # Child
             except:
                 # The child must never return into the parent's control flow: report and terminate.
                 try:
                     self.errorLog('Forker.startFork error')
                 finally:
                     os._exit(1)
             os._exit(0)
     except SystemExit:
         raise  # Child
     except:
         self.errorLog('Forker.startFork error')
예제 #2
0
 def nextSeqJob(self, host_id):  ### Sets a new job running on host with given index                             #V1.0
     '''
     Sets a new job running on host with given index.
     >> host_id:int = index of the host in self.list['Hosts']; also the key of the job record in self.dict['Running'].
     The job record is left empty if there are no sequences left to run. On a fork OSError the method sleeps for
     10 minutes, stores an 'Error' entry in the job record and returns.
     '''
     try:  ### ~ [1] ~ Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         jdict = self.dict['Running'][host_id] = {}  # Setup empty dictionary to fill, if jobs available
         ## Take the next sequence that is not in the Pickup list. This is a loop rather than one recursive call per
         ## skipped sequence, so that a long run of skipped sequences cannot hit the recursion limit.
         seq = None
         while self.list['Seq']:
             nextseq = self.list['Seq'].pop(0)
             if nextseq.info['AccNum'] not in self.list['Pickup']:
                 seq = nextseq
                 break
         if seq is None:
             return  # Out of sequences: stop
         jran = 'i_%s' % rje.randomString(6)
         jdict['Log'] = '%s%s.log' % (self.getStr('RunPath'), jran)
         jdict['Qry'] = '%s%s.qry' % (self.getStr('RunPath'), jran)
         for out in self.list['OutList']:
             jdict[out] = '%s%s.%s' % (self.getStr('RunPath'), jran, out)
         # Write and close the query file before forking, so it is complete on disk when the job reads it.
         with open(jdict['Qry'], 'w') as QRY:
             QRY.write('>%s\n%s\n' % (seq.info['Name'], seq.info['Sequence']))
         job = 'python %s%s.py' % (self.getStr('PyPath'), self.getStr('Farm'))
         if self.getStr('JobINI'):
             job = '%s ini=%s' % (job, self.getStr('JobINI'))
         job = '%s seqin=%s i=-1 v=-1 basefile=%s' % (job, jdict['Qry'], jran)
         initial_cmds = 'cd ' + self.getStr('RunPath') + ' ; echo %s on `hostname` as %s ; ' % (seq.shortName(), jran)
         if self.rsh():  # Wrap the job for remote execution on the chosen host
             job = '%s %s ; echo Finishing on `hostname`' % (initial_cmds, job)
             job = "rsh %s '%s'" % (self.list['Hosts'][host_id], job)
         ### ~ [2] ~ Add Job ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         try:
             cpid = os.fork()  # Fork child process
         except OSError:
             self.errorLog('JobFarmer.nextJob error. Will sleep for 10 mins then continue.')
             time.sleep(600)
             self.printLog('#YAWN', 'Re-awakening JobFarmer nextJob. Fingers crossed.')
             jdict['Error'] = 'JobFarmer.nextJob OSError.'  #!# Make sure this node is retried.
             return
         if cpid:  # parent process records pid of child rsh process
             jdict['PID'] = cpid
             self.printLog('#SEQ', 'Running %s as %s [%d::%s]: %d remain' %
                           (seq.shortName(), cpid, host_id, self.list['Hosts'][host_id], len(self.list['Seq'])))
         else:  # child process
             try:
                 os.system(job)
             except SystemExit:
                 raise  # Child
             except:
                 # The child must never return into the parent's control flow: report and terminate.
                 try:
                     self.errorLog('JobFarmer.nextSeqJob error')
                 finally:
                     os._exit(1)
             os._exit(0)
     except SystemExit:
         raise  # Child
     except:
         self.errorLog('JobFarmer.nextSeqJob error')
예제 #3
0
 def _setAttributes(self):   ### Sets Attributes of Object
     '''
     Sets Attributes of Object:
     - Info:str ['Program','QPath','Job','PyPath','Email','HPC','DependHPC']
     - Opt:boolean ['ModPurge','RjePy','Report','MailStart']
     - Stats:float ['Nodes','Walltime','PPN','Pause']
     - List:list ['Depend','PreCall','Modules']
     - Dict:dictionary []
     - Obj:RJE_Object []
     Note: a 'VMem' stat value is also set below although 'VMem' is not included in statlist.
     '''
     ### ~ Attribute name lists ~ ###
     self.infolist = [
         'Program', 'QPath', 'Job', 'PyPath', 'Email', 'HPC', 'DependHPC',
     ]
     self.optlist = ['ModPurge', 'RjePy', 'Report', 'MailStart']
     self.statlist = ['Nodes', 'Walltime', 'PPN', 'Pause']
     self.listlist = ['Depend', 'PreCall', 'Modules']
     self.dictlist = []
     self.objlist = []
     ### ~ Blanket defaults for every listed attribute ~ ###
     self._setDefaults(info='None', opt=True, stat=0.0, obj=None, setlist=True, setdict=True)
     ### ~ Specific values replacing the blanket defaults ~ ###
     info_values = {
         'QPath': os.path.abspath(os.curdir),        # Current directory at the time of the call
         'Job': 'rje_%s' % rje.randomString(4),      # Job name with a random 4-character suffix
         'PyPath': '/home/re1u06/Serpentry/',
         'Email': '',
         'HPC': 'IRIDIS4',
         'DependHPC': 'blue30.iridis.soton.ac.uk',
     }
     self.setInfo(info_values)
     stat_values = {'Walltime': 60, 'Nodes': 1, 'PPN': 12, 'Pause': 5, 'VMem': 48}
     self.setStat(stat_values)
     opt_values = {'Report': False, 'MailStart': False, 'ModPurge': True}
     self.setOpt(opt_values)
예제 #4
0
 def nextSeqJob(self,host_id):  ### Sets a new job running on host with given index                              #V1.0
     '''
     Sets a new job running on host with given index.
     >> host_id:int = index of the host in self.list['Hosts']; also the key of the job record in self.dict['Running'].
     The job record is left empty if there are no sequences left to run. On a fork OSError the method sleeps for
     10 minutes, stores an 'Error' entry in the job record and returns.
     '''
     try:### ~ [1] ~ Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         jdict = self.dict['Running'][host_id] = {}          # Setup empty dictionary to fill, if jobs available
         ## Find the next sequence that is not in the Pickup list. Skipping is done in a loop (not by a recursive call
         ## per skipped sequence) so that many consecutive skips cannot exceed the recursion limit.
         seq = None
         while self.list['Seq']:
             candidate = self.list['Seq'].pop(0)
             if candidate.info['AccNum'] in self.list['Pickup']: continue    # Skip this sequence
             seq = candidate
             break
         if seq is None: return                              # Out of sequences: stop
         jran = 'i_%s' % rje.randomString(6)
         jdict['Log'] = '%s%s.log' % (self.getStr('RunPath'),jran)
         jdict['Qry'] = '%s%s.qry' % (self.getStr('RunPath'),jran)
         for out in self.list['OutList']: jdict[out] = '%s%s.%s' % (self.getStr('RunPath'),jran,out)
         # Write and close the query file before forking, so it is complete on disk when the job reads it.
         with open(jdict['Qry'],'w') as QRY:
             QRY.write('>%s\n%s\n' % (seq.info['Name'],seq.info['Sequence']))
         job = 'python %s%s.py' % (self.getStr('PyPath'),self.getStr('Farm'))
         if self.getStr('JobINI'): job = '%s ini=%s' % (job,self.getStr('JobINI'))
         job = '%s seqin=%s i=-1 v=-1 basefile=%s' % (job,jdict['Qry'],jran)
         initial_cmds = 'cd ' + self.getStr('RunPath') + ' ; echo %s on `hostname` as %s ; ' % (seq.shortName(),jran)
         if self.rsh():          # Wrap the job for remote execution on the chosen host
             job = '%s %s ; echo Finishing on `hostname`' % (initial_cmds,job)
             job = "rsh %s '%s'" % (self.list['Hosts'][host_id],job)
         ### ~ [2] ~ Add Job ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         try: cpid = os.fork()        # Fork child process
         except OSError:
             self.errorLog('JobFarmer.nextJob error. Will sleep for 10 mins then continue.')
             time.sleep(600)
             self.printLog('#YAWN','Re-awakening JobFarmer nextJob. Fingers crossed.')
             jdict['Error'] = 'JobFarmer.nextJob OSError.'     #!# Make sure this node is retried.
             return
         if cpid:                # parent process records pid of child rsh process
             jdict['PID'] = cpid
             self.printLog('#SEQ','Running %s as %s [%d::%s]: %d remain' % (seq.shortName(),cpid,host_id,self.list['Hosts'][host_id],len(self.list['Seq'])))
         else:                   # child process
             try: os.system(job)
             except SystemExit: raise    # Child
             except:
                 # The child must never return into the parent's control flow: report and terminate.
                 try: self.errorLog('JobFarmer.nextSeqJob error')
                 finally: os._exit(1)
             os._exit(0)
     except SystemExit: raise    # Child
     except: self.errorLog('JobFarmer.nextSeqJob error')
예제 #5
0
 def nextJob(self, host_id):  ### Sets a new job running on host with given index                                #V1.0
     '''
     Sets a new job running on host with given index.
     >> host_id:int = index of the host in self.list['Hosts']; also the key of the job record in self.dict['Running'].
     If SeqBySeq is set, the work is handed to self.nextSeqJob(host_id). If the host has less free memory than the
     MemFree setting, a 'WAIT - ...' string is stored as the record's 'PID' and nothing is started. If there are no
     SubJobs left, the record is left empty.
     '''
     try:  ### ~ [1] ~ Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         node = self.list['Hosts'][host_id]
         freemem = self.freeMem(node)
         if self.getBool('SeqBySeq'): return self.nextSeqJob(host_id)
         jdict = self.dict['Running'][host_id] = {}  # Setup empty dictionary to fill, if jobs available
         if self.getNum('MemFree') > freemem:  # Not enough free memory: flag the host as waiting
             jdict['PID'] = 'WAIT - %.1f%% %s mem' % (freemem * 100.0, node)
             return
         if self.list['SubJobs']: job = self.list['SubJobs'].pop(0)
         else: return
         if self.getBool('RjePy'):
             jdict['Log'] = '%si_%s.log' % (self.getStr('RunPath'), rje.randomString(6))
         if self.getStr('JobINI'):
             job = '%s ini=%s' % (job, self.getStr('JobINI'))
         if 'Log' in jdict:
             job = '%s log=%s' % (job, jdict['Log'])
             try:
                 # Create/truncate the log and release the handle straight away, so that the open file
                 # is not left dangling and is not inherited by the forked child.
                 open(jdict['Log'], 'w').close()
             except:
                 self.errorLog('Log problem. Aborting %s job.' % host_id)
                 return self.endJob(host_id)
         ### ~ [2] ~ Add Job ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         rsh = "rsh %s '%s'" % (node, job)  # Remote version of the job; only used if self.rsh()
         cpid = os.fork()  # Fork child process
         if cpid:  # parent process records pid of child rsh process
             jdict['PID'] = cpid
             if self.rsh(): self.printLog('#SUB', rsh)
             else: self.printLog('#SUB', job)
             self.printLog('#JOB', 'Running job as %s: %d remain; %.1f%% mem free' %
                           (cpid, len(self.list['SubJobs']), freemem * 100.0))
         else:  # child process
             try:
                 if self.rsh(): os.system(rsh)
                 else: os.system(job)
             except SystemExit:
                 raise  # Child
             except:
                 # The child must never return into the parent's control flow: report and terminate.
                 try:
                     self.errorLog('JobFarmer.nextJob error')
                 finally:
                     os._exit(1)
             os._exit(0)
     except SystemExit:
         raise  # Child
     except:
         self.errorLog('JobFarmer.nextJob error')
예제 #6
0
 def startFork(self, fdict):  ### Sets a new fork going using the data in fdict.
     '''
     Sets a new fork going using the data in fdict.
     >> fdict:dict = fork record. 'seq', 'ID', 'FID', 'Log' and 'ResFile' are added in place, plus 'PID' (in the
        parent) once forked. fdict['Mem'] is read for the log message.
     << None, or the return value of self.endJob(fdict) if the log file could not be created.
     The child process runs self.depthCharge() on the sequence itself and then terminates.
     '''
     try:  ### ~ [0] ~ Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         seqin = self.seqinObj()
         fdict['seq'] = self.list['ToFork'].pop(0)
         (seqname, sequence) = seqin.getSeq(fdict['seq'])
         fdict['ID'] = 'Fork -%d' % (len(self.list['ToFork']) + 1)
         fdict['FID'] = 'f_%s' % rje.randomString(6)
         fdict['Log'] = '%s%s.log' % (self.getStr('RunPath'), fdict['FID'])
         fdict['ResFile'] = ['depthcharge.tdt']
         try:
             # Create/truncate the log and release the handle straight away, so that the open file
             # is not left dangling and is not inherited by the forked child.
             open(fdict['Log'], 'w').close()
         except:
             self.errorLog('Log problem. Aborting fork.')
             return self.endJob(fdict)
         ### ~ [2] ~ Add Fork ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.setNum({'KillTime': time.time()})
         cpid = os.fork()  # Fork child process
         if cpid:  # parent process records pid of child process
             fdict['PID'] = cpid
             logfork = self.getBool('LogFork')
             self.printLog('\r#FORK',
                           'Forking seq as %s: %d remain; %.1f%% mem free' %
                           (cpid, len(self.list['ToFork']), fdict['Mem']),
                           log=logfork, screen=logfork or self.v() > 1)
             self.printLog('#FORK', '%s seq: %s' % (cpid, fdict['seq']),
                           log=self.getBool('LogFork'),
                           screen=self.getBool('LogFork') or self.v() > 1)
         else:  # child process
             try:
                 ## Redirect this process to its own basefile and log before doing the work
                 self.baseFile(fdict['FID'])
                 self.setInt({'Interactive': -1})
                 self.log.info['ErrorLog'] = ''
                 self.log.info['LogFile'] = fdict['Log']
                 self.log.opt['Quiet'] = True  # When True, will not write to screen or log apart from errors.
                 self.list['ResFile'] = fdict['ResFile']
                 self.depthCharge(seqname, sequence)
             except SystemExit:
                 raise  # Child
             except:
                 # The child must never return into the parent's fork loop: report and terminate.
                 try:
                     self.errorLog('Forker.startFork error')
                 finally:
                     os._exit(1)
             os._exit(0)
     except SystemExit:
         raise  # Child
     except:
         self.errorLog('Forker.startFork error')
예제 #7
0
 def startFork(self,fdict):  ### Sets a new fork going using the data in fdict.
     '''
     Sets a new fork going using the data in fdict.
     >> fdict:dict = fork record found in self.list['Forked']. 'cmd', 'ID' and 'FID' are added in place, plus 'Log'
        and 'ResFile' if RjePy is set, and 'PID' (in the parent) once forked. fdict['Mem'] is read for the log message.
     << None, or the return value of self.endJob(fdict) if the log file could not be created.
     '''
     try:### ~ [0] ~ Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         fdict['cmd'] = self.list['ToFork'].pop(0)
         fdict['ID'] = 'Fork %d' % self.list['Forked'].index(fdict)
         fdict['FID'] = 'f_%s' % rje.randomString(6)
         if self.getBool('RjePy'):
             fdict['Log'] = '%s%s.log' % (self.getStr('RunPath'),fdict['FID'])
             fdict['cmd'] += ' basefile=%s' % (fdict['Log'])
             fdict['ResFile'] = self.list['ResFile'][0:]     # Copy, so the fork has its own list
             # Create/truncate the log and release the handle at once: it is then neither left dangling nor
             # inherited by the forked child.
             try: open(fdict['Log'],'w').close()
             except: self.errorLog('Log problem. Aborting fork.'); return self.endJob(fdict)
         ### ~ [2] ~ Add Fork ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.setNum({'KillTime':time.time()})
         cpid = os.fork()        # Fork child process
         if cpid:                # parent process records pid of child process
             fdict['PID'] = cpid
             self.printLog('#FORK','Forking cmd as %s: %d remain; %.1f%% mem free' % (cpid,len(self.list['ToFork']),fdict['Mem']))
             self.printLog('#FORK','%s cmd: %s' % (cpid,fdict['cmd']))
         else:                   # child process
             try: os.system(fdict['cmd'])
             except SystemExit: raise    # Child
             except:
                 # The child must never return into the parent's control flow: report and terminate.
                 try: self.errorLog('Forker.startFork error')
                 finally: os._exit(1)
             os._exit(0)
     except SystemExit: raise    # Child
     except: self.errorLog('Forker.startFork error')
예제 #8
0
def loadOrthAln(callobj,seq,gopher=True):    ### Identifies file, loads and checks alignment.
    '''
    Identifies file, loads and checks alignment. If the identified file is not actually aligned, then RJE_SEQ will try to
    align the proteins using MUSCLE or ClustalW.
    >> callobj:Object containing settings for stats generation (MotifList, generally).
    >> seq:Sequence being analysed.
    >> gopher:bool [True] = whether to try to generate alignment with GOPHER if callobj.opt['Gopher']
    << aln = SeqList object containing alignment with queryseq, or None if no usable alignment could be loaded.
    '''
    v = None    # Verbosity to restore in the error handler; stays None if it could not be read from callobj
    try:
        ### Setup Attributes ###
        v = callobj.stat['Verbose']
        alndir = rje.makePath(callobj.info['AlnDir'])
        alnext = callobj.info['AlnExt']

        ### Identify File ###
        if alnext[0] != '.': alnext = '.%s' % alnext
        # Candidate file name stems, tried in order. The final None means "no existing file" and triggers GOPHER.
        alnstart = [seq.info['AccNum'],seq.info['ID'],seq.shortName(),None]
        if v > 2: callobj.log.printLog('#PRESTO','%s' % callobj.opt)  #!# Old debugging? #!#
        if callobj.opt['Gopher'] and callobj.opt['FullForce']:
            if v > 0: callobj.log.printLog('#ALN','FullForce=T. Will call Gopher for %s regardless of existing files' % seq.shortName())
            alnstart = [None]
        for alnfile in alnstart:
            if alnfile:
                alnfile = '%s%s%s' % (alndir,alnfile,alnext)
                if rje.checkForFile(alnfile): break  # File found
            else:
                #!# Sort out logging and see if Gopher can be used directly rather than just run() #!#
                ### Run GOPHER ###
                if gopher and callobj.opt['Gopher']:  #!# Add working version for PRESTO and SlimPickings #!#
                    callobj.deBug('Run GOPHER in %s' % callobj.info['GopherDir'])
                    mydir = os.getcwd()
                    os.chdir(callobj.info['GopherDir'])
                    callobj.log.printLog('\n#GOPHER','Running GOPHER on %s' % seq.shortName())
                    try:    #!# Add log.silent() method? #!#
                        gcmd = ['orthtree'] + callobj.cmd_list + ['gnspacc=T','i=-1']
                        solo_gopher = gopher_V2.GopherFork(log=callobj.log,cmd_list=gcmd)
                        solo_gopher.info['Name'] = seq.shortName()
                        solo_gopher.obj['Sequence'] = seq
                        solo_gopher.obj['BLAST'] = gopher_V2.Gopher(callobj.log,gcmd).setupBlast()  #!# Contemplate setting up Gopher in callobj #!#
                        solo_gopher.obj['BLAST'].log = callobj.log
                        solo_gopher.run('orthalign')
                    except:
                        os.chdir(mydir)     # Return to the starting directory before reporting
                        callobj.log.errorLog('Problem with Gopher run!')
                        return None
                    os.chdir(mydir)
                if callobj.opt['Gopher']:   # GOPHER output is looked for under the accession number
                    alnfile = '%s%s%s' % (alndir,seq.info['AccNum'],alnext)
                    if not os.path.exists(alnfile):
                        alnfile = None
                if not alnfile:
                    callobj.log.printLog('#ALN','No alignment file found for %s in %s.' % (seq.shortName(),alndir),screen=False)
                    return None

        ### Load Alignment ###
        callobj.log.stat['Verbose'] = v - 1     # Reduce verbosity while loading; restored straight after
        alncmd = ['seqin=None','query=%s' % seq.shortName(),'accnr=F','seqnr=F','autofilter=F','align=T','gnspacc=F']
        aln = rje_seq.SeqList(log=callobj.log,cmd_list=callobj.cmd_list+alncmd)
        aln.loadSeqs(seqfile=alnfile,seqtype='Protein',aln=True,nodup=None)
        callobj.log.stat['Verbose'] = v
        ## Check Query ##
        qry = aln.obj['QuerySeq']
        if not qry:
            if aln.querySeq(query=seq.info['AccNum']):
                qry = aln.obj['QuerySeq']
            else:
                callobj.log.printLog('#ALN','Problem finding %s in %s.' % (seq.shortName(),alnfile),screen=False)
                return None

        ### Check Alignment ###
        if aln.seqNum() < 2:
            callobj.log.printLog('#ALN','Not enough sequences for %s in %s.' % (seq.shortName(),alnfile),screen=False)
            return None
        if aln._checkAln(aln=True,realign=True):
            return aln
        else:
            callobj.log.printLog('#ERR','%s not aligned!!!' % (alnfile))
            return None
    except:
        callobj.log.errorLog('Something bad has happened in rje_motif_stats.loadOrthAln()')
        # Only restore the verbosity if it was actually read: otherwise this handler would itself fail.
        if v is not None: callobj.log.stat['Verbose'] = v
        return None
예제 #9
0
 def ANCHOR(self, retry=2):  ### Runs ANCHOR disorder prediction
     '''
     Runs ANCHOR disorder prediction.
     >> retry:int [2] = number of further attempts to make if the run or the parsing of its output fails.
     << True if prediction succeeded. False if it failed, in which case the region lists are emptied.
     Fills self.list['ResidueDisorder'] (one score per residue), self.list['RegionDisorder'] and
     self.list['RegionFold'] (lists of 1-based (start,end) tuples) and then calls self.minRegion().
     '''
     name = 'Unknown'    # Fallback so that the error handler can always report a name
     try:  ### ~ [0] ~ Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         ## ~ [0a] ~ Setup sequence and temp file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         sequence = self.info['Sequence'].upper()
         name = self.info['Name'][:4] + rje.randomString(8)
         tmp = name + '.tmp'
         ## ~ [0b] ~ Setup ANCHOR ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         apath = self.info['ANCHOR']     # May be given as the program itself or as its directory
         if os.path.basename(apath) == 'anchor':
             apath = os.path.dirname(apath)
         anchor = rje.makePath(apath) + 'anchor'
         if not os.path.exists(anchor):
             self.errorLog('Path "%s" not found!' % anchor, printerror=False)
             retry = 0   # A missing program will not appear on retry
             raise IOError
         ### ~ [1] Run ANCHOR Disorder prediction ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         # Write and close the temp file so that it is complete on disk before ANCHOR reads it.
         with open(tmp, 'w') as TMP:
             TMP.write('>%s\n%s\n' % (name, sequence))
         acmd = '%s %s -d %s' % (anchor, tmp, apath)
         dlines = os.popen(acmd).readlines()
         try:
             os.unlink(tmp)
         except:
             self.errorLog('Cannot delete %s!' % tmp)
         ### ~ [2] Read in results ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if self.info['Name'] not in ['', 'None']: name = self.info['Name']
         self.list['ResidueDisorder'] = []
         for d in dlines:
             if d[:1] == '#': continue
             dm = rje.matchExp(r'^(\d+)\s+(\S)\s+(\S+)', d)     # (position, residue, score)
             if not dm: continue
             pos = int(dm[0])
             aa = dm[1]
             score = float(dm[2])
             i = len(self.list['ResidueDisorder'])   # 0-based index of the residue this line should describe
             if sequence[i] != aa:
                 self.log.errorLog(
                     '%s: Position %d is %s in sequence but %s in ANCHOR output!'
                     % (name, pos, sequence[i], aa),
                     printerror=False)
                 raise ValueError
             if pos != (i + 1):
                 self.log.errorLog(
                     '%s: Position %d reached in ANCHOR output but previous results missing!'
                     % (name, pos),
                     printerror=False)
                 raise ValueError
             self.list['ResidueDisorder'].append(score)
         if len(self.list['ResidueDisorder']) != len(sequence):
             self.log.errorLog(
                 '%s: Sequence = %d aa but ANCHOR results stop at %s!' %
                 (name, len(sequence), len(self.list['ResidueDisorder'])),
                 printerror=False)
             raise ValueError
         ### ~ [3] ~ Make Regions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.list['RegionDisorder'] = []
         self.list['RegionFold'] = []
         start = 0   # 1-based start of the open disorder region; 0 = none open
         fstart = 0  # 1-based start of the open fold region; 0 = none open
         i = 0
         dx = 0      # Running total of disordered residues
         while i < len(sequence):
             score = self.list['ResidueDisorder'][i]
             i += 1
             if not start and score > self.stat['IUCut']:  ### Start new disorder ###
                 start = i
             elif start and score <= self.stat['IUCut']:  ### End!
                 self.list['RegionDisorder'].append((start, i - 1))
                 dx += i - start
                 start = 0
             if not fstart and score <= self.stat['IUCut']:  ### Start new fold ###
                 fstart = i
             elif fstart and score > self.stat['IUCut']:  ### End!
                 self.list['RegionFold'].append((fstart, i - 1))
                 fstart = 0
         if start:   # Close a disorder region running to the end of the sequence
             self.list['RegionDisorder'].append((start, len(sequence)))
             dx += len(sequence) + 1 - start
         if fstart: self.list['RegionFold'].append((fstart, len(sequence)))
         self.minRegion()
         if self.opt['PrintLog']:
             self.log.printLog(
                 '\r#DIS',
                 'ANCHOR Disorder prediction complete: %d disorder regions, %d disordered aa'
                 % (len(self.list['RegionDisorder']), dx))
         return True
     except:
         if retry:
             self.printLog('#RETRY', 'Trying %s again...' % name)
             return self.ANCHOR(retry - 1)
         self.log.errorLog(
             'Error in Disorder.ANCHOR(%s). Disorder prediction failed.' %
             name)
         self.list['RegionDisorder'] = []
         self.list['RegionFold'] = []
         return False
예제 #10
0
    def iuPred(self, retry=2):  ### Runs IUPred disorder prediction
        '''
        Runs IUPred disorder prediction.
        >> retry:int [2] = number of further attempts to make if the run or the parsing of its output fails.
        << True if prediction succeeded. False if it failed, in which case the region lists are emptied.
        Fills self.list['ResidueDisorder'] (one score per residue), self.list['RegionDisorder'] and
        self.list['RegionFold'] (lists of 1-based (start,end) tuples) and then calls self.minRegion().
        If self.opt['IUChDir'] is set, IUPred is run from its own directory and the starting directory is restored.
        '''
        mydir = os.path.abspath(os.curdir)
        name = 'Unknown'    # Fallback so that the error handler can always report a name
        try:
            ### Setup sequence and temp file ###
            sequence = self.info['Sequence'].upper()
            name = self.info['Name'][:4] + rje.randomString(8)
            tmp = name + '.tmp'

            ### Run Disorder ###
            iusplit = self.info['IUPath'].split(os.sep)
            iupath = os.sep.join(iusplit[:-1])      # Directory containing the IUPred program
            iupred = iusplit[-1]                    # Program name
            if self.opt['IUChDir']: os.chdir(iupath)
            # Write and close the temp file so that it is complete on disk before IUPred reads it.
            with open(tmp, 'w') as TMP:
                TMP.write('>%s\n%s\n' % (name, sequence))
            iumethod = self.info['IUMethod'].lower()
            if self.opt['IUChDir'] and self.opt['Win32']:
                iucmd = '%s %s %s' % (iupred, tmp, iumethod)
            elif self.opt['IUChDir']:
                iucmd = './%s %s %s' % (iupred, tmp, iumethod)
            else:
                iucmd = '%s %s %s' % (self.info['IUPath'], tmp, iumethod)
            dlines = os.popen(iucmd).readlines()
            try:
                os.unlink(tmp)
            except:
                self.errorLog('Cannot delete %s!' % tmp)
            if self.opt['IUChDir']: os.chdir(mydir)
            if self.info['Name'] not in ['', 'None']: name = self.info['Name']
            self.list['ResidueDisorder'] = []
            for d in dlines:
                dm = rje.matchExp(r'^\s*(\d+)\s+(\S)\s+(\S+)', d)   # (position, residue, score)
                if not dm: continue
                pos = int(dm[0])
                aa = dm[1]
                score = float(dm[2])
                i = len(self.list['ResidueDisorder'])   # 0-based index of the residue this line should describe
                if sequence[i] != aa:
                    self.log.errorLog(
                        '%s: Position %d is %s in sequence but %s in IUPred output!'
                        % (name, pos, sequence[i], aa),
                        printerror=False)
                    raise ValueError
                if pos != (i + 1):
                    self.log.errorLog(
                        '%s: Position %d reached in IUPred output but previous results missing!'
                        % (name, pos),
                        printerror=False)
                    raise ValueError
                self.list['ResidueDisorder'].append(score)
            if len(self.list['ResidueDisorder']) != len(sequence):
                self.log.errorLog(
                    '%s: Sequence = %d aa but IUPred results stop at %s!' %
                    (name, len(sequence), len(self.list['ResidueDisorder'])),
                    printerror=False)
                raise ValueError

            ### Make Regions ###
            self.list['RegionDisorder'] = []
            self.list['RegionFold'] = []
            start = 0   # 1-based start of the open disorder region; 0 = none open
            fstart = 0  # 1-based start of the open fold region; 0 = none open
            i = 0
            dx = 0      # Running total of disordered residues
            while i < len(sequence):
                score = self.list['ResidueDisorder'][i]
                i += 1
                if not start and score > self.stat['IUCut']:  ### Start new disorder ###
                    start = i
                elif start and score <= self.stat['IUCut']:  ### End!
                    self.list['RegionDisorder'].append((start, i - 1))
                    dx += i - start
                    start = 0
                if not fstart and score <= self.stat['IUCut']:  ### Start new fold ###
                    fstart = i
                elif fstart and score > self.stat['IUCut']:  ### End!
                    self.list['RegionFold'].append((fstart, i - 1))
                    fstart = 0
            if start:   # Close a disorder region running to the end of the sequence
                self.list['RegionDisorder'].append((start, len(sequence)))
                dx += len(sequence) + 1 - start
            if fstart: self.list['RegionFold'].append((fstart, len(sequence)))
            self.minRegion()
            if self.opt['PrintLog']:
                self.log.printLog(
                    '\r#DIS',
                    'IUPred (%s) Disorder prediction complete: %d disorder regions, %d disordered aa'
                    % (self.info['IUMethod'].lower(),
                       len(self.list['RegionDisorder']), dx))
            return True
        except:
            if self.opt['IUChDir']: os.chdir(mydir)     # Make sure the starting directory is restored
            if retry:
                self.printLog('#RETRY', 'Trying %s again...' % name)
                return self.iuPred(retry - 1)
            self.log.errorLog(
                'Error in Disorder.iuPred(%s). Disorder prediction failed. Check (setenv?) IUPred_PATH environment variable.'
                % name)
            self.list['RegionDisorder'] = []
            self.list['RegionFold'] = []
            return False
예제 #11
0
 def ANCHOR(self,retry=2):     ### Runs ANCHOR disorder prediction
     '''
     Runs ANCHOR disorder prediction on self.info['Sequence'].
     >> retry:int [2] = Number of further attempts to make if the run (or the parsing of its output) fails.
     << True if the prediction was run and parsed successfully, else False.

     The (upper case) sequence is written to a temporary fasta file, which is given to the "anchor" program found in
     the self.info['ANCHOR'] directory. Output lines of the form "<position> <aa> <score>" are checked against the
     sequence and stored in self.list['ResidueDisorder'] (one score per residue). Runs of residues scoring above
     self.stat['IUCut'] are stored in self.list['RegionDisorder'] and the remaining runs in self.list['RegionFold'],
     as 1-based inclusive (start,end) tuples, before self.minRegion() is called. If all attempts fail, both region
     lists are emptied.
     '''
     name = 'None'   # Fallback label: the error handler below must be able to report failures that occur before naming
     try:### ~ [0] ~ Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         ## ~ [0a] ~ Setup sequence and temp file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         sequence = self.info['Sequence'].upper()
         name = self.info['Name'][:4] + rje.randomString(8)  # Random suffix for the temp file name
         tmp = name + '.tmp'
         ## ~ [0b] ~ Setup ANCHOR ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         apath = self.info['ANCHOR']
         if os.path.basename(apath) == 'anchor': apath = os.path.dirname(apath)  # Path to program given: want directory
         anchor = rje.makePath(apath) + 'anchor'
         if not os.path.exists(anchor):
             self.errorLog('Path "%s" not found!' % anchor,printerror=False)
             retry = 0; raise IOError    # No point retrying if the program is missing
         ### ~ [1] Run ANCHOR Disorder prediction ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         try:
             TMP = open(tmp,'w')
             try: TMP.write('>%s\n%s\n' % (name,sequence))
             finally: TMP.close()    # Must be flushed and closed before the external program reads it
             acmd = '%s %s -d %s' % (anchor,tmp,apath)
             PIPE = os.popen(acmd)
             try: dlines = PIPE.readlines()
             finally: PIPE.close()
         finally:    # Always tidy up the temp file, even if the run itself failed
             if os.path.exists(tmp):
                 try: os.unlink(tmp)
                 except OSError: self.errorLog('Cannot delete %s!' % tmp)
         ### ~ [2] Read in results ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if self.info['Name'] not in ['','None']: name = self.info['Name']   # Use real name for messages from here
         self.list['ResidueDisorder'] = []
         for d in dlines:
             if d[:1] == '#': continue   # Comment line
             dm = rje.matchExp(r'^(\d+)\s+(\S)\s+(\S+)',d)
             if not dm: continue
             pos = int(dm[0])
             aa = dm[1]
             score = float(dm[2])
             i = len(self.list['ResidueDisorder'])   # 0-based index of the residue this line should describe
             if i >= len(sequence):
                 self.log.errorLog('%s: Position %d reached in ANCHOR output but sequence is only %d aa!' % (name,pos,len(sequence)),printerror=False)
                 raise ValueError
             if sequence[i] != aa:
                 self.log.errorLog('%s: Position %d is %s in sequence but %s in ANCHOR output!' % (name,pos,sequence[i],aa),printerror=False)
                 raise ValueError
             if pos != (i + 1):
                 self.log.errorLog('%s: Position %d reached in ANCHOR output but previous results missing!' % (name,pos),printerror=False)
                 raise ValueError
             self.list['ResidueDisorder'].append(score)
         if len(self.list['ResidueDisorder']) != len(sequence):
             self.log.errorLog('%s: Sequence = %d aa but ANCHOR results stop at %s!' % (name,len(sequence),len(self.list['ResidueDisorder'])),printerror=False)
             raise ValueError
         ### ~ [3] ~ Make Regions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.list['RegionDisorder'] = []
         self.list['RegionFold'] = []
         start = 0   # 1-based start of current disordered region (0 = not in one)
         fstart = 0  # 1-based start of current ordered region (0 = not in one)
         i = 0
         dx = 0      # Total number of disordered residues
         while i < len(sequence):
             score = self.list['ResidueDisorder'][i]
             i += 1  # i is now the 1-based position of the residue being considered
             if not start and score > self.stat['IUCut']:    ### Start new disorder ###
                 start = i
             elif start and score <= self.stat['IUCut']:     ### End!
                 self.list['RegionDisorder'].append((start,i-1))
                 dx += i - start
                 start = 0
             if not fstart and score <= self.stat['IUCut']:    ### Start new fold ###
                 fstart = i
             elif fstart and score > self.stat['IUCut']:     ### End!
                 self.list['RegionFold'].append((fstart,i-1))
                 fstart = 0
         if start:   # Disordered region runs to the end of the sequence
             self.list['RegionDisorder'].append((start,len(sequence)))
             dx += len(sequence) + 1 - start
         if fstart: self.list['RegionFold'].append((fstart,len(sequence)))
         self.minRegion()
         if self.opt['PrintLog']: self.log.printLog('\r#DIS','ANCHOR Disorder prediction complete: %d disorder regions, %d disordered aa' % (len(self.list['RegionDisorder']),dx))
         return True
     except:     # Catch-all kept from the original: any failure triggers a retry, then a logged failure
         if retry:
             self.printLog('#RETRY','Trying %s again...' % name)
             return self.ANCHOR(retry-1)
         self.log.errorLog('Error in Disorder.ANCHOR(%s). Disorder prediction failed.' % name)
         self.list['RegionDisorder'] = []
         self.list['RegionFold'] = []
         return False
예제 #12
0
    def iuPred(self,retry=2):     ### Runs IUPred disorder prediction
        '''
        Runs IUPred disorder prediction on self.info['Sequence'].
        >> retry:int [2] = Number of further attempts to make if the run (or the parsing of its output) fails.
        << True if the prediction was run and parsed successfully, else False.

        The (upper case) sequence is written to a temporary fasta file and given to the program at self.info['IUPath']
        with self.info['IUMethod'] (lower case) as the final argument. If self.opt['IUChDir'] is set, the program is
        run from within its own directory and the working directory is restored afterwards. Output lines of the form
        "<position> <aa> <score>" are checked against the sequence and stored in self.list['ResidueDisorder']. Runs of
        residues scoring above self.stat['IUCut'] are stored in self.list['RegionDisorder'] and the remaining runs in
        self.list['RegionFold'], as 1-based inclusive (start,end) tuples, before self.minRegion() is called. If all
        attempts fail, both region lists are emptied.
        '''
        mydir = os.path.abspath(os.curdir)
        name = 'None'   # Fallback label: the error handler below must be able to report failures that occur before naming
        try:
            ### Setup sequence and temp file ###
            sequence = self.info['Sequence'].upper()
            name = self.info['Name'][:4] + rje.randomString(8)  # Random suffix for the temp file name
            tmp = name + '.tmp'

            ### Run Disorder ###
            iusplit = self.info['IUPath'].split(os.sep)
            iupath = os.sep.join(iusplit[:-1])  # Directory containing IUPred
            iupred = iusplit[-1]                # Program name
            if self.opt['IUChDir']: os.chdir(iupath)
            try:
                TMP = open(tmp,'w')
                try: TMP.write('>%s\n%s\n' % (name,sequence))
                finally: TMP.close()    # Must be flushed and closed before the external program reads it
                if self.opt['IUChDir'] and self.opt['Win32']: iucmd = '%s %s %s' % (iupred,tmp,self.info['IUMethod'].lower())
                elif self.opt['IUChDir']: iucmd = './%s %s %s' % (iupred,tmp,self.info['IUMethod'].lower())
                else: iucmd = '%s %s %s' % (self.info['IUPath'],tmp,self.info['IUMethod'].lower())
                PIPE = os.popen(iucmd)
                try: dlines = PIPE.readlines()
                finally: PIPE.close()
            finally:    # Always tidy up the temp file and return to the original directory
                if os.path.exists(tmp):
                    try: os.unlink(tmp)
                    except OSError: self.errorLog('Cannot delete %s!' % tmp)
                if self.opt['IUChDir']: os.chdir(mydir)

            ### Read in results ###
            if self.info['Name'] not in ['','None']: name = self.info['Name']   # Use real name for messages from here
            self.list['ResidueDisorder'] = []
            for d in dlines:
                dm = rje.matchExp(r'^\s*(\d+)\s+(\S)\s+(\S+)',d)
                if not dm: continue
                pos = int(dm[0])
                aa = dm[1]
                score = float(dm[2])
                i = len(self.list['ResidueDisorder'])   # 0-based index of the residue this line should describe
                if i >= len(sequence):
                    self.log.errorLog('%s: Position %d reached in IUPred output but sequence is only %d aa!' % (name,pos,len(sequence)),printerror=False)
                    raise ValueError
                if sequence[i] != aa:
                    self.log.errorLog('%s: Position %d is %s in sequence but %s in IUPred output!' % (name,pos,sequence[i],aa),printerror=False)
                    raise ValueError
                if pos != (i + 1):
                    self.log.errorLog('%s: Position %d reached in IUPred output but previous results missing!' % (name,pos),printerror=False)
                    raise ValueError
                self.list['ResidueDisorder'].append(score)
            if len(self.list['ResidueDisorder']) != len(sequence):
                self.log.errorLog('%s: Sequence = %d aa but IUPred results stop at %s!' % (name,len(sequence),len(self.list['ResidueDisorder'])),printerror=False)
                raise ValueError

            ### Make Regions ###
            self.list['RegionDisorder'] = []
            self.list['RegionFold'] = []
            start = 0   # 1-based start of current disordered region (0 = not in one)
            fstart = 0  # 1-based start of current ordered region (0 = not in one)
            i = 0
            dx = 0      # Total number of disordered residues
            while i < len(sequence):
                score = self.list['ResidueDisorder'][i]
                i += 1  # i is now the 1-based position of the residue being considered
                if not start and score > self.stat['IUCut']:    ### Start new disorder ###
                    start = i
                elif start and score <= self.stat['IUCut']:     ### End!
                    self.list['RegionDisorder'].append((start,i-1))
                    dx += i - start
                    start = 0
                if not fstart and score <= self.stat['IUCut']:    ### Start new fold ###
                    fstart = i
                elif fstart and score > self.stat['IUCut']:     ### End!
                    self.list['RegionFold'].append((fstart,i-1))
                    fstart = 0
            if start:   # Disordered region runs to the end of the sequence
                self.list['RegionDisorder'].append((start,len(sequence)))
                dx += len(sequence) + 1 - start
            if fstart: self.list['RegionFold'].append((fstart,len(sequence)))
            self.minRegion()
            if self.opt['PrintLog']: self.log.printLog('\r#DIS','IUPred (%s) Disorder prediction complete: %d disorder regions, %d disordered aa' % (self.info['IUMethod'].lower(),len(self.list['RegionDisorder']),dx))
            return True
        except:     # Catch-all kept from the original: any failure triggers a retry, then a logged failure
            if self.opt['IUChDir']: os.chdir(mydir)
            if retry:
                self.printLog('#RETRY','Trying %s again...' % name)
                return self.iuPred(retry-1)
            self.log.errorLog('Error in Disorder.iuPred(%s). Disorder prediction failed. Check (setenv?) IUPred_PATH environment variable.' % name)
            self.list['RegionDisorder'] = []
            self.list['RegionFold'] = []
            return False
예제 #13
0
 def uniFake(self,seqs=[],store=False):  ### Main UniFake method. Runs on sequences in self.obj['SeqList'] if no seqs.
     '''
     Main UniFake method. Runs on sequences in self.obj['SeqList'] if no seqs given.
     >> seqs:list [] = Sequence objects to process. If given, these replace self.obj['SeqList'].seq. (The default list
        is never modified here.)
     >> store:bool [False] = If True, entries are kept in the UniProt object (also stored in seqlist.obj['UniProt'])
        and saved in one go at the end. If False, each entry is appended to the output file as it is made.

     For each sequence, a temporary fasta file is made and features are collected from: the stored aliases/features
     (self.dict['Aliases'] and self.dict['Features']), disorder prediction, PFam HMM searches, TMHMM and SignalP,
     according to the (case-insensitive) options in self.list['UniFake']. A problem in any one of these steps is
     logged and the remaining steps still run. Output is to self.info['DatOut'].
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         unifake = ' '.join(self.list['UniFake']).lower().split()    # Lower case list of analyses to perform
         seqlist = self.obj['SeqList']
         if seqs: seqlist.seq = seqs
         else: seqs = seqlist.seq
         (sx,seqnum) = (0,seqlist.seqNum())
         ## ~ [1b] Setup UniProt object and output file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         uniprot = rje_uniprot.UniProt(self.log,self.cmd_list)   # UniProt object for saving data
         if self.info['DatOut'].lower() in ['','none']: self.info['DatOut'] = rje.baseFile(seqlist.info['Name']) + '.dat'
         datfile = self.info['DatOut']
         if os.path.exists(datfile): rje.backup(self,datfile)
         if store: seqlist.obj['UniProt'] = uniprot
         ## ~ [1c] Setup RJE_HMM object ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         if 'pfam' in unifake:
             hmm = rje_hmm.HMMRun(self.log,self.cmd_list+['force=T'])
             hmmfile = '%s.pfam.tdt' % rje.baseFile(datfile)
             if os.path.exists(hmmfile): rje.backup(self,hmmfile)
             hmm.list['HMM'] = [self.info['PFam']]
             hmm.opt['HMMPFam'] = True
         else: hmm = None
         ## ~ [1d] Setup RJE_TM object ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         if 'signalp' in unifake: tm = rje_tm.TM(self.log,self.cmd_list)
         else: tm = None
         ### ~ [2] ~ Perform UniFake processing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         for seq in seqs:
             sx += 1
             name = seq.shortName()
             utmp = None     # Reset for each sequence so that cleanup never uses a missing or stale temp file prefix
             self.printLog('#SEQ','Processing %s (%s aa) %s...' % (seq.shortName(),rje.integerString(seq.aaLen()),seq.info['Description'][:50]))
             try:
                 ## ~ [2a] ~ Basic data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                 utmp = 'tmp%s.%s' % (rje.randomString(5),seq.info['AccNum'])
                 FAS = open('%s.fas' % utmp,'w')
                 try: FAS.write('>%s\n%s\n' % (seq.shortName(),seq.info['Sequence']))
                 finally: FAS.close()    # Must be flushed and closed before the external programs read it
                 udata = {'CC':['-!- Features generated using unifake.py'],'AC':[]}
                 if seq.info['SpecCode'] in ['Unknown','UNK']: seq.info['SpecCode'] = self.info['SPCode']
                 #x#elif seq.info['Species'] != 'None': udata['OS'] = [seq.info['Species']]     #!# Check how well this works. Add spectable? #!#
                 ## ~ [2b] ~ Aliases ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                 if self.opt['EnsDat']:
                     details = rje.matchExp(r'\[acc:(\S+) pep:(\S+) gene:(\S+)\]',seq.info['Name'])
                     if details:
                         self.addAlias(seq.info['AccNum'],details[0])
                         self.addAlias(seq.info['AccNum'],details[1])
                         self.addAlias(seq.info['AccNum'],details[2])
                         udata['GN'] = [details[2]]
                 for seqid in [seq.shortName(),seq.info['AccNum']]:
                     if seqid in self.dict['Aliases']: udata['AC'].append('%s;' % '; '.join(self.dict['Aliases'][seqid]))
                 ## ~ [2c] ~ Features ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                 ft = []  # List of features for sequence
                 for seqid in [seq.shortName(),seq.info['AccNum'],seq.info['ID']]:
                     if seqid in self.dict['Features']: ft += self.dict['Features'][seqid]
                 ## ~ [2d] IUPRED disorder prediction ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                 if 'disorder' in unifake:   # Lower case list used, as for the other analyses
                     try:
                         seq.disorder()
                         dis = seq.obj['Disorder']
                         for region in dis.list['RegionDisorder']:
                             ft.append({'Type':'DISORDER','Desc':'Predicted disorder: %s' % dis.info['Disorder'],'Start':region[0],'End':region[1]})
                             if dis.info['Disorder'].lower() == 'iupred': ft[-1]['Desc'] = '%s > %.2f' % (ft[-1]['Desc'],dis.stat['IUCut'])
                         for fold in dis.list['RegionFold']:
                             ft.append({'Type':'ORDER','Desc':'Predicted order: %s' % dis.info['Disorder'],'Start':fold[0],'End':fold[1]})
                             if dis.info['Disorder'].lower() == 'iupred': ft[-1]['Desc'] = '%s <= %.2f' % (ft[-1]['Desc'],dis.stat['IUCut'])
                     except: self.log.errorLog('UniFake disorder problem for %s.' % name)
                 ## ~ [2e] PFam HMM domain prediction ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                 if hmm:
                     try:
                         hmm.setInfo({'SearchDB':'%s.fas' % utmp,'HMMOut':'%s.hmm.out' % utmp})   # This will be made for each sequence
                         hmm.search = []
                         hmm.list['HMMRes'] = [hmm.hmmSearch(self.info['PFam'],outfile=hmm.info['HMMOut'])]  # Used in hmmTable
                         hmm.hmmTable(outfile=hmmfile,append=True)
                         if 'disorder' in unifake: resdis = seq.obj['Disorder'].list['ResidueDisorder']    # individual (IUPRed) residue results
                         else: resdis = []
                         if hmm.search: udata['CC'].append('PFam: HMMer PFam search vs %s (Modified %s)' % (self.info['PFam'],time.ctime(os.path.getmtime(self.info['PFam']))))
                         else:
                             udata['CC'].append('-!- ERROR: PFam HMMer Search failure!')
                             out = {'Type':'!ERROR!','Name':name}
                             rje.delimitedFileOutput(self,hmmfile,['Type','Name','Start','End','Eval','Score'],datadict=out)
                         for search in hmm.search:
                             for hit in search.hit:
                                 for aln in hit.aln:
                                     pfamft = {'Start':aln.stat['SbjStart'],'End':aln.stat['SbjEnd'],'Type':'PFAM',
                                               'Desc':'%s PFam HMM Eval: %.2e; Score: %.1f' % (search.info['Name'],aln.stat['Expect'],aln.stat['BitScore'])}
                                     if resdis:
                                         region = resdis[aln.stat['SbjStart']-1:aln.stat['SbjEnd']]
                                         if region:  # An empty slice cannot be averaged: keep the plain PFAM feature
                                             hmmdisorder = float(sum(region)) / len(region)  # Mean disorder score over the hit
                                             pfamft['Desc'] = '%s; IUPRed: %.2f' % (pfamft['Desc'],hmmdisorder)
                                             if hmmdisorder < self.stat['DisDom']: pfamft['Type'] = 'DOMAIN'
                                     ft.append(pfamft)
                     except: self.log.errorLog('UniFake PFam HMM problem for %s.' % name)
                 ## ~ [2f] TMHMM transmembrane topology prediction ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                 if 'tmhmm' in unifake:
                     try:
                         TMPIPE = os.popen('%s %s.fas -short' % (self.info['TMHMM'],utmp))
                         try: tmdat = TMPIPE.readlines()
                         finally: TMPIPE.close()
                         domlist = rje_tm.domainList(rje_tm.parseTMHMM(tmdat[0]))
                         for tmdom in domlist:
                             ft.append(tmdom)
                             ft[-1]['Desc'] = 'TMHMM topology prediction'
                             ft[-1]['Start'] = int(ft[-1]['Start'])
                             ft[-1]['End'] = int(ft[-1]['End'])
                         if len(domlist) > 1: udata['CC'].append('TMHMM: %d TM domains; N-Term %s' % ((len(domlist)-1)//2,domlist[0]['Type']))
                         else: udata['CC'].append('TMHMM: 0 TM domains')
                     except: self.log.errorLog('UniFake TMHMM problem for %s.' % name)
                 ## ~ [2g] SIGNALP signal peptide prediction ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                 if 'signalp' in unifake:
                     try:
                         os.system('%s -f short -t euk %s.fas > %s.signalp' % (self.info['SignalP'],utmp,utmp))
                         tm.signalp = {}
                         tm.parseSignalP('%s.signalp' % utmp)
                         sigp = tm.signalp.pop(seq.shortName())
                         cpos = 0    # Predicted cleavage position (0 = no signal peptide predicted)
                         if sigp['nn_ymax?'] == 'Y':
                             cpos = int(sigp['nn_ymaxpos'])
                             desc = 'SignalP NN prediction'
                         if sigp['hmm_cmax?'] == 'Y':
                             hmm_c = int(sigp['hmm_cmaxpos'])
                             if cpos == 0:
                                 cpos = hmm_c
                                 desc = 'SignalP HMM prediction'
                             elif hmm_c < cpos:  # Both methods predict a signal peptide: use the earlier position
                                 cpos = hmm_c
                                 desc = 'SignalP HMM prediction (NN also Y)'
                             else: desc += ' (HMM also Y)'
                         if cpos > 0: ft.append({'Type':'SIGNALP','Desc':desc,'Start':1,'End':cpos})
                     except: self.log.errorLog('UniFake SignalP problem for %s.' % name)
                 ## ~ [2h] Convert to UniProt and save ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                 self.addRealUniProt(seq,udata,ft)
                 self.deBug(ft)
                 if not store: uniprot.list['Entry'] = []
                 if uniprot.addFromSeq(seq,data=udata,ft=ft):    ### Converts into UniProtEntry object
                     if not store: uniprot.saveUniProt(datfile,append=True)
                     #x#open(self.info['DatPickup'],'a').write('%s\n' % seq.shortName())
             except: self.log.errorLog('Problem during UniFake(%s)' % name)
             ## ~ [2i] Cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             if utmp:    # No temp files can exist if the failure was before the prefix was generated
                 for tmpfile in glob.glob('%s*' % utmp): os.unlink(tmpfile)
             self.printLog('#UNIFAKE','|---------- %s run <<<|>>> %s to go -----------|' % (rje.integerString(sx),rje.integerString(seqnum-sx)),log=False)
         if store: uniprot.saveUniProt(datfile,append=False)
         if self.opt['CleanUp']:
             for tmpdir in glob.glob('TMHMM*'):
                 if os.path.isdir(tmpdir): os.rmdir(tmpdir)
     except: self.errorLog('Oh, the shame of it! Trouble during UniFake.uniFake()')
예제 #14
0
    def forker(self):      ### Generic forking method
        '''
        Generic method for forking (without threads).
        1. The input sequence file is split into temporary fasta files, each identified by a unique random string. A
           new file is started each time the number of sequences written reaches self.stat['Split']. Sequences before
           the one matching self.info['StartFrom'] are skipped (unless this is 'None').
        2. self.info['ForkProg'] is run on each split file, using os.fork() with up to self.stat['Forks'] children at
           a time, or one after another using os.system() if self.opt['NoForks'] (set automatically for Win32 or
           fewer than one fork). The user is asked whether to stop waiting if there has been no new fork started for
           self.stat['KillForks'] seconds.
        3. The log and results files for each split are transferred to the combined files and the split input file is
           deleted.
        Errors are logged, along with the stage reached, and then re-raised.
        '''
        try:
            ### <0> ### Setup
            _stage = '<0> Fork Setup'
            forkx = int(self.stat['Forks'])   # Number of forks to have running at one time
            if self.opt['Win32'] or forkx < 1:
                self.opt['NoForks'] = True
            forks = []  # List of active fork PIDs
            killforks = int(self.stat['KillForks']) # Time in seconds to wait after main thread has apparently finished

            ### Sequence List setup ###
            _stage = '<1> Forking'
            seqx = 0    # Sequence Counter
            subx = 0    # Subset sequence counter
            outfile = None  # Output file name
            randlist = []   # List of random strings for split sequence files
            filedict = {}   # Dictionary of input files for each random string
            seqlist = rje_seq.SeqList(log=self.log,cmd_list=['autoload=F']+self.cmd_list)
            seqlist.makeBaseFile()
            SEQFILE = open(seqlist.info['Name'], 'r')
            try:
                (seq,lastline) = seqlist.nextFasSeq(SEQFILE,'Starting')
                while seq:
                    seqlist.seq = [seq]
                    if self.info['StartFrom'] != 'None':    # Not yet reached wanted sequence
                        if self.info['StartFrom'] in [seq.info['Name'], seq.info['ID'], seq.info['AccNum'], seq.shortName()]:
                            self.info['StartFrom'] = 'None'
                    if self.info['StartFrom'] == 'None':    # Wanted sequence
                        if outfile:   # Add to current split file
                            SEQOUT = open(outfile,'a')
                        else:   # Create new file with a unique random string
                            rs = rje.randomString(6)
                            while rs in randlist:
                                rs = rje.randomString(6)
                            outfile = '%s.%s.fas' % (seqlist.info['Basefile'],rs)
                            SEQOUT = open(outfile,'w')
                            randlist.append(rs)
                            filedict[rs] = outfile
                        try: SEQOUT.write('>%s\n%s\n' % (seq.info['Name'],seq.info['Sequence']))
                        finally: SEQOUT.close()
                        seqx += 1
                        subx += 1
                        if subx == self.stat['Split']:  # Finished split
                            self.log.printLog('#SEQ','%s sequences output to %s.' % (rje.integerString(subx),outfile))
                            outfile = None
                            subx = 0
                    (seq,lastline) = seqlist.nextFasSeq(SEQFILE,lastline)
            finally: SEQFILE.close()    # Do not leave the input file open during the (potentially long) forking
            if subx > 0:
                self.log.printLog('#SEQ','%s sequences output to %s.' % (rje.integerString(subx),outfile))
            self.log.printLog('#SEQ','%s sequences output in total to %d files.' % (rje.integerString(seqx),len(randlist)))
            # Now have the list of random strings in randlist (in order) and filenames in filedict

            ### <1> ### Forking
            killtime = time.time()
            dealt_with = 0      # Split files dealt with
            while dealt_with < len(randlist) or len(forks):

                ## <a> ## forks
                _stage = '<1a> New Forks'
                while dealt_with < len(randlist) and (len(forks) < forkx or self.opt['NoForks']):     # Add more forks
                    _stage = '<1a-i> Fork: Get stuff for fork'
                    killtime = time.time()  # Reset killtime - still doing stuff
                    # Add new fork
                    _stage = '<1a-ii> Fork: New Fork'
                    new_fork_id = randlist[dealt_with]
                    dealt_with += 1

                    # Insert the random string before the final '.'-separated element of the output command
                    outcmd = self.info['OutCmd'].split('.')
                    if len(outcmd) > 1:
                        outcmd = outcmd[:-1] + [new_fork_id] + outcmd[-1:]
                    else:
                        outcmd = outcmd + [new_fork_id] + ['resfile']
                    outcmd = '.'.join(outcmd)
                    forkcmd = '%s %s%s %s %s log=%s.log newlog=T i=-1' % (self.info['ForkProg'],self.info['SeqInCmd'],filedict[new_fork_id],outcmd,self.info['ForkCmd'],new_fork_id)
                    if self.opt['NoForks']:
                        os.system(forkcmd)
                    else:   # Forks
                        newpid = os.fork()  # Raises OSError on failure (never returns -1): handled by except below
                        if newpid == 0: # child
                            try: os.system(forkcmd)
                            finally: sys.exit()    # Exit process: the child must never continue with the parent's loop
                        else:
                            forks.append(newpid)    # Add fork to list

                ## <b> ## Monitor and remove finished forks
                _stage = '<1b> Finished Forks'
                forklist = self._activeForks(forks)
                if len(forklist) != len(forks):
                    self.verbose(0,2,' => %d of %d forks finished!' % (len(forks) - len(forklist),len(forks)),1)
                    forks = forklist[0:]

                self.verbose(3,3,'End of a Cycle.',2)

                ## <c> ## Look for eternal hanging of forks
                _stage = '<1c> Hanging'
                if time.time() - killtime > killforks:
                    self.verbose(0,1,'\n%d seconds of main program inactivity. %d forks still active!' % (killforks,len(forks)),1)
                    for fork in forks:
                        self.verbose(0,2,' => Fork PID %d still Active!' % (fork),1)
                    if rje.yesNo('Kill?'):
                        break   #!# killing options
                    else:
                        killtime = time.time()

            ### <3> ### Finish
            _stage = '<3> Finish'
            if len(forks) > 0:
                self.log.errorLog('%d Forks still active after %d seconds of main program inactivity' % (len(forks),killforks),True)
            else:
                self.verbose(0,1,'Forks have finished.',2)

            ### <4> ### Recompile results
            for randstr in randlist:
                os.unlink(filedict[randstr])
                rje.fileTransfer(fromfile='%s.log' % randstr,tofile=self.log.info['Name'],deletefrom=True)
                outfiles = glob.glob('*.%s.*' % randstr)
                for outfile in outfiles:
                    # Name of combined file = name of split results file without the random string
                    compfile = outfile.split('.')
                    compfile.remove(randstr)
                    compfile = '.'.join(compfile)
                    if randstr == randlist[0] and os.path.exists(compfile) and not self.opt['Append']:
                        os.unlink(compfile)     # First split replaces any existing results unless appending
                    rje.fileTransfer(fromfile=outfile,tofile=compfile,deletefrom=True)
                    self.verbose(1,2,'Copying results data from %s to %s...' % (outfile,compfile),0)
                self.verbose(0,1,'%d results files copied for Split %d.' % (len(outfiles),(randlist.index(randstr)+1)),1)
            self.log.printLog('#OUT','Results for %d splits compiled.' % len(randlist))

        except SystemExit:  # Don't want forks raising an Exception upon exiting
            sys.exit()
        except:
            self.log.errorLog('Error in forker(%s):' % _stage,printerror=True,quitchoice=False)
            raise   # Delete this if method error not terrible
예제 #15
0
 def nextJob(self,host_id):  ### Sets an new job running on host with given index                                #V1.0
     '''
     Sets an new job running on host with given index.
     >> host_id:int = Index of host in self.list['Hosts'] (also used as the key for self.dict['Running']).
     << Returns None, unless handed on to self.nextSeqJob() (SeqBySeq mode) or self.endJob() (log file problem), in
        which case their return value is returned.

     self.dict['Running'][host_id] is replaced with a new dictionary. This stays empty if there are no jobs left in
     self.list['SubJobs'], is given a 'WAIT...' message as 'PID' if the free memory of the host is below the 'MemFree'
     setting, or else is given the 'PID' of a forked child process that runs the next job (via rsh if self.rsh()).
     '''
     try:### ~ [1] ~ Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         node = self.list['Hosts'][host_id]
         freemem = self.freeMem(node)
         if self.getBool('SeqBySeq'): return self.nextSeqJob(host_id)
         jdict = self.dict['Running'][host_id] = {}      # Setup empty dictionary to fill, if jobs available
         if self.getNum('MemFree') > freemem:    # Not enough memory free: do not start a job on this host yet
             jdict['PID'] = 'WAIT - %.1f%% %s mem' % (freemem*100.0,node)
             return
         if not self.list['SubJobs']: return     # No jobs left to run
         job = self.list['SubJobs'].pop(0)
         if self.getBool('RjePy'): jdict['Log'] = '%si_%s.log' % (self.getStr('RunPath'),rje.randomString(6))
         if self.getStr('JobINI'): job = '%s ini=%s' % (job,self.getStr('JobINI'))
         if 'Log' in jdict:
             job = '%s log=%s' % (job,jdict['Log'])
             try: open(jdict['Log'],'w').close()     # Check that log file can be made (and do not leave it open)
             except: self.errorLog('Log problem. Aborting %s job.' % host_id); return self.endJob(host_id)
         ### ~ [2] ~ Add Job ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         rsh = "rsh %s '%s'" % (node,job)
         cpid = os.fork()        # Fork child process
         if cpid:                # parent process records pid of child rsh process
             jdict['PID'] = cpid
             if self.rsh(): self.printLog('#SUB',rsh)
             else: self.printLog('#SUB',job)
             self.printLog('#JOB','Running job as %s: %d remain; %.1f%% mem free' % (cpid,len(self.list['SubJobs']),freemem*100.0))
         else:                   # child process
             status = 1          # Exit status if the job could not be run
             try:
                 if self.rsh(): os.system(rsh)
                 else: os.system(job)
                 status = 0
             finally: os._exit(status)   # Child must always exit here and never return into the parent's code
     except SystemExit: raise    # Child
     except: self.errorLog('JobFarmer.nextJob error')
예제 #16
0
 def uniFake(self,seqs=None,store=False):  ### Main UniFake method. Runs on sequences in self.obj['SeqList'] if no seqs.
     '''
     Main UniFake method. Runs on sequences in self.obj['SeqList'] if no seqs given.

     For each sequence a temporary FASTA file is written, features are collected from the analyses named in
     self.list['UniFake'] (the code checks for 'disorder', 'pfam', 'tmhmm' and 'signalp', case-insensitively),
     and the sequence plus features are handed to a rje_uniprot.UniProt object via addFromSeq().
     >> seqs:list = Sequence objects to process. If non-empty they replace seqlist.seq; otherwise seqlist.seq is used.
     >> store:bool = If True the UniProt object is stored in seqlist.obj['UniProt'], entries accumulate and are
         saved once at the end (append=False). If False each entry is appended to the output file as it is made.
     << Nothing is returned. Errors are reported through errorLog() rather than raised by this method.
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         # Normalised (lower case, whitespace-split) list of requested analyses; used for ALL option checks below
         unifake = ' '.join(self.list['UniFake']).lower().split()
         predict_disorder = 'disorder' in unifake
         seqlist = self.obj['SeqList']
         if seqs: seqlist.seq = seqs
         else: seqs = seqlist.seq
         (sx,seqnum) = (0,seqlist.seqNum())
         ## ~ [1b] Setup UniProt object and output file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         uniprot = rje_uniprot.UniProt(self.log,self.cmd_list)   # UniProt object for saving data
         if self.info['DatOut'].lower() in ['','none']: self.info['DatOut'] = rje.baseFile(seqlist.info['Name']) + '.dat'
         datfile = self.info['DatOut']
         if os.path.exists(datfile): rje.backup(self,datfile)
         if store: seqlist.obj['UniProt'] = uniprot
         ## ~ [1c] Setup RJE_HMM object ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         if 'pfam' in unifake:
             hmm = rje_hmm.HMMRun(self.log,self.cmd_list+['force=T'])
             hmmfile = '%s.pfam.tdt' % rje.baseFile(datfile)
             if os.path.exists(hmmfile): rje.backup(self,hmmfile)
             hmm.list['HMM'] = [self.info['PFam']]
             hmm.opt['HMMPFam'] = True
         else: hmm = None
         ## ~ [1d] Setup RJE_TM object ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         if 'signalp' in unifake: tm = rje_tm.TM(self.log,self.cmd_list)
         else: tm = None
         ### ~ [2] ~ Perform UniFake processing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         for seq in seqs:
             sx += 1
             name = seq.shortName()
             utmp = None     # Temp file prefix. Reset per sequence so cleanup never uses an undefined or stale value.
             self.printLog('#SEQ','Processing %s (%s aa) %s...' % (name,rje.integerString(seq.aaLen()),seq.info['Description'][:50]))
             try:
                 ## ~ [2a] ~ Basic data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                 utmp = 'tmp%s.%s' % (rje.randomString(5),seq.info['AccNum'])
                 # Close the file explicitly: external programs read it below, so it must be flushed to disk
                 fasfile = open('%s.fas' % utmp,'w')
                 try: fasfile.write('>%s\n%s\n' % (name,seq.info['Sequence']))
                 finally: fasfile.close()
                 udata = {'CC':['-!- Features generated using unifake.py'],'AC':[]}
                 if seq.info['SpecCode'] in ['Unknown','UNK']: seq.info['SpecCode'] = self.info['SPCode']
                 #x#elif seq.info['Species'] != 'None': udata['OS'] = [seq.info['Species']]     #!# Check how well this works. Add spectable? #!#
                 ## ~ [2b] ~ Aliases ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                 details = None
                 if self.opt['EnsDat']: details = rje.matchExp(r'\[acc:(\S+) pep:(\S+) gene:(\S+)\]',seq.info['Name'])
                 if details:     # (acc, pep, gene) identifiers parsed from the sequence name
                     for alias in details[:3]: self.addAlias(seq.info['AccNum'],alias)
                     udata['GN'] = [details[2]]
                 for seqid in [seq.shortName(),seq.info['AccNum']]:
                     if seqid in self.dict['Aliases']: udata['AC'].append('%s;' % '; '.join(self.dict['Aliases'][seqid]))
                 ## ~ [2c] ~ Features ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                 ft = []     # List of features for sequence
                 for seqid in [seq.shortName(),seq.info['AccNum'],seq.info['ID']]:
                     if seqid in self.dict['Features']: ft += self.dict['Features'][seqid]
                 ## ~ [2d] IUPRED disorder prediction ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                 if predict_disorder:
                     try:
                         seq.disorder()
                         dis = seq.obj['Disorder']
                         iupred = dis.info['Disorder'].lower() == 'iupred'   # Only IUPred descriptions get the cut-off appended
                         for region in dis.list['RegionDisorder']:
                             desc = 'Predicted disorder: %s' % dis.info['Disorder']
                             if iupred: desc = '%s > %.2f' % (desc,dis.stat['IUCut'])
                             ft.append({'Type':'DISORDER','Desc':desc,'Start':region[0],'End':region[1]})
                         for fold in dis.list['RegionFold']:
                             desc = 'Predicted order: %s' % dis.info['Disorder']
                             if iupred: desc = '%s <= %.2f' % (desc,dis.stat['IUCut'])
                             ft.append({'Type':'ORDER','Desc':desc,'Start':fold[0],'End':fold[1]})
                     except: self.log.errorLog('UniFake disorder problem for %s.' % name)
                 ## ~ [2e] PFam HMM domain prediction ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                 if hmm:
                     try:
                         hmm.setInfo({'SearchDB':'%s.fas' % utmp,'HMMOut':'%s.hmm.out' % utmp})      # This will be made for each sequence
                         hmm.search = []
                         hmm.list['HMMRes'] = [hmm.hmmSearch(self.info['PFam'],outfile=hmm.info['HMMOut'])]   # Used in hmmTable
                         hmm.hmmTable(outfile=hmmfile,append=True)
                         if predict_disorder: disorder = seq.obj['Disorder'].list['ResidueDisorder']          # individual (IUPRed) residue results
                         else: disorder = []
                         if hmm.search: udata['CC'].append('PFam: HMMer PFam search vs %s (Modified %s)' % (self.info['PFam'],time.ctime(os.path.getmtime(self.info['PFam']))))
                         else:
                             # No search results at all: flag the failure in both the entry comments and the PFam table
                             udata['CC'].append('-!- ERROR: PFam HMMer Search failure!')
                             out = {'Type':'!ERROR!','Name':name}
                             rje.delimitedFileOutput(self,hmmfile,['Type','Name','Start','End','Eval','Score'],datadict=out)
                         for search in hmm.search:
                             for hit in search.hit:
                                 for aln in hit.aln:
                                     pfamft = {'Start':aln.stat['SbjStart'],'End':aln.stat['SbjEnd'],'Type':'PFAM',
                                                'Desc':'%s PFam HMM Eval: %.2e; Score: %.1f' % (search.info['Name'],aln.stat['Expect'],aln.stat['BitScore'])}
                                     if disorder:
                                         # Mean per-residue disorder over the aligned region; low values upgrade PFAM -> DOMAIN
                                         region = disorder[aln.stat['SbjStart']-1:aln.stat['SbjEnd']]
                                         hmmdisorder = float(sum(region)) / len(region)
                                         pfamft['Desc'] = '%s; IUPRed: %.2f' % (pfamft['Desc'],hmmdisorder)
                                         if hmmdisorder < self.stat['DisDom']: pfamft['Type'] = 'DOMAIN'
                                     ft.append(pfamft)
                     except: self.log.errorLog('UniFake PFam HMM problem for %s.' % name)
                 ## ~ [2f] TMHMM transmembrane topology prediction ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                 if 'tmhmm' in unifake:
                     try:
                         tmdat = os.popen('%s %s.fas -short' % (self.info['TMHMM'],utmp)).readlines()
                         domlist = rje_tm.domainList(rje_tm.parseTMHMM(tmdat[0]))
                         for tmdom in domlist:
                             tmdom['Desc'] = 'TMHMM topology prediction'
                             tmdom['Start'] = int(tmdom['Start'])
                             tmdom['End'] = int(tmdom['End'])
                             ft.append(tmdom)
                         if len(domlist) > 1: udata['CC'].append('TMHMM: %d TM domains; N-Term %s' % ((len(domlist)-1)//2,domlist[0]['Type']))
                         else: udata['CC'].append('TMHMM: 0 TM domains')
                     except: self.log.errorLog('UniFake TMHMM problem for %s.' % name)
                 ## ~ [2g] SIGNALP signal peptide prediction ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                 if 'signalp' in unifake:
                     try:
                         os.system('%s -f short -t euk %s.fas > %s.signalp' % (self.info['SignalP'],utmp,utmp))
                         tm.signalp = {}
                         tm.parseSignalP('%s.signalp' % utmp)
                         sigp = tm.signalp.pop(seq.shortName())
                         cpos = 0    # Predicted cleavage position; 0 = no signal peptide predicted
                         if sigp['nn_ymax?'] == 'Y':
                             cpos = int(sigp['nn_ymaxpos'])
                             desc = 'SignalP NN prediction'
                         if sigp['hmm_cmax?'] == 'Y':
                             hmm_c = int(sigp['hmm_cmaxpos'])
                             if cpos == 0:
                                 cpos = hmm_c
                                 desc = 'SignalP HMM prediction'
                             elif hmm_c < cpos:  # Both methods predict a peptide: keep the earlier cleavage position
                                 cpos = hmm_c
                                 desc = 'SignalP HMM prediction (NN also Y)'
                             else: desc += ' (HMM also Y)'
                         if cpos > 0: ft.append({'Type':'SIGNALP','Desc':desc,'Start':1,'End':cpos})
                     except: self.log.errorLog('UniFake SignalP problem for %s.' % name)
                 ## ~ [2h] Convert to UniProt and save ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                 self.addRealUniProt(seq,udata,ft)
                 self.deBug(ft)
                 if not store: uniprot.list['Entry'] = []    # Not storing: keep only the current entry in the UniProt object
                 if uniprot.addFromSeq(seq,data=udata,ft=ft):    ### Converts into UniProtEntry object
                     if not store: uniprot.saveUniProt(datfile,append=True)
                     #x#open(self.info['DatPickup'],'a').write('%s\n' % seq.shortName())
             except: self.log.errorLog('Problem during UniFake(%s)' % name)
             ## ~ [2i] Cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             if utmp:    # Only remove temp files if the prefix was actually generated for THIS sequence
                 for tmp in glob.glob('%s*' % utmp): os.unlink(tmp)
             self.printLog('#UNIFAKE','|---------- %s run <<<|>>> %s to go -----------|' % (rje.integerString(sx),rje.integerString(seqnum-sx)),log=False)
         if store: uniprot.saveUniProt(datfile,append=False)
         if self.opt['CleanUp']:
             for tmp in glob.glob('TMHMM*'):
                 if os.path.isdir(tmp): os.rmdir(tmp)
     except: self.errorLog('Oh, the shame of it! Trouble during UniFake.uniFake()')