Esempio n. 1
0
    def interactiveUpdate(self):    ### Interactive method for updating failed genes
        '''
        Interactive method for updating failed genes.
        Does nothing if self.stat['Interactive'] is negative, if no gene in self.list['Genes'] has the GeneCard symbol
        '!FAILED!', or if the user declines. Otherwise a replacement symbol is requested for each failure, the
        replacements are passed to self.processGenes(), their GeneCard entries are copied onto the failed genes and
        the method calls itself again. Any exception is reported through self.log.errorLog().
        '''
        try:
            ### ~ Check interactivity and collect failures ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            if self.stat['Interactive'] < 0: return
            failures = [gene for gene in self.list['Genes'] if self.dict['GeneCard'][gene]['Symbol'] == '!FAILED!']
            if not failures: return
            if not rje.yesNo('Try manual mapping of %d failures?' % len(failures)): return

            ### ~ Ask for a replacement symbol for each failure ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            mymapping = {}      # Failed gene -> replacement symbol
            newgenes = []       # Unique replacement symbols, in the order given
            try:
                for failed in failures:
                    symbol = rje.choice('New gene symbol for > %s <?' % failed)
                    if symbol:
                        mymapping[failed] = symbol
                        if symbol not in newgenes: newgenes.append(symbol)
            except KeyboardInterrupt:   # CTRL+C stops the questions; mappings given so far are still used
                if rje.yesNo('Quit GeneCards?',default='N'): raise

            ### ~ Extract replacements, map them back onto the failures and repeat ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            self.processGenes(newgenes)
            for (failed,symbol) in mymapping.items():
                self.dict['GeneCard'][failed] = self.dict['GeneCard'][symbol]
            return self.interactiveUpdate()
        except: self.log.errorLog('Problem during rje_GeneCards.interactiveUpdate()')
Esempio n. 2
0
def cmdHelp(info=None,out=None,cmd_list=None):   ### Prints *.__doc__ and asks for more sys.argv commands
    '''
    Prints *.__doc__ and asks for more sys.argv commands.
    >> info = object supplying program, version and start_time for the help banner [makeInfo() if None]
    >> out = output object providing verbose() and stat['Interactive'] [rje.Out() if None]
    >> cmd_list = list of commandline arguments [new empty list if None]; extended in place with any extra commands
    << cmd_list, including any commands added through rje.inputCmds(). None if an unexpected error was caught.
    '''
    try:
        if info is None:
            info = makeInfo()
        if out is None:
            out = rje.Out()
        if cmd_list is None:    # Fresh list per call: a shared [] default would keep commands from earlier calls
            cmd_list = []
        helpx = cmd_list.count('help') + cmd_list.count('-help') + cmd_list.count('-h')
        if helpx > 0:
            print('\n\nHelp for %s %s: %s\n' % (info.program, info.version, time.asctime(time.localtime(info.start_time))))
            out.verbose(-1,4,text=__doc__)
            if rje.yesNo('Show general commandline options?'):
                out.verbose(-1,4,text=rje.__doc__)
            if rje.yesNo('Quit?'):
                sys.exit()
            cmd_list += rje.inputCmds(out,cmd_list)
        elif out.stat['Interactive'] > 1:    # Ask for more commands
            cmd_list += rje.inputCmds(out,cmd_list)
        return cmd_list
    except SystemExit:
        sys.exit()
    except KeyboardInterrupt:
        sys.exit()
    except:
        print('Major Problem with cmdHelp()')
Esempio n. 3
0
def cmdHelp(info=None, out=None, cmd_list=None):  ### Prints *.__doc__ and asks for more sys.argv commands
    """
    Prints *.__doc__ and asks for more sys.argv commands.
    >> info = object supplying program, version and start_time for the help banner [makeInfo() if not given]
    >> out = output object providing verbose() and stat['Interactive'] [rje.Out() if not given]
    >> cmd_list = list of commandline arguments [new empty list if None]; extended in place with any extra commands
    << cmd_list, including any commands added through rje.inputCmds(). None if an unexpected error was caught.
    """
    try:  ### ~ [1] ~ Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        if not info:
            info = makeInfo()
        if not out:
            out = rje.Out()
        if cmd_list is None:  # Fresh list per call: a shared [] default would keep commands from earlier calls
            cmd_list = []
        ### ~ [2] ~ Look for help commands and print options if found ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        helpx = cmd_list.count("help") + cmd_list.count("-help") + cmd_list.count("-h")
        if helpx > 0:
            print(
                "\n\nHelp for %s %s: %s\n"
                % (
                    info.program,
                    info.version,
                    time.asctime(time.localtime(info.start_time)),
                )
            )
            out.verbose(-1, 4, text=__doc__)
            if rje.yesNo("Show general commandline options?"):
                out.verbose(-1, 4, text=rje.__doc__)
            if rje.yesNo("Show disorder commandline options?"):
                out.verbose(-1, 4, text=rje_disorder.__doc__)
            if rje.yesNo("Quit?"):
                sys.exit()  # Option to quit after help
            cmd_list += rje.inputCmds(out, cmd_list)  # Add extra commands interactively.
        elif out.stat["Interactive"] > 1:
            cmd_list += rje.inputCmds(out, cmd_list)  # Ask for more commands
        ### ~ [3] ~ Return commands ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        return cmd_list
    except SystemExit:
        sys.exit()
    except KeyboardInterrupt:
        sys.exit()
    except:
        print("Major Problem with cmdHelp()")
Esempio n. 4
0
def cmdHelp(
        info=None,
        out=None,
        cmd_list=None):  ### Prints *.__doc__ and asks for more sys.argv commands
    '''
    Prints *.__doc__ and asks for more sys.argv commands.
    >> info = object supplying program, version and start_time for the help banner [makeInfo() if not given]
    >> out = output object providing verbose() and stat['Interactive'] [rje.Out() if not given]
    >> cmd_list = list of commandline arguments [new empty list if None]; extended in place with any extra commands
    << cmd_list, including any commands added through rje.inputCmds(). None if an unexpected error was caught.
    '''
    try:  ### ~ [1] ~ Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        if not info: info = makeInfo()
        if not out: out = rje.Out()
        # Fresh list per call: a shared [] default would keep commands from earlier calls
        if cmd_list is None: cmd_list = []
        ### ~ [2] ~ Look for help commands and print options if found ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        helpx = cmd_list.count('help') + cmd_list.count(
            '-help') + cmd_list.count('-h')
        if helpx > 0:
            rje.printf('\n\nHelp for {0} {1}: {2}\n'.format(
                info.program, info.version,
                time.asctime(time.localtime(info.start_time))))
            out.verbose(-1, 4, text=__doc__)
            if rje.yesNo('Show general commandline options?'):
                out.verbose(-1, 4, text=rje.__doc__)
            if rje.yesNo('Quit?'): sys.exit()  # Option to quit after help
            cmd_list += rje.inputCmds(
                out, cmd_list)  # Add extra commands interactively.
        elif out.stat['Interactive'] > 1:
            cmd_list += rje.inputCmds(out, cmd_list)  # Ask for more commands
        ### ~ [3] ~ Return commands ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        return cmd_list
    except SystemExit:
        sys.exit()
    except KeyboardInterrupt:
        sys.exit()
    except:
        rje.printf('Major Problem with cmdHelp()')
Esempio n. 5
0
def cmdHelp(
        info=None,
        out=None,
        cmd_list=None):  ### Prints *.__doc__ and asks for more sys.argv commands
    '''
    Prints *.__doc__ and asks for more sys.argv commands.
    >> info = object supplying program, version and start_time for the help banner [makeInfo() if not given]
    >> out = output object providing verbose() and stat['Interactive'] [rje.Out() if not given]
    >> cmd_list = list of commandline arguments [new empty list if None]; extended in place with any extra commands
    << cmd_list, including any commands added through rje.inputCmds(). None if an unexpected error was caught.
    '''
    try:
        if not info: info = makeInfo()
        if not out: out = rje.Out()
        # Fresh list per call: a shared [] default would keep commands from earlier calls
        if cmd_list is None: cmd_list = []
        helpx = cmd_list.count('help') + cmd_list.count(
            '-help') + cmd_list.count('-h')
        if helpx > 0:  # Help requested: print banner and docs, then offer to quit or add commands
            print('\n\nHelp for %s %s: %s\n' % (
                info.program, info.version,
                time.asctime(time.localtime(info.start_time))))
            out.verbose(-1, 4, text=__doc__)
            if rje.yesNo('Show general commandline options?'):
                out.verbose(-1, 4, text=rje.__doc__)
            if rje.yesNo('Quit?'): sys.exit()
            cmd_list += rje.inputCmds(out, cmd_list)
        elif out.stat['Interactive'] > 1:
            cmd_list += rje.inputCmds(out, cmd_list)  # Ask for more commands
        return cmd_list
    except SystemExit:
        sys.exit()
    except KeyboardInterrupt:
        sys.exit()
    except:
        print('Major Problem with cmdHelp()')
Esempio n. 6
0
def cmdHelp(info=None, out=None, cmd_list=None):  ### Prints *.__doc__ and asks for more sys.argv commands
    """
    Prints *.__doc__ and asks for more sys.argv commands.
    >> info = object supplying program, version and start_time for the help banner [makeInfo() if None]
    >> out = output object providing verbose() and stat['Interactive'] [rje.Out() if None]
    >> cmd_list = list of commandline arguments [new empty list if None]; extended in place with any extra commands
    << cmd_list, including any commands added through rje.inputCmds(). None if an unexpected error was caught.
    """
    try:
        if info is None:
            info = makeInfo()
        if out is None:
            out = rje.Out()
        if cmd_list is None:  # Fresh list per call: a shared [] default would keep commands from earlier calls
            cmd_list = []
        helpx = cmd_list.count("help") + cmd_list.count("-help") + cmd_list.count("-h")
        if helpx > 0:
            print(
                "\n\nHelp for %s %s: %s\n"
                % (
                    info.program,
                    info.version,
                    time.asctime(time.localtime(info.start_time)),
                )
            )
            out.verbose(-1, 4, text=__doc__)
            if rje.yesNo("Show general commandline options?"):
                out.verbose(-1, 4, text=rje.__doc__)
            if rje.yesNo("Quit?"):
                sys.exit()
            cmd_list += rje.inputCmds(out, cmd_list)
        elif out.stat["Interactive"] > 1:  # Ask for more commands
            cmd_list += rje.inputCmds(out, cmd_list)
        return cmd_list
    except SystemExit:
        sys.exit()
    except KeyboardInterrupt:
        sys.exit()
    except:
        print("Major Problem with cmdHelp()")
Esempio n. 7
0
 def forking(
     self
 ):  ### Keeps forking out and processing jobs until no more jobs in self.list['Forked'].
     '''
     Keeps forking out and processing jobs until no more jobs in self.list['Forked'].
     Each pass polls every fork dictionary in self.list['Forked']: a PID whose first word is 'WAIT' counts as
     finished, anything else is checked with a non-blocking os.waitpid(). Finished forks, and forks that could not
     be polled, are removed from the list and handed to self.endFork(). If 'PIDCheck' is set or self.dev() is true,
     the list index and PID of each polled fork are written to a *.pid file named after the log file.
     Raises ValueError if the 'KillForks' inactivity limit is exceeded and the main thread is to be killed.
     '''
     ### ~ [1] ~ Start first set of jobs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     if self.getBool('PIDCheck') or self.dev():
         pidcheck = '%s.pid' % rje.baseFile(
             self.log.info['LogFile'])  # Set *.pid object to match log
     else:
         pidcheck = False
     ### ~ [2] ~ Monitor jobs and set next one running as they finish ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     while self.list['Forked']:
         PIDCHECK = None
         if pidcheck: PIDCHECK = open(pidcheck, 'w')  # Rewritten from scratch on every pass
         try:
             for fdict in self.list['Forked'][0:]:  # Iterate over a copy: finished forks are removed inside the loop
                 try:
                     pid = fdict['PID']
                     if PIDCHECK is not None:
                         PIDCHECK.write('%s: %s\n' %
                                        (self.list['Forked'].index(fdict), pid))
                     if ('%s' % pid).split()[0] == 'WAIT': status = 1
                     else: (status, exit_stat) = os.waitpid(pid, os.WNOHANG)
                 except:  # Any polling problem is logged and the fork is treated as finished
                     self.errorLog('!')
                     status = 1
                 if status > 0:
                     self.list['Forked'].remove(fdict)
                     self.endFork(
                         fdict
                     )  # Fork has finished: can replace with processing
         finally:  # Close the *.pid file even if endFork() raises
             if PIDCHECK is not None: PIDCHECK.close()
         ## ~ [2a] Look for eternal hanging of threads ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         if time.time() - self.getNum('KillTime') > self.getNum(
                 'KillForks'):
             self.verbose(
                 0, 1,
                 '\n%d seconds of main thread inactivity. %d forks still active!'
                 % (self.getNum('KillForks'), len(self.list['Forked'])), 1)
             for fdict in self.list['Forked']:
                 self.verbose(
                     0, 2, ' => Fork %s, PID %d still Active!' %
                     (fdict['ID'], fdict['PID']), 1)
             if self.i() < 0 or rje.yesNo('Kill Main Thread?'):
                 raise ValueError(
                     '%d seconds of main thread inactivity. %d forks still active!'
                     % (self.getNum('KillForks'), len(self.list['Forked'])))
             elif rje.yesNo('Kill hanging forks?'):
                 for fdict in self.list['Forked']:
                     self.printLog(
                         '#KILL', 'Killing Fork %s, PID %d.' %
                         (fdict['ID'], fdict['PID']))
                     os.system('kill %d' % fdict['PID'])
             else:  # Keep waiting: restart the inactivity clock
                 self.setNum({'KillTime': time.time()})
         ## ~ [2b] Sleep ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         time.sleep(self.getNum('ForkSleep'))
Esempio n. 8
0
 def gasp(self):      ### Main GASP Method, copied from GASP v1.4
     '''
     Main GASP Method, copied from GASP v1.4.
     Runs Gapped Ancestral Sequence Prediction on self.obj['Tree'] unless 'IndelTree' is set and tree.node[-1]
     already has a sequence, then writes the indel tree if 'IndelTree' is set. Output file names are recorded in
     self.dict['Output']. Any exception is reported through self.errorLog().
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         tree = self.obj['Tree']
         makeindel = self.getBool('IndelTree')
         if self.baseFile() == 'infile': self.baseFile('gasp')     # Replace the 'infile' basefile with 'gasp'
         ### ~ [2] GASP ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if not (makeindel and tree.node[-1].obj['Sequence']):     # Perform GASP
             self.printLog('#SEQ','GASP: Gapped Ancestral Sequence Prediction')
             predictor = rje_ancseq.Gasp(tree=tree,ancfile='%s' % self.baseFile(),cmd_list=self.cmd_list,log=self.log)
             self.verbose(0,2,'%s' % predictor.details(),1)
             if self.i() > 0 and not rje.yesNo('Use these parameters?'): predictor.edit()
             predictor.gasp()
             self.printLog('#GASP',"GASP run completed OK!")
             for ext in ['anc.fas','anc.nsf','anc.txt']:           # Record ancestral sequence outputs
                 self.dict['Output'][ext] = '%s.%s' % (self.baseFile(),ext)
         ### ~ [3] InDel Tree ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if makeindel:
             self.dict['Output']['indeltree'] = '%s.indel.txt' % self.baseFile()
             tree.indelTree(filename='%s.indel.txt' % self.baseFile())
     except: self.errorLog('%s.gasp error' % self.prog())
Esempio n. 9
0
    def convert(self,filelist=[],outfile=None):      ### Converts scansite output files in FileList to Outfile
        '''
        Converts scansite output files in FileList to Outfile.
        >> filelist:list = scansite output files to convert [self.list['FileList'] if empty]. Never modified here.
        >> outfile:str = delimited output file [self.info['Name'] if not given]
        << True once all files are processed; False if there are no files to convert.
        Lines are only written to outfile when they match re_scansite. Input files that do not exist are logged and
        skipped. Any other exception is logged with the current stage and re-raised.
        '''
        _stage = 'Setup'
        OUTFILE = None
        try:
            ### Setup ###
            if len(filelist) < 1:
                filelist = self.list['FileList']
            if not outfile:
                outfile = self.info['Name']
            if len(filelist) < 1:
                self.log.errorLog('No scansite files to convert! %s unchanged/not made.' % outfile,printerror=False)
                return False
            delimit = rje.getDelimit(self.cmd_list)
            ext = rje.delimitExt(delimit)
            if ext != outfile[-3:]:     # Offer to swap the last three characters for the delimiter's extension
                newfile = outfile[:-3] + ext
                if rje.yesNo('Change file name from %s to %s?' % (outfile, newfile)):
                    outfile = newfile
            self.log.printLog('#OUT','Converting %d file(s), output to %s.' % (len(filelist),outfile))

            ### Output File ###
            _stage = 'Output File'
            if not self.opt['Append'] or not os.path.exists(outfile):   # Create with header
                OUTFILE = open(outfile,'w')
                headers = ['seq_id','enzyme','enz_group','aa','pos','score','percentile','matchseq','sa']
                rje.writeDelimit(OUTFILE,headers,delimit)
            else:
                OUTFILE = open(outfile,'a')

            ### Conversion ###
            _stage = 'Conversion'
            sx = 0      # Total results written
            for infile in filelist:
                if not os.path.exists(infile):
                    self.log.errorLog('Input file %s does not exist! :o(' % infile,False,False)
                    continue
                fx = 0  # Results written from this file
                INFILE = open(infile,'r')
                try:
                    inline = rje.nextLine(INFILE)
                    while inline is not None:
                        scanlist = rje.matchExp(re_scansite,inline)
                        if scanlist:    # Only matching lines are output; others used to repeat the previous hit
                            rje.writeDelimit(OUTFILE,scanlist,delimit)
                            sx += 1
                            fx += 1
                            rje.progressPrint(self,sx)
                        inline = rje.nextLine(INFILE)
                finally: INFILE.close()
                self.log.printLog('#OUT','%s scansite results from %s. (%s Total.)' % (rje.integerString(fx),infile,rje.integerString(sx)))

            ### End ###
            _stage = 'End'
            OUTFILE.close()
            self.log.printLog('#OUT','%s scansite results output to %s.' % (rje.integerString(sx),outfile))
            return True
        except:
            self.log.errorLog('Error in convert(%s)' % _stage,printerror=True,quitchoice=False)
            raise
        finally:    # Make sure the output handle is released on errors too; closing twice is harmless
            if OUTFILE is not None: OUTFILE.close()
Esempio n. 10
0
 def save(self):  ### Saves parsed REST output to files
     '''
     Saves parsed REST output to files.
     The lower-case 'Rest' setting selects what is written:
     - a key of self.dict['Output']: only that output is processed;
     - 'full' or 'text': self.restFullOutput() is written to a single *.rest file built from 'RestOutDir' and
       'RestBase';
     - any other non-empty value: the format is reported as unrecognised and, if running interactively and the user
       agrees, the full *.rest file is written instead;
     - empty: every key of self.dict['Output'] is processed.
     A processed output is written to its path in self.dict['Outfile'] (after rje.backup() of that path); outputs
     without a path raise a warning, except 'intro'.
     << True after writing the *.rest file, False if an unrecognised format is not replaced by the full output,
        otherwise None.
     '''
     restfmt = self.getStrLC('Rest')
     rbase = '%s%s' % (self.getStr('RestOutDir'),
                       rje.baseFile(self.getStr('RestBase'),
                                    strip_path=True,
                                    keepext=True))
     rje.mkDir(self, self.getStr('RestOutDir'))
     outputs = rje.sortKeys(self.dict['Output'])
     if restfmt in outputs: outputs = [restfmt]
     elif restfmt:
         label = restfmt
         if restfmt not in ['full', 'text']:
             self.printLog(
                 '#OUTFMT', 'REST output format "%s" not recognised.' %
                 restfmt)
             if self.i() < 0 or not rje.yesNo('Output all parsed outputs?'):
                 return False
             label = 'full'
         outfile = '%s.rest' % rbase
         with open(outfile, 'w') as OUT:  # Closed explicitly rather than left to garbage collection
             OUT.write(self.restFullOutput())
         self.printLog('#OUT', '%s: %s' % (label, outfile))
         return True
     for rkey in outputs:
         if rkey in self.dict['Outfile']:
             rje.backup(self, self.dict['Outfile'][rkey])
             with open(self.dict['Outfile'][rkey], 'w') as OUT:
                 OUT.write(self.dict['Output'][rkey])
             self.printLog('#OUT',
                           '%s: %s' % (rkey, self.dict['Outfile'][rkey]))
         elif rkey not in ['intro']:
             self.warnLog('No outfile parsed/generated for %s output' %
                          rkey)
Esempio n. 11
0
def cmdHelp(info=None,out=None,cmd_list=None):   ### Prints *.__doc__ and asks for more sys.argv commands
    '''
    Prints *.__doc__ and asks for more sys.argv commands.
    >> info = object supplying program, version and start_time for the help banner [makeInfo() if not given]
    >> out = output object providing verbose() and stat['Interactive'] [rje.Out() if not given]
    >> cmd_list = list of commandline arguments [new empty list if None]; extended in place with any extra commands
    << cmd_list, including any commands added through rje.inputCmds(). None if an unexpected error was caught.
    '''
    try:### ~ [1] ~ Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        if not info: info = makeInfo()
        if not out: out = rje.Out()
        if cmd_list is None: cmd_list = []          # Fresh list per call: a shared [] default would keep old commands
        ### ~ [2] ~ Look for help commands and print options if found ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        cmd_help = cmd_list.count('help') + cmd_list.count('-help') + cmd_list.count('-h')
        if cmd_help > 0:
            print('\n\nHelp for %s %s: %s\n' % (info.program, info.version, time.asctime(time.localtime(info.start_time))))
            out.verbose(-1,4,text=__doc__)
            if rje.yesNo('Show general commandline options?'): out.verbose(-1,4,text=rje.__doc__)
            if rje.yesNo('Quit?'): sys.exit()           # Option to quit after help
            cmd_list += rje.inputCmds(out,cmd_list)     # Add extra commands interactively.
        elif out.stat['Interactive'] > 1: cmd_list += rje.inputCmds(out,cmd_list)    # Ask for more commands
        ### ~ [3] ~ Return commands ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        return cmd_list
    except SystemExit: sys.exit()
    except KeyboardInterrupt: sys.exit()
    except: print('Major Problem with cmdHelp()')
Esempio n. 12
0
    def setup(self):    ### Main class setup method.
        '''
        Main class setup method.
        Looks up the lower-case 'Name' setting in the module dictionary `mod`, builds that program's command list
        and stores an instance of the matching class in self.obj['Prog'].
        If no program object was made, the problem is logged and, when running interactively, the user can view the
        help and/or give a different program name, after which setup() is called again.
        << self.obj['Prog'] if a program object was made; False if setup failed or was abandoned.
        '''
        try:### ~ [1] ~ Setup Program ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            self.obj['Prog'] = None
            prog = self.getStrLC('Name')
            if prog in mod:
                i = self.obj['ProgInfo'] = mod[prog].makeInfo()
                self.printLog('#PROG','%s V%s: %s' % (i.program,i.version,i.description))
                # Program command list: rje.getCmdList() result, then this object's own commands, then newlog=F
                progcmd = rje.getCmdList([],info=i) + self.cmd_list + ['newlog=F']
                out = rje.Out(cmd_list=progcmd)
                out.printIntro(i)
                #self.debug(prog); self.debug(progcmd)
                # With 'Help' set, the program's own cmdHelp() is called with 'help' prepended to the commands
                if self.getBool('Help'): progcmd = mod[prog].cmdHelp(i,out,['help']+progcmd)
                # NOTE(review): purgelist is not assigned in this method - presumably a module-level name; confirm
                self.printLog('#CMD','Full %s CmdList: %s' % (i.program,rje.argString(rje.tidyArgs(progcmd,nopath=self.getStrLC('Rest') and not self.dev(),purgelist=purgelist))),screen=False)
                #self.debug(prog); self.debug(progcmd)
            ## ~ [1a] ~ Make self.obj['Prog'] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                # A name found in mod but not listed below leaves self.obj['Prog'] as None and is handled in [2]
                if prog in ['seqlist','rje_seqlist']: self.obj['Prog'] = rje_seqlist.SeqList(self.log,progcmd)
                elif prog in ['uniprot','rje_uniprot']: self.obj['Prog'] = rje_uniprot.UniProt(self.log,progcmd)
                elif prog in ['taxonomy','rje_taxonomy']: self.obj['Prog'] = rje_taxonomy.Taxonomy(self.log,progcmd)
                elif prog in ['tree','rje_tree']: self.obj['Prog'] = rje_tree.Tree(self.log,progcmd)
                elif prog in ['xref','rje_xref']: self.obj['Prog'] = rje_xref.XRef(self.log,progcmd)
                elif prog in ['seq','rje_seq']: self.obj['Prog'] = rje_seq.SeqList(self.log,progcmd)
                elif prog in ['mitab','rje_mitab']: self.obj['Prog'] = rje_mitab.MITAB(self.log,progcmd)
                elif prog in ['dbase','database']: self.obj['Prog'] = rje_dbase.DatabaseController(self.log,progcmd)
                elif prog in ['pydocs']: self.obj['Prog'] = rje_pydocs.PyDoc(self.log,progcmd)
                elif prog in ['ensembl','rje_ensembl']: self.obj['Prog'] = rje_ensembl.EnsEMBL(self.log,progcmd)
                elif prog in ['genbank','rje_genbank']: self.obj['Prog'] = rje_genbank.GenBank(self.log,progcmd)
                elif prog in ['extatic']: self.obj['Prog'] = extatic.ExTATIC(self.log,progcmd)
                elif prog in ['revert']: self.obj['Prog'] = revert.REVERT(self.log,progcmd)
                elif prog in ['fiesta']: self.obj['Prog'] = fiesta.FIESTA(self.log,progcmd)
                elif prog in ['gablam']: self.obj['Prog'] = gablam.GABLAM(self.log,progcmd)
                elif prog in ['gopher']: self.obj['Prog'] = gopher.Gopher(self.log,progcmd)
                elif prog in ['haqesac']: self.obj['Prog'] = haqesac.HAQESAC(self.log,progcmd)
                elif prog in ['multihaq']: self.obj['Prog'] = multihaq.MultiHAQ(self.log,progcmd)
                elif prog in ['pingu']: self.obj['Prog'] = pingu.PINGU(self.log,progcmd)
                elif prog in ['pacbio']: self.obj['Prog'] = rje_pacbio.PacBio(self.log,progcmd)
                elif prog in ['rje_zen','zen']: self.obj['Prog'] = rje_zen.Zen(self.log,progcmd)

            ### ~ [2] ~ Failure to recognise program ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            if not self.obj['Prog']:
                self.printLog('#ERR','Program "%s" not recognised.' % self.getStr('Name'))
                if self.i() < 0: return False      # No prompting when self.i() is negative
                if rje.yesNo('Show SeqSuite help with program options?'):
                    # [1:] drops the leading 'help' so only commands added during the help prompt remain
                    extracmd = cmdHelp(cmd_list=['help'])[1:]
                    if extracmd:
                        self.cmd_list += extracmd
                        self._cmdList()
                        # Only retry straight away if the extra commands changed the program name
                        if prog != self.getStrLC('Name'): return self.setup()
                self.setStr({'Name':rje.choice('Give program name (Blank or CTRL+C to quit)')})
                if self.getStrLC('Name'): return self.setup()
                else: return False
            return self.obj['Prog']     # Setup successful
        except KeyboardInterrupt: return False      # CTRL+C abandons setup
        except SystemExit: raise
        except: self.errorLog('Problem during %s setup.' % self.prog()); return False  # Setup failed
Esempio n. 13
0
def findOccPos(callobj,Occ,qry,fudge=0):     ### Finds Motif Occurence in alignment
    '''
    Finds Motif Occurence in alignment.
    >> callobj = calling MotifList object
    >> Occ = MotifOcc object
    >> qry = query Sequence object from alignment file
    >> fudge = amount to try shifting match to find occurrence is non-matching sequence
    << (start,end) = start and end position in aligment to allow sequence[start:end]
    << (-1,-1) is returned if the occurrence cannot be placed or an error is caught (errors are logged).
    '''
    try:
        ### Find Hit in Alignment ###
        alnseq = qry.info['Sequence']               # Aligned (gapped) query sequence
        alnlen = qry.seqLen()
        (start,end) = (-1,alnlen)                   # Start and end positions of match *in alignment*
        qpos = Occ.stat['Pos'] + fudge              # Starting position of hit (from 1->L)
        qmatch = Occ.getData('Match')
        qend = qpos + len(qmatch) - 1               # Ending position of hit (1->L)
        (r,a) = (0,0)                               # Counters for aln residues (r) and amino acid positions (a)
        while r < alnlen:                           # Keep looking
            if alnseq[r] != '-':                    # Not a gap: increment a by 1
                a += 1
            if a == qpos and start < 0:             # Start of match (not yet r+=1 because pos is 1->L)
                start = r
            r += 1                                  # Move on through aligned sequences
            if a == qend:                           # End of match
                end = r
                break

        ### Assess whether hit is right! ###
        amatch = alnseq[start:end].replace('-','')
        if amatch == qmatch:     # Everything is OK!
            return (start,end)
        ## Check whether already fudging! ##
        if fudge != 0:           # A fudged retry that still fails is reported by the handler below
            raise ValueError

        ### Something is wrong! Try to find real match! ###
        etxt = 'Alignment sequence (%s) does not match occurence (%s)' % (amatch,qmatch)
        ## Try to find match by moving start (using fudge) ##
        if callobj.stat['Interactive'] < 1 or rje.yesNo('%s. Try to find closest correct match?' % etxt):
            fudge = findFudge(alnseq.replace('-',''),qmatch,Occ.stat['Pos']-1)
            if fudge:
                if callobj.stat['Interactive'] > 0: callobj.log.errorLog('%s in alignment differs from input: Fudged %s by %d aa!' % (qry.shortName(),qmatch,fudge),printerror=False)
                else: callobj.log.printLog('#ERR','%s in alignment differs from input: Fudged %s by %d aa!' % (qry.shortName(),qmatch,fudge),screen=False)
                return findOccPos(callobj,Occ,qry,fudge)
            callobj.log.errorLog('%s in alignment differs from input: Cannot find %s anywhere' % (qry.shortName(),qmatch),printerror=False)
            return (-1,-1)
        callobj.log.errorLog(etxt,printerror=False)
        return (-1,-1)
    except:
        callobj.log.errorLog('Something bad has happened in rje_motif_stats.findOccPos()')
        return (-1,-1)
Esempio n. 14
0
    def setup(self,rest=False):    ### Main class setup method.
        '''
        Main class setup method.
        Looks up the lower-case 'Name' setting in the module dictionary `mod` (or, failing that, in seqsuite.mod),
        builds the program object and stores it in self.obj['Prog'], with the module docstring stored as its 'help'
        output.
        >> rest:bool [False] = if True, an unrecognised program (other than 'help') returns False without logging
           the error or prompting.
        << self.obj['Prog'] if a program object was made; False if setup failed or was abandoned.
        '''
        try:### ~ [1] ~ Setup Program ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            self.obj['Prog'] = None
            prog = self.getStrLC('Name')
            if prog in mod:
                i = self.obj['ProgInfo'] = mod[prog].makeInfo()
                self.printLog('#PROG','%s V%s: %s' % (i.program,i.version,i.description))
                # Program command list: rje.getCmdList() result, then this object's own commands, then newlog=F
                progcmd = rje.getCmdList([],info=i) + self.cmd_list + ['newlog=F']
                out = rje.Out(cmd_list=progcmd)
                out.printIntro(i)
                # With 'Help' set, the program's own cmdHelp() is called with 'help' prepended to the commands
                if self.getBool('Help'): progcmd = mod[prog].cmdHelp(i,out,['help']+progcmd)
                purgelist = seqsuite.purgelist
                self.printLog('#CMD','Full %s CmdList: %s' % (i.program,rje.argString(rje.tidyArgs(progcmd,nopath=self.getStrLC('Rest') and not self.dev(),purgelist=purgelist))),screen=False)
                #self.debug(prog)
            ## ~ [1a] ~ Make self.obj['Prog'] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                if prog in ['slimcore','rje_slimcore','core']: self.obj['Prog'] = rje_slimcore.SLiMCore(self.log,progcmd)
                elif prog in ['rlc','iuscore']: self.obj['Prog'] = rje_slimcore.SLiMCore(self.log,progcmd+['prog=%s' % prog])
                elif prog in ['slimlist','rje_slimlist']: self.obj['Prog'] = rje_slimlist.SLiMList(self.log,progcmd)
                elif prog in ['slimfinder']: self.obj['Prog'] = slimfinder.SLiMFinder(self.log,progcmd)
                elif prog in ['qslimfinder']: self.obj['Prog'] = qslimfinder.QSLiMFinder(self.log,progcmd)
                elif prog in ['slimprob']: self.obj['Prog'] = slimprob.SLiMProb(self.log,progcmd)
                elif prog in ['slimmaker']: self.obj['Prog'] = slimmaker.SLiMMaker(self.log,progcmd)
                elif prog in ['slimfarmer','farm']: self.obj['Prog'] = slimfarmer.SLiMFarmer(self.log,progcmd)
                elif prog in ['slimbench']: self.obj['Prog'] = slimbench.SLiMBench(self.log,progcmd)
                elif prog in ['comparimotif']: self.obj['Prog'] = comparimotif.CompariMotif(self.log,progcmd)
                elif prog in ['peptcluster']: self.obj['Prog'] = peptcluster.PeptCluster(self.log,progcmd)
                elif prog in ['peptalign']: self.obj['Prog'] = peptcluster.PeptCluster(self.log,['peptalign=T']+progcmd+['peptdis=None'])
                # NOTE(review): if prog is in mod but matches none of the branches above, self.obj['Prog'] is still
                # None here and this line raises AttributeError, which the bare except below logs - confirm intended
                self.obj['Prog'].dict['Output']['help'] = mod[prog].__doc__
            elif prog in seqsuite.mod:
                # Program belongs to SeqSuite: let a SeqSuite object build it
                seqsuiteobj = seqsuite.SeqSuite(self.log,self.cmd_list)
                self.obj['Prog'] = seqsuiteobj.setup()
                self.obj['ProgInfo'] = seqsuiteobj.obj['ProgInfo']
                # NOTE(review): assumes seqsuiteobj.setup() returned a program object rather than False - confirm
                self.obj['Prog'].dict['Output']['help'] = seqsuite.mod[prog].__doc__

            ### ~ [2] ~ Failure to recognise program ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            if not self.obj['Prog']:
                if self.getStrLC('Name') != 'help':
                    if not rest: self.printLog('#ERR','Program "%s" not recognised.' % self.getStr('Name'))
                    if self.i() < 0 or rest: return False
                #!# Try SeqSuite? #!#
                # A program name of 'help' goes straight to the help text without asking
                if self.getStrLC('Name') == 'help' or rje.yesNo('Show SLiMSuite help with program options?'):
                    # [1:] drops the leading 'help' so only commands added during the help prompt remain
                    extracmd = cmdHelp(cmd_list=['help'])[1:]
                    if extracmd:
                        self.cmd_list += extracmd
                        self._cmdList()
                        # NOTE(review): the recursive calls below do not pass `rest` on - confirm this is intended
                        if prog != self.getStrLC('Name'): return self.setup()
                self.setStr({'Name':rje.choice('Give program name (Blank or CTRL+C to quit)')})
                if self.getStrLC('Name'): return self.setup()
                else: return False
            return self.obj['Prog']     # Setup successful
        except KeyboardInterrupt: return False      # CTRL+C abandons setup
        except SystemExit: raise
        except: self.errorLog('Problem during %s setup.' % self.prog()); return False  # Setup failed
Esempio n. 15
0
 def setup(self):    ### Main class setup method. Makes sumfile if necessary.
     '''
     Main class setup method. Makes sumfile if necessary.

     [1] Sets the basefile/summary file names and offers to reuse an existing summary file (unless Force is set).
     [2] Reads each MASCOT file in self.list['ResFiles'] with a Budapest object, stores its hits in
         self.dict['Searches'] and saves the sorted protein accessions to a *.protacc file.
     [3] Loads the sequence list and stores its name dictionary in self.dict['Acc2Seq'].
     [4] Writes one row per peptide to the delimited summary file self.info['SumFile'].
     << Returns the self.printLog() result if an existing summary file is reused, False if an error was logged and
        None otherwise (there is no explicit success return).
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.debug(self.getStrLC('SumFile')); self.debug(self.getStr('SumFile'))
         if self.getStrLC('Basefile') in ['','none']: self.baseFile(rje.baseFile(self.info['SumFile']))
         if self.getStrLC('SumFile') in ['','none']: self.info['SumFile'] = '%s.tdt' % self.basefile()
         self.printLog('#SUM','Summary file: %s' % self.getStr('SumFile'))
         if os.path.exists(self.info['SumFile']) and not self.opt['Force']:
             if rje.yesNo('%s found. Use these results?' % self.info['SumFile']):
                 return self.printLog('#SUM','Summary results file found. No MASCOT processing.')
         mapgi = False   # Flags gi-style accessions; only consumed by the disabled [2a] mapping below
         ### ~ [2] Process MASCOT ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         for mfile in self.list['ResFiles']:
             bud = budapest.Budapest(self.log,self.cmd_list+['mascot=%s' % mfile])
             bud.info['Name'] = mfile
             bud.readMascot()
             self.dict['Searches'][mfile] = bud.dict['Hits']
             protacclist = rje.sortKeys(bud.dict['Hits'])
             for protacc in protacclist:
                 if rje.matchExp(r'gi\|(\d+)',protacc): mapgi = True
             accfile = '%s.%s.protacc' % (self.baseFile(),rje.baseFile(mfile))
             self.debug(accfile)
             # Context manager makes sure the accession file is flushed and closed before moving on
             with open(accfile,'w') as ACCFILE:
                 ACCFILE.write(string.join(protacclist,'\n'))
             self.printLog('#MFILE','%s: %s proteins.' % (mfile,rje.iLen(protacclist)))
         ## ~ [2a] gi Mapping ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         #if mapgi:
         #    mapgi = self.dict['MapGI'] = seqlist.seqNameDic('NCBI')
         #    open('mapgi.tmp','w').write(string.join(rje.sortKeys(mapgi),'\n'))
         ### ~ [3] Setup seqlist ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         seqlist = rje_seq.SeqList(self.log,['gnspacc=T']+self.cmd_list)
         self.dict['Acc2Seq'] = seqlist.seqNameDic('Max')
         ### ~ [4] Generate Summary File ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         sumhead = string.split('search,prot_hit_num,prot_acc,prot_desc,pep_seq',',')
         rje.delimitedFileOutput(self,self.info['SumFile'],sumhead,rje_backup=True)  # Header row
         for mfile in rje.sortKeys(self.dict['Searches']):
             hits = self.dict['Searches'][mfile]
             for protacc in rje.sortKeys(hits):
                 protname = hits[protacc]['prot_acc']
                 protdesc = hits[protacc]['prot_desc']
                 gimatch = rje.matchExp(r'gi\|(\d+)',protacc)
                 if gimatch:     # gi accession: try to replace name and description with the loaded sequence details
                     gi = gimatch[0]
                     try:
                         protname = self.dict['Acc2Seq'][gi].shortName()
                         protdesc = self.dict['Acc2Seq'][gi].info['Description']
                     except: protname = 'gi_UNK__%s' % gi    # Any lookup failure falls back to a placeholder name
                 for pep in hits[protacc]['Peptides']:
                     data = {'search':rje.baseFile(mfile,True),'prot_desc':protdesc,'prot_acc':protname,
                             'pep_seq':pep,'prot_hit_num':hits[protacc]['prot_hit_num']}
                     rje.delimitedFileOutput(self,self.info['SumFile'],sumhead,datadict=data)
     except: self.errorLog('Problem during %s setup.' % self); return False  # Setup failed
Esempio n. 16
0
 def forking(self):  ### Keeps forking out and processing jobs until no more jobs in self.list['Forked'].
     '''
     Keeps forking out and processing jobs until no more jobs in self.list['Forked'].

     Each pass polls every entry of self.list['Forked'] with a non-blocking os.waitpid(). Entries that have
     finished, whose PID is a 'WAIT...' placeholder, or whose check raised an error are removed from the list and
     handed to self.endFork(). If PIDCheck (or dev mode) is on, the list index and PID of every polled entry is
     rewritten to a *.pid file on each pass.
     << Raises ValueError if the KillForks inactivity limit is exceeded and the run is non-interactive or the user
        chooses to kill the main thread.
     '''
     ### ~ [1] ~ Start first set of jobs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     if self.getBool('PIDCheck') or self.dev(): pidcheck = '%s.pid' % rje.baseFile(self.log.info['LogFile'])    # Set *.pid object to match log
     else: pidcheck = False
     ### ~ [2] ~ Monitor jobs and set next one running as they finish ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     while self.list['Forked']:
         PIDCHECK = None
         if pidcheck: PIDCHECK = open(pidcheck,'w')
         try:
             for fdict in self.list['Forked'][0:]:   # Iterate over a copy: finished entries are removed in the loop
                 try:
                     pid = fdict['PID']
                     if PIDCHECK is not None: PIDCHECK.write('%s: %s\n' % (self.list['Forked'].index(fdict),pid))
                     if string.split('%s' % pid)[0] == 'WAIT': status = 1
                     else: (status,exit_stat) = os.waitpid(pid,os.WNOHANG)   # status is 0 while the child is still running
                 except:
                     self.errorLog('!')
                     status = 1      # Treat an uncheckable fork as finished so that it cannot hang the loop
                 if status > 0:
                     self.list['Forked'].remove(fdict)
                     self.endFork(fdict)   # Fork has finished: can replace with processing
         finally:
             # Close the *.pid file even if endFork() raises
             if PIDCHECK is not None: PIDCHECK.close()
         ## ~ [2a] Look for eternal hanging of threads ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         if time.time() - self.getNum('KillTime') > self.getNum('KillForks'):
             self.verbose(0,1,'\n%d seconds of main thread inactivity. %d forks still active!' % (self.getNum('KillForks'),len(self.list['Forked'])),1)
             for fdict in self.list['Forked']:
                 # %s rather than %d: PID may be a 'WAIT...' placeholder string (see above)
                 self.verbose(0,2,' => Fork %s, PID %s still Active!' % (fdict['ID'],fdict['PID']),1)
             if self.i() < 0 or rje.yesNo('Kill Main Thread?'):
                 raise ValueError('%d seconds of main thread inactivity. %d forks still active!' % (self.getNum('KillForks'),len(self.list['Forked'])))
             elif rje.yesNo('Kill hanging forks?'):
                 for fdict in self.list['Forked']:
                     if string.split('%s' % fdict['PID'])[:1] == ['WAIT']: continue   # Placeholder: no process to kill
                     self.printLog('#KILL','Killing Fork %s, PID %s.' % (fdict['ID'],fdict['PID']))
                     os.system('kill %d' % fdict['PID'])
             else: self.setNum({'KillTime':time.time()})
         ## ~ [2b] Sleep ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         time.sleep(self.getNum('ForkSleep'))
Esempio n. 17
0
def cmdHelp(
        info=None,
        out=None,
        cmd_list=None):  ### Prints *.__doc__ and asks for more sys.argv commands
    '''
    Prints *.__doc__ and asks for more sys.argv commands.
    >> info: program information object (made with makeInfo() if not given)
    >> out: output object used for verbose printing (made with rje.Out() if not given)
    >> cmd_list: list of commandline arguments. A list passed in is extended in place with any extra commands.
    << Returns cmd_list, including any extra commands entered by the user. Returns None if an unexpected error
       was reported. Exits the program for version/details requests, if the user chooses to quit, or on CTRL+C.
    '''
    try:
        # A fresh list per call: a shared [] default would keep commands added by earlier calls
        if cmd_list is None: cmd_list = []
        if not info: info = makeInfo()
        if len(sys.argv) == 2 and sys.argv[1] in [
                'version', '-version', '--version'
        ]:
            rje.printf(info.version)
            sys.exit(0)
        if len(sys.argv) == 2 and sys.argv[1] in [
                'details', '-details', '--details'
        ]:
            rje.printf('{0} v{1}'.format(info.program, info.version))
            sys.exit(0)
        if not out: out = rje.Out()
        # Number of help requests in the commands (named helpx to avoid shadowing the help() builtin)
        helpx = cmd_list.count('help') + cmd_list.count(
            '-help') + cmd_list.count('-h')
        if helpx > 0:
            rje.printf('\n\nHelp for {0} {1}: {2}\n'.format(
                info.program, info.version,
                time.asctime(time.localtime(info.start_time))))
            out.verbose(-1, 4, text=__doc__)
            if rje.yesNo('Show general commandline options?'):
                out.verbose(-1, 4, text=rje.__doc__)
            if rje.yesNo('Quit?'): sys.exit()
            cmd_list += rje.inputCmds(out, cmd_list)
        elif out.stat['Interactive'] > 1:
            cmd_list += rje.inputCmds(out, cmd_list)  # Ask for more commands
        return cmd_list
    except SystemExit:
        sys.exit()
    except KeyboardInterrupt:
        sys.exit()
    except:
        rje.printf('Major Problem with cmdHelp()')
Esempio n. 18
0
    def maskCleave(self):   ### Outputs masked cleavage sequences to file
        '''
        Outputs masked cleavage sequences to file.

        For every sequence with an entry in self.signalp, the cleavage position is taken from the 'nn_ymaxpos'
        and/or 'hmm_cmaxpos' predictions that are flagged 'Y' (the smaller position is used when both are flagged)
        and that many leading residues are replaced by 'X'. Entries are popped from self.signalp as they are used.
        The sequences are then saved to <Basefile>.cleaved.fas and, if the user agrees, TMHMM is run on that file.
        Errors are logged with the stage at which they occurred.
        '''
        try:
            ### <a> ### Setup
            _stage = '<a> Setup'
            seqlist = self.obj['SeqList']
            seqlist.loadSeqs()
            seqlist.degapSeq()
            outfile = '%s.cleaved.fas' % seqlist.info['Basefile']

            ### <b> ### MaskSeqs
            _stage = '<b> Mask'
            self.verbose(0,3,'Masking cleaved signalp petides from %s into %s...' % (seqlist.info['Name'],outfile),0)
            cx = 0      # Number of sequences masked
            for seq in seqlist.seq:
                acc = seq.info['AccNum']
                if acc not in self.signalp:
                    continue    # No signalp results for this sequence
                sigp = self.signalp.pop(acc)
                cpos = 0        # Cleavage position: 0 = nothing to mask
                if sigp['nn_ymax?'] == 'Y':
                    cpos = int(sigp['nn_ymaxpos'])
                if sigp['hmm_cmax?'] == 'Y':
                    hmm_c = int(sigp['hmm_cmaxpos'])
                    if cpos==0 or (cpos > 0 and hmm_c < cpos):
                        cpos = hmm_c
                if cpos > 0:
                    cx += 1
                    seq.info['Sequence'] = 'X' * cpos + seq.info['Sequence'][cpos:]
                    if cx % 100 == 0:   # Progress dot every 100 masked sequences (independent of division semantics)
                        self.verbose(0,4,'.',0)
            self.verbose(0,1,'Done! %d sequences masked.' % cx,1)

            ### <c> ### Save
            _stage = '<c> Save'
            seqlist.saveFasta(seqfile=outfile)

            ### <d> ### Run TMHMM
            _stage = '<d> Run TMHMM'
            if rje.yesNo('Run TMHMM on this now?'):
                os.system('tmhmm %s.cleaved.fas -short > %s.cleaved.tmhmm' % (seqlist.info['Basefile'],seqlist.info['Basefile']))
            return
        except:
            self.log.errorLog('Problem with maskCleave() %s.' % _stage)
Esempio n. 19
0
 def setup(self):    ### Main class setup method.
     '''
     Main class setup method.

     [1]  Makes the rje_db Database object and sets self.list['Accuracy'] from ErrPerBase.
     [1a] Repeats until SMRTUnits is one of reads/gb/mb: a unit is guessed from the size of SMRTReads and either
          accepted (always when i() < 0) or replaced by a user choice. A Gb/Mb SMRTReads value is then converted
          into a number of reads using AvRead.
     [1b] Converts self.list['XnList'] into a sorted list of integers, dropping blanks, 0, 1 and any values that
          cannot be converted.
     << Returns True if setup was successful, else False (after logging the error).
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.obj['DB'] = rje_db.Database(self.log,self.cmd_list)
         self.db().basefile(self.basefile())
         self.list['Accuracy'] = [0,1.0 - self.getNum('ErrPerBase')]
         ## ~ [1a] SMRTReads ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         while self.getStrLC('SMRTUnits') not in ['reads','gb','mb']:
             # Fill in the rejected value now, before SMRTUnits is replaced below
             txt = 'SMRTUnits "%s" not recognised' % self.getStr('SMRTUnits')
             if self.getNum('SMRTReads') < 10: smrtunits = 'Gb'
             elif self.getNum('SMRTReads') > 10000: smrtunits = 'reads'
             else: smrtunits = 'Mb'
             # NOTE(review): if i() == 0 and yesNo() returns False, SMRTUnits is left unchanged and the loop repeats
             if self.i() < 0 or rje.yesNo('%s: switch to (%s) %s?' % (txt,self.getNum('SMRTReads'),smrtunits)):
                 self.setStr({'SMRTUnits':smrtunits})
             elif self.i() >0: self.setStr({'SMRTUnits':rje.choice('SMRTUnits (reads/Gb/Mb)?')})
             self.printLog('#UNITS','%s => %s' % (txt,self.getStr('SMRTUnits')))
         if self.getStrLC('SMRTUnits') in ['gb','mb']:
             smrttotal = self.getNum('SMRTReads') * {'gb':1e9,'mb':1e6}[self.getStrLC('SMRTUnits')]
             txt =  '%s %s @ %.3f kb/read' % (self.getNum('SMRTReads'),self.getStr('SMRTUnits'),self.getNum('AvRead')/1000.0)
             self.setNum({'SMRTReads':smrttotal/self.getNum('AvRead')})
             txt += ' => %s reads' % rje.iStr(int(self.getNum('SMRTReads')))
             self.printLog('#READS',txt)
         ## ~ [1b] XnList ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         xnlist = []
         for xn in self.list['XnList']:
             if xn == '': continue
             try:
                 ixn = int(xn)
                 if xn not in [ixn,'%d' % ixn]: self.printLog('#XN','"%s" -> %dX' % (xn,ixn))
                 if ixn == 0: self.printLog('#XN','No point in 0X output: use 1-%Coverage.')
                 elif ixn == 1: self.printLog('#XN','No point in 1X output: use %Coverage.')
                 else: xnlist.append(ixn)
             except: self.errorLog('Could not process %s as part of XnList. (Integers only.)' % xn)
         xnlist.sort()
         # Join the numbers directly: splitting the list repr on ',' left a double space after each comma
         if xnlist: self.printLog('#XN','XnList: %sX.' % 'X, '.join(['%d' % x for x in xnlist]))
         self.list['XnList'] = xnlist
         return True     # Setup successful
     except: self.errorLog('Problem during %s setup.' % self.prog()); return False  # Setup failed
Esempio n. 20
0
 def save(self):     ### Saves parsed REST output to files
     '''
     Saves parsed REST output to files.

     - If Rest names one of the parsed outputs in self.dict['Output'], only that output is saved.
     - If Rest is 'full' or 'text', the full REST output is written to a single *.rest file.
     - Any other non-empty Rest value is reported as unrecognised; the full output is then written to *.rest
       only if the run is interactive (i() >= 0) and the user agrees.
     - With no Rest value, every parsed output with an entry in self.dict['Outfile'] is backed up and saved.
     << Returns True after writing a full *.rest file, False if an unrecognised format is not rescued and None
        after saving individual outputs.
     '''
     rbase = '%s%s' % (self.getStr('RestOutDir'),rje.baseFile(self.getStr('RestBase'),strip_path=True,keepext=True))
     rje.mkDir(self,self.getStr('RestOutDir'))
     outputs = rje.sortKeys(self.dict['Output'])
     if self.getStrLC('Rest') in outputs: outputs = [self.getStrLC('Rest')]
     elif self.getStrLC('Rest') in ['full','text']:
         outfile = '%s.rest' % rbase
         # Context managers make sure each output file is flushed and closed once written
         with open(outfile,'w') as OUT:
             OUT.write(self.restFullOutput())
         self.printLog('#OUT','%s: %s' % (self.getStrLC('Rest'),outfile))
         return True
     elif self.getStrLC('Rest'):
         self.printLog('#OUTFMT','REST output format "%s" not recognised.' % self.getStrLC('Rest'))
         if self.i() < 0 or not rje.yesNo('Output all parsed outputs?'): return False
         outfile = '%s.rest' % rbase
         with open(outfile,'w') as OUT:
             OUT.write(self.restFullOutput())
         self.printLog('#OUT','full: %s' % (outfile))
         return True
     for rkey in outputs:
         if rkey in self.dict['Outfile']:
             rje.backup(self,self.dict['Outfile'][rkey])
             with open(self.dict['Outfile'][rkey],'w') as OUT:
                 OUT.write(self.dict['Output'][rkey])
             self.printLog('#OUT','%s: %s' % (rkey,self.dict['Outfile'][rkey]))
         elif rkey not in ['intro']: self.warnLog('No outfile parsed/generated for %s output' % rkey)
Esempio n. 21
0
def runMain():
    '''
    Main run function: loads an alignment and a tree, then runs GASP and/or makes the indel tree.

    <0> Sets up the program with setupProgram().
    <1> Loads protein sequences (seqin=FILE, default infile.fas) and the tree (nsfin=FILE, default
        <basefile>.nsf). A missing tree file is asked for if the run is interactive, else it is a fatal error.
    <2> Runs GASP unless indeltree=FILE was given and the last tree node already has a sequence, then makes the
        indel tree if indeltree=FILE was given.
    Errors are logged/printed here rather than passed on to the caller; the end-of-run log line is written if
    the log was set up.
    '''
    mainlog = None  # Both stay None if setupProgram() itself fails, so the handlers below cannot hit a NameError
    info = None
    try:
        ### <0>  ### Basic Setup of Program
        [info,out,mainlog,cmd_list] = setupProgram()

        ### <1> ### Load Data
        ##  <a>  ## Read in Sequences
        try:
            out.verbose(1,3,'Loading sequences...',0)
            seqfile = 'infile.fas'
            nsfin = None
            for cmd in cmd_list:
                if cmd.find('seqin=') == 0:
                    seqfile=cmd[len('seqin='):]
                if cmd.find('nsfin=') == 0:
                    nsfin = cmd[len('nsfin='):]
            basefile = seqfile
            extension = seqfile[-4:]
            if (extension == '.fas') or (extension == '.phy') or (extension == '.aln'):
                basefile = seqfile[:-4]
            seqs = rje_seq.SeqList(log=mainlog,cmd_list=['i=0']+cmd_list+['autofilter=F','autoload=F','seqin=None'])
            out.verbose(1,3,"from %s" % seqfile,1)
            if not seqs.loadSeqs(seqfile=seqfile,seqtype='protein',aln=True):
                # Explicit exception: a bare raise has nothing to re-raise here
                raise IOError('Failed to load sequences from %s' % seqfile)
            seqfile = seqs.info['Name']
            basefile = rje.baseFile(seqfile)
            mainlog.printLog('#SEQ',"%s protein sequences read from %s\n" % (str(seqs.seqNum()),seqfile),1)
            mainlog.printLog('#SEQ',"Alignment = %s. (%d aa)\n" % (seqs.opt['Aligned'],seqs.seq[0].seqLen()),1)
        except:
            mainlog.errorLog("Fatal run Exception during Sequence Input\n")
            raise
        ##  <b>  ## Read in Tree
        try:
            if not nsfin:
                nsfin = basefile + '.nsf'
            while not os.path.exists(nsfin):
                if out.stat['Interactive'] >= 0:
                    nsfin = rje.choice(text='Input tree file "%s" not found. Input filename? (Blank to exit.)' % nsfin)
                    if nsfin == '':
                        raise KeyboardInterrupt
                else:
                    mainlog.log.errorLog('File %s not found. Cannot load tree!' % nsfin,printerror=False,quitchoice=True)
                    # Explicit exception: a bare raise has nothing to re-raise here
                    raise IOError('File %s not found. Cannot load tree!' % nsfin)
            cmd_list.append('nsfin=' + nsfin)
            out.verbose(1,3,'Loading tree from %s...' % nsfin,1)
            mytree = rje_tree.Tree(log=mainlog,cmd_list=['root=yes']+cmd_list)
            mytree.mapSeq(seqlist=seqs)
            mytree.textTree()
            if mytree.opt['ReRooted']:
                mytree.saveTree(filename='%s.nsf' % basefile)
        except KeyboardInterrupt:
            mainlog.errorLog("User terminated.\n")
            raise
        except:
            mainlog.errorLog("Fatal run Exception during Tree Input\n")
            raise

        ### <2> ### GASP
        try:
            ## <a> ## InDel Tree Setup
            indeltree = None
            for cmd in cmd_list:
                if cmd.find('indeltree=') == 0:
                    indeltree=cmd[len('indeltree='):]

            ## <b> ## GASP
            if indeltree is None or mytree.node[-1].obj['Sequence'] is None:  # Perform GASP
                out.verbose(0,2,'',3)
                mainlog.printLog('#SEQ','GASP: Gapped Ancestral Sequence Prediction',1)
                if basefile == 'infile':
                    basefile = 'gasp'
                mygasp = rje_ancseq.Gasp(tree=mytree,ancfile='%s' % basefile,cmd_list=cmd_list,log=mainlog)
                out.verbose(0,2,'%s' % mygasp.details(),1)
                if out.stat['Interactive'] > 0:
                    if rje.yesNo('Use these parameters?') == False:
                        mygasp.edit()
                mygasp.gasp()
                out.verbose(0,1,"\n\nGASP run completed OK!",2)

            ## <c> ## InDel Tree
            if indeltree:
                mytree.indelTree(filename=indeltree)

        except KeyboardInterrupt:
            mainlog.errorLog("User terminated.\n")
            raise
        except:
            mainlog.errorLog("Fatal run Exception during GASP\n")
            raise

        ### <X> ### End
    except KeyboardInterrupt:
        if mainlog is not None: mainlog.errorLog("User terminated.\n")
    except:
        # Single pre-formatted string: prints the same text whether print is a statement or a function
        print("Unexpected error: %s" % (sys.exc_info()[0],))
    if mainlog is not None and info is not None:
        mainlog.printLog('#LOG', "%s V:%s End: %s\n" % (info.program, info.version, time.asctime(time.localtime(time.time()))), 1)
Esempio n. 22
0
    def forker(self):      ### Generic forking method
        '''
        Generic method for forking (without threads).

        The input sequence file is split into temporary fasta files of self.stat['Split'] sequences, each tagged
        with a unique random string. self.info['ForkProg'] is then run on each split file: up to
        self.stat['Forks'] at a time via os.fork(), or one after another with os.system() if NoForks is set
        (forced when Win32 is set or Forks < 1). Finally, the split fasta files are deleted and each split's log
        and results files are transferred into the combined files with rje.fileTransfer(deletefrom=True).
        Errors are logged with the stage reached and then re-raised.
        '''
        try:
            ### <0> ### Setup
            _stage = '<0> Fork Setup'
            forkx = int(self.stat['Forks'])   # Number of forks to have running at one time
            if self.opt['Win32'] or forkx < 1:
                self.opt['NoForks'] = True
            forks = []  # List of active fork PIDs
            killforks = int(self.stat['KillForks']) # Time in seconds to wait after main thread has apparently finished

            ### Sequence List setup ###
            _stage = '<1> Forking'
            seqx = 0    # Sequence Counter
            subx = 0    # Subset sequence counter
            outfile = None  # Output file name
            randlist = []   # List of random strings for split sequence files
            filedict = {}   # Dictionary of input files for each random string
            seqlist = rje_seq.SeqList(log=self.log,cmd_list=['autoload=F']+self.cmd_list)
            seqlist.makeBaseFile()
            SEQFILE = open(seqlist.info['Name'], 'r')
            try:    # Make sure that the input file handle is released however the split ends
                (seq,lastline) = seqlist.nextFasSeq(SEQFILE,'Starting')
                while seq:
                    seqlist.seq = [seq]
                    if self.info['StartFrom'] != 'None':    # Not yet reached wanted sequence
                        if self.info['StartFrom'] in [seq.info['Name'], seq.info['ID'], seq.info['AccNum'], seq.shortName()]:
                            self.info['StartFrom'] = 'None'
                    if self.info['StartFrom'] == 'None':    # Wanted sequence
                        if outfile:   # Add to the current split file
                            SEQOUT = open(outfile,'a')
                        else:       # Start a new split file with a new unique random string
                            rs = rje.randomString(6)
                            while rs in randlist:
                                rs = rje.randomString(6)
                            outfile = '%s.%s.fas' % (seqlist.info['Basefile'],rs)
                            SEQOUT = open(outfile,'w')
                            randlist.append(rs)
                            filedict[rs] = outfile
                        SEQOUT.write('>%s\n%s\n' % (seq.info['Name'],seq.info['Sequence']))
                        SEQOUT.close()
                        seqx += 1
                        subx += 1
                        if subx == self.stat['Split']:  # Finished split
                            self.log.printLog('#SEQ','%s sequences output to %s.' % (rje.integerString(subx),outfile))
                            outfile = None
                            subx = 0
                    (seq,lastline) = seqlist.nextFasSeq(SEQFILE,lastline)
            finally:
                SEQFILE.close()
            if subx > 0:
                self.log.printLog('#SEQ','%s sequences output to %s.' % (rje.integerString(subx),outfile))
            self.log.printLog('#SEQ','%s sequences output in total to %d files.' % (rje.integerString(seqx),len(randlist)))
            # Now have the list of random strings in randlist (in order) and filenames in filedict

            ### <1> ### Forking
            killtime = time.time()
            dealt_with = 0      # Split files dealt with
            while dealt_with < len(randlist) or len(forks):

                ## <a> ## forks
                _stage = '<1a> New Forks'
                while dealt_with < len(randlist) and (len(forks) < forkx or self.opt['NoForks']):     # Add more forks
                    _stage = '<1a-i> Fork: Get stuff for fork'
                    killtime = time.time()  # Reset killtime - still doing stuff
                    # Add new fork
                    _stage = '<1a-ii> Fork: New Fork'
                    new_fork_id = randlist[dealt_with]
                    dealt_with += 1

                    # Insert the fork ID before the last dot-separated element of OutCmd (or add ID.resfile if no dot)
                    outcmd = string.split(self.info['OutCmd'],'.')
                    if len(outcmd) > 1:
                        outcmd = outcmd[:-1] + [new_fork_id] + outcmd[-1:]
                    else:
                        outcmd = outcmd + [new_fork_id] + ['resfile']
                    outcmd = string.join(outcmd,'.')
                    forkcmd = '%s %s%s %s %s log=%s.log newlog=T i=-1' % (self.info['ForkProg'],self.info['SeqInCmd'],filedict[new_fork_id],outcmd,self.info['ForkCmd'],new_fork_id)
                    if self.opt['NoForks']:
                        os.system(forkcmd)
                    else:   # Forks
                        newpid = os.fork()
                        if newpid == 0: # child
                            os.system(forkcmd)
                            sys.exit()    # Exit process
                        elif newpid == -1: # error
                            # NOTE(review): os.fork() is documented to raise OSError on failure rather than return
                            # -1, so this branch is presumably never reached - confirm before relying on it.
                            self.log.errorLog('Problem forking %s.' % new_fork_id)
                        else:
                            forks.append(newpid)    # Add fork to list

                ## <b> ## Monitor and remove finished forks
                _stage = '<1b> Finished Forks'
                forklist = self._activeForks(forks)
                if len(forklist) != len(forks):
                    self.verbose(0,2,' => %d of %d forks finished!' % (len(forks) - len(forklist),len(forks)),1)
                    forks = forklist[0:]

                self.verbose(3,3,'End of a Cycle.',2)

                ## <c> ## Look for eternal hanging of forks
                _stage = '<1c> Hanging'
                if time.time() - killtime > killforks:
                    self.verbose(0,1,'\n%d seconds of main program inactivity. %d forks still active!' % (killforks,len(forks)),1)
                    for fork in forks:
                        self.verbose(0,2,' => Fork PID %d still Active!' % (fork),1)
                    if rje.yesNo('Kill?'):
                        break   #!# killing options
                    else:
                        killtime = time.time()

            ### <3> ### Finish
            _stage = '<3> Finish'
            if len(forks) > 0:
                self.log.errorLog('%d Forks still active after %d seconds of main program inactivity' % (len(forks),killforks),True)
            else:
                self.verbose(0,1,'Forks have finished.',2)

            ### <4> ### Recompile results
            for randstr in randlist:
                os.unlink(filedict[randstr])
                rje.fileTransfer(fromfile='%s.log' % randstr,tofile=self.log.info['Name'],deletefrom=True)
                outfiles = glob.glob('*.%s.*' % randstr)
                for outfile in outfiles:
                    # Combined file name = split file name without the random string element
                    compfile = outfile.split('.')
                    compfile.remove(randstr)
                    compfile = string.join(compfile,'.')
                    if randstr == randlist[0] and os.path.exists(compfile) and not self.opt['Append']:
                        os.unlink(compfile)     # First split replaces any old combined file unless appending
                    rje.fileTransfer(fromfile=outfile,tofile=compfile,deletefrom=True)
                    self.verbose(1,2,'Copying results data from %s to %s...' % (outfile,compfile),0)
                self.verbose(0,1,'%d results files copied for Split %d.' % (len(outfiles),(randlist.index(randstr)+1)),1)
            self.log.printLog('#OUT','Results for %d splits compiled.' % len(randlist))

        except SystemExit:  # Don't want forks raising an Exception upon exiting
            sys.exit()
        except:
            self.log.errorLog('Error in forker(%s):' % _stage,printerror=True,quitchoice=False)
            raise   # Delete this if method error not terrible
Esempio n. 23
0
    def setup(self):  ### Main class setup method.
        '''
        Main class setup method.

        Looks up the program named by Name in mod, logs its details and full command list, and makes the
        matching program object in self.obj['Prog']. If no program object is made, the user is offered the
        SeqSuite help and/or asked for a new program name (interactive runs only) and setup is tried again.
        << Returns self.obj['Prog'] if setup was successful, else False.
        '''
        try:  ### ~ [1] ~ Setup Program ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            self.obj['Prog'] = None
            prog = self.getStrLC('Name')
            if prog in mod:
                proginfo = mod[prog].makeInfo()
                self.obj['ProgInfo'] = proginfo
                self.printLog('#PROG', '%s V%s: %s' % (proginfo.program, proginfo.version, proginfo.description))
                progcmd = rje.getCmdList([], info=proginfo) + self.cmd_list + ['newlog=F']
                progout = rje.Out(cmd_list=progcmd)
                progout.printIntro(proginfo)
                if self.getBool('Help'):
                    progcmd = mod[prog].cmdHelp(proginfo, progout, ['help'] + progcmd)
                tidied = rje.tidyArgs(progcmd,
                                      nopath=self.getStrLC('Rest') and not self.dev(),
                                      purgelist=purgelist)
                self.printLog('#CMD',
                              'Full %s CmdList: %s' % (proginfo.program, rje.argString(tidied)),
                              screen=False)
                ## ~ [1a] ~ Make self.obj['Prog'] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                # (accepted names, class getter) pairs. The getters are lambdas so that a module attribute is
                # only looked up for the program that is actually selected.
                progmakers = [
                    (['seqlist', 'rje_seqlist'], lambda: rje_seqlist.SeqList),
                    (['uniprot', 'rje_uniprot'], lambda: rje_uniprot.UniProt),
                    (['taxonomy', 'rje_taxonomy'], lambda: rje_taxonomy.Taxonomy),
                    (['tree', 'rje_tree'], lambda: rje_tree.Tree),
                    (['xref', 'rje_xref'], lambda: rje_xref.XRef),
                    (['seq', 'rje_seq'], lambda: rje_seq.SeqList),
                    (['mitab', 'rje_mitab'], lambda: rje_mitab.MITAB),
                    (['dbase', 'database'], lambda: rje_dbase.DatabaseController),
                    (['pydocs'], lambda: rje_pydocs.PyDoc),
                    (['ensembl', 'rje_ensembl'], lambda: rje_ensembl.EnsEMBL),
                    (['genbank', 'rje_genbank'], lambda: rje_genbank.GenBank),
                    (['extatic'], lambda: extatic.ExTATIC),
                    (['revert'], lambda: revert.REVERT),
                    (['fiesta'], lambda: fiesta.FIESTA),
                    (['gablam'], lambda: gablam.GABLAM),
                    (['gopher'], lambda: gopher.Gopher),
                    (['haqesac'], lambda: haqesac.HAQESAC),
                    (['multihaq'], lambda: multihaq.MultiHAQ),
                    (['pingu'], lambda: pingu.PINGU),
                    (['pacbio'], lambda: rje_pacbio.PacBio),
                    (['rje_zen', 'zen'], lambda: rje_zen.Zen),
                ]
                for (prognames, getclass) in progmakers:
                    if prog in prognames:   # First match wins, as in an if/elif chain
                        self.obj['Prog'] = getclass()(self.log, progcmd)
                        break

            ### ~ [2] ~ Return program object if made ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            if self.obj['Prog']:
                return self.obj['Prog']  # Setup successful

            ### ~ [3] ~ Failure to recognise program ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            self.printLog('#ERR', 'Program "%s" not recognised.' % self.getStr('Name'))
            if self.i() < 0:
                return False
            if rje.yesNo('Show SeqSuite help with program options?'):
                extracmd = cmdHelp(cmd_list=['help'])[1:]   # Drop the leading 'help' from the returned commands
                if extracmd:
                    self.cmd_list += extracmd
                    self._cmdList()
                    if prog != self.getStrLC('Name'):   # New program name given with the extra commands
                        return self.setup()
            newname = rje.choice('Give program name (Blank or CTRL+C to quit)')
            self.setStr({'Name': newname})
            if not self.getStrLC('Name'):
                return False
            return self.setup()
        except KeyboardInterrupt:
            return False
        except SystemExit:
            raise
        except:
            self.errorLog('Problem during %s setup.' % self.prog())
            return False  # Setup failed
Esempio n. 24
0
 def setup(self):  ### Main class setup method.
     '''
     Main class setup method.

     [1]  Makes the rje_db Database object and sets self.list['Accuracy'] from ErrPerBase.
     [1a] Repeats until SMRTUnits is one of reads/gb/mb: a unit is guessed from the size of SMRTReads and either
          accepted (always when i() < 0) or replaced by a user choice. A Gb/Mb SMRTReads value is then converted
          into a number of reads using AvRead.
     [1b] Converts self.list['XnList'] into a sorted list of integers, dropping blanks, 0, 1 and any values that
          cannot be converted.
     << Returns True if setup was successful, else False (after logging the error).
     '''
     try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.obj['DB'] = rje_db.Database(self.log, self.cmd_list)
         self.db().basefile(self.basefile())
         self.list['Accuracy'] = [0, 1.0 - self.getNum('ErrPerBase')]
         ## ~ [1a] SMRTReads ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         while self.getStrLC('SMRTUnits') not in ['reads', 'gb', 'mb']:
             # Fill in the rejected value now, before SMRTUnits is replaced below
             txt = 'SMRTUnits "%s" not recognised' % self.getStr('SMRTUnits')
             if self.getNum('SMRTReads') < 10: smrtunits = 'Gb'
             elif self.getNum('SMRTReads') > 10000: smrtunits = 'reads'
             else: smrtunits = 'Mb'
             # NOTE(review): if i() == 0 and yesNo() returns False, SMRTUnits is left unchanged and the loop repeats
             if self.i() < 0 or rje.yesNo(
                     '%s: switch to (%s) %s?' %
                 (txt, self.getNum('SMRTReads'), smrtunits)):
                 self.setStr({'SMRTUnits': smrtunits})
             elif self.i() > 0:
                 self.setStr(
                     {'SMRTUnits': rje.choice('SMRTUnits (reads/Gb/Mb)?')})
             self.printLog('#UNITS',
                           '%s => %s' % (txt, self.getStr('SMRTUnits')))
         if self.getStrLC('SMRTUnits') in ['gb', 'mb']:
             smrttotal = self.getNum('SMRTReads') * {
                 'gb': 1e9,
                 'mb': 1e6
             }[self.getStrLC('SMRTUnits')]
             txt = '%s %s @ %.3f kb/read' % (self.getNum('SMRTReads'),
                                             self.getStr('SMRTUnits'),
                                             self.getNum('AvRead') / 1000.0)
             self.setNum({'SMRTReads': smrttotal / self.getNum('AvRead')})
             txt += ' => %s reads' % rje.iStr(int(self.getNum('SMRTReads')))
             self.printLog('#READS', txt)
         ## ~ [1b] XnList ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         xnlist = []
         for xn in self.list['XnList']:
             if xn == '': continue
             try:
                 ixn = int(xn)
                 if xn not in [ixn, '%d' % ixn]:
                     self.printLog('#XN', '"%s" -> %dX' % (xn, ixn))
                 if ixn == 0:
                     self.printLog(
                         '#XN', 'No point in 0X output: use 1-%Coverage.')
                 elif ixn == 1:
                     self.printLog('#XN',
                                   'No point in 1X output: use %Coverage.')
                 else:
                     xnlist.append(ixn)
             except:
                 self.errorLog(
                     'Could not process %s as part of XnList. (Integers only.)'
                     % xn)
         xnlist.sort()
         if xnlist:
             # Join the numbers directly: splitting the list repr on ',' left a double space after each comma
             self.printLog(
                 '#XN', 'XnList: %sX.' %
                 'X, '.join(['%d' % x for x in xnlist]))
         self.list['XnList'] = xnlist
         return True  # Setup successful
     except:
         self.errorLog('Problem during %s setup.' % self.prog())
         return False  # Setup failed
Esempio n. 25
0
    def convert(self,
                filelist=[],
                outfile=None
                ):  ### Converts scansite output files in FileList to Outfile
        '''
        Converts scansite output files in FileList to Outfile.
        >> filelist:list of scansite output file names. If empty, self.list['FileList'] is used instead.
        >> outfile:str = name of delimited output file. If not given, self.info['Name'] is used instead.
        << Returns True once all input files have been processed, or False if there are no files to convert.
        Missing input files are logged and skipped. Any other problem is logged with the current stage and re-raised.
        '''
        # NB. The mutable default for filelist is safe here: it is only ever read or rebound, never modified.
        _stage = 'Setup'
        try:
            ### Setup ###
            if len(filelist) < 1:
                filelist = self.list['FileList']
            if not outfile:
                outfile = self.info['Name']
            if len(filelist) < 1:
                self.log.errorLog(
                    'No scansite files to convert! %s unchanged/not made.' %
                    outfile,
                    printerror=False)
                return False
            delimit = rje.getDelimit(self.cmd_list)
            ext = rje.delimitExt(delimit)
            if ext != outfile[-3:]:  # Offer to swap the 3-letter extension to match the delimiter
                newfile = outfile[:-3] + ext
                if rje.yesNo('Change file name from %s to %s?' %
                             (outfile, newfile)):
                    outfile = newfile
            self.log.printLog(
                '#OUT', 'Converting %d file(s), output to %s.' %
                (len(filelist), outfile))

            ### Output File ###
            _stage = 'Output File'
            newout = not self.opt['Append'] or not os.path.exists(outfile)
            if newout:
                OUTFILE = open(outfile, 'w')
            else:
                OUTFILE = open(outfile, 'a')
            sx = 0  # Total results written across all input files
            try:
                if newout:  # Create with header
                    headers = [
                        'seq_id', 'enzyme', 'enz_group', 'aa', 'pos', 'score',
                        'percentile', 'matchseq', 'sa'
                    ]
                    rje.writeDelimit(OUTFILE, headers, delimit)

                ### Conversion ###
                _stage = 'Conversion'
                for infile in filelist:
                    if not os.path.exists(infile):
                        self.log.errorLog(
                            'Input file %s does not exist! :o(' % infile,
                            False, False)
                        continue
                    fx = 0  # Results written from this input file
                    INFILE = open(infile, 'r')
                    try:
                        inline = rje.nextLine(INFILE)
                        while inline is not None:
                            scanlist = rje.matchExp(re_scansite, inline)
                            # Only lines matching the scansite pattern are results. Writing/counting outside
                            # this test would repeat the previous match for every non-matching line (or
                            # fail with a NameError if the first line of the file did not match).
                            if scanlist:
                                rje.writeDelimit(OUTFILE, scanlist, delimit)
                                sx += 1
                                fx += 1
                                rje.progressPrint(self, sx)
                            inline = rje.nextLine(INFILE)
                    finally:
                        INFILE.close()  # Do not leak the handle if a line fails to process
                    self.log.printLog(
                        '#OUT', '%s scansite results from %s. (%s Total.)' %
                        (rje.integerString(fx), infile,
                         rje.integerString(sx)))
            finally:
                OUTFILE.close()  # Always flush/close output, even on error

            ### End ###
            _stage = 'End'
            self.log.printLog(
                '#OUT', '%s scansite results output to %s.' %
                (rje.integerString(sx), outfile))
            return True
        except:
            self.log.errorLog('Error in convert(%s)' % _stage,
                              printerror=True,
                              quitchoice=False)
            raise
Esempio n. 26
0
 def mapHit(self,seq,hits,hitdict,method):     ### Tries to map seq onto hitseq and returns hit if successful
     '''
     Tries to map seq onto hitseq and returns hit if successful.
     >> seq:Query Sequence Object
     >> hits:List of hits in rough order of goodness
     >> hitdict:Dictionary of {hitname:stats}
     >> method:Mapping method to use
     '''
     try:### ~ [0] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         (name,sequence) = seq
         data = rje_sequence.extractNameDetails(name,self)
         data['Sequence'] = seq[1]
         data['ShortName'] = string.split(seq[0])[0]
         for hit in hitdict:
             hitdict[hit]['Data'] = rje_sequence.extractNameDetails(hitdict[hit]['Seq'][0],self)
             hitdict[hit]['Data']['Sequence'] = hitdict[hit]['Seq'][1]
             hitdict[hit]['Data']['ShortName'] = string.split(hitdict[hit]['Seq'][0])[0]
         ### SkipGene ###
         if method == 'id' and rje.matchExp('^(\S+)_\S+',data['ID']):
             gene = rje.matchExp('^(\S+)_\S+',data['ID'])
             if gene in self.list['SkipGene']:
                 return None
         ### Name, AccNum, Sequence and ID ###
         if method_info[method] in ['Name', 'AccNum', 'Sequence', 'ID']:
             for hit in hits:
                 hitdata = hitdict[hit['Hit']]['Data']
                 if hitdata[method_info[method]] == data[method_info[method]]:
                     if self.i() < 2 or rje.yesNo('Map %s to %s?' % (data['ShortName'],hitdata['ShortName'])):
                         return hit
         ### DescAcc ###
         if method == 'descacc':
             for hit in hits:
                 hitdata = hitdict[hit['Hit']]['Data']
                 if rje.matchExp('\W(%s)\W' % data['AccNum'],hitdata['Name']):
                     if self.i() < 2 or rje.yesNo('Map %s to %s?' % (data['ShortName'],hitdata['ShortName'])):
                         return hit
         ### GABLAM ###
         if method != 'gablam': return None
         focus = self.str['MapFocus'][:1].upper() + self.str['MapFocus'][1:].lower()
         gstat = gstat_type[self.str['MapStat'].lower()]
         possibles = []  # List of Hits that meet MinMap criterion
         for hit in hits:
             hitname = hit['Hit']
             hitdata = hitdict[hit['Hit']]['Data']
             if self.getNum('AutoMap') > 0.0 and hitdict[hitname]['%s_%s' % (focus,gstat)] >= self.getNum('AutoMap'):
                 if self.i() < 2 or rje.yesNo('Map %s to %s?' % (data['ShortName'],hitdata['ShortName'])):
                     return hit
             elif hitdict[hitname]['%s_%s' % (focus,gstat)] >= self.getNum('MinMap'):
                 possibles.append(hit)
         ### Manual GABLAM Choice ###
         if self.i() < 0 or not possibles: return None
         possibles.reverse()
         print '\nMapping options for %s:\n' % data['ShortName']
         for p in range(len(possibles)):
             hit = possibles[p]
             hitname = hit['Hit']
             hitdata = hitdict[hit['Hit']]['Data']
             print '<%d> %s (%d aa) =\t' % (len(possibles)-p,hitdata['Name'],hit['Length']),
             print '%.1f%% Qry Len,' % (100.0 * hit['Length'] / len(seq[1])),
             print '%.1f%% ID (%.1f%% Sim, %.1f%% Cov.)' % (hitdict[hitname]['Hit_ID'],hitdict[hitname]['Hit_Sim'],hitdict[hitname]['Hit_Len']),
             print '(Qry: %.1f%% ID (%.1f%% Sim, %.1f%% Cov.)' % (hitdict[hitname]['Query_ID'],hitdict[hitname]['Query_Sim'],hitdict[hitname]['Query_Len'])
         choice = -1
         print '<0> No mapping.\n'
         ## Choice ##
         while 1:
             choice = rje.getInt('Select sequence to replace %s?' % data['ShortName'],default=1,confirm=True)
             i = len(possibles) - choice
             if choice == 0: # No mapping
                 if self.i() < 2 or rje.yesNo('No GABLAM mapping for %s?' % (data['ShortName'])): return None
             elif choice > 0 and choice <= len(possibles):    
                 hit = possibles[i]
                 hitdata = hitdict[hit['Hit']]['Data']
                 if self.i() < 2 or rje.yesNo('Map %s to %s?' % (data['ShortName'],hitdata['ShortName'])): return hit
     except:
         self.errorLog('Problem during SeqMapper.mapHit(%s)' % method,quitchoice=True)
         return None
Esempio n. 27
0
    def setup(self):  ### Main class setup method.
        '''
        Main class setup method. Looks up the program given by 'Name', builds the matching program object and
        stores it in self.obj['Prog'] (with its info object in self.obj['ProgInfo']).
        << Returns the program object if setup was successful, else False.
        '''
        try:  ### ~ [1] ~ Setup Program ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            self.obj['Prog'] = None
            prog = self.getStrLC('Name')
            if prog in mod:
                proginfo = mod[prog].makeInfo()
                self.obj['ProgInfo'] = proginfo
                self.printLog('#PROG', '%s V%s: %s' % (proginfo.program, proginfo.version, proginfo.description))
                progcmd = rje.getCmdList([], info=proginfo) + self.cmd_list + ['newlog=F']
                out = rje.Out(cmd_list=progcmd)
                out.printIntro(proginfo)
                if self.getBool('Help'):
                    progcmd = mod[prog].cmdHelp(proginfo, out, ['help'] + progcmd)
                purgelist = seqsuite.purgelist
                tidycmd = rje.tidyArgs(progcmd,
                                       nopath=self.getStrLC('Rest') and not self.dev(),
                                       purgelist=purgelist)
                self.printLog('#CMD',
                              'Full %s CmdList: %s' % (proginfo.program, rje.argString(tidycmd)),
                              screen=False)
                ## ~ [1a] ~ Make self.obj['Prog'] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                # Each builder is a lambda so that only the constructor for the selected program is ever touched.
                builders = [
                    (['slimcore', 'rje_slimcore'], lambda: rje_slimcore.SLiMCore(self.log, progcmd)),
                    (['rlc', 'disorder'], lambda: rje_slimcore.SLiMCore(self.log, progcmd + ['prog=%s' % prog])),
                    (['slimlist', 'rje_slimlist'], lambda: rje_slimlist.SLiMList(self.log, progcmd)),
                    (['slimfinder'], lambda: slimfinder.SLiMFinder(self.log, progcmd)),
                    (['qslimfinder'], lambda: qslimfinder.QSLiMFinder(self.log, progcmd)),
                    (['slimprob'], lambda: slimprob.SLiMProb(self.log, progcmd)),
                    (['slimmaker'], lambda: slimmaker.SLiMMaker(self.log, progcmd)),
                    (['slimfarmer', 'farm'], lambda: slimfarmer.SLiMFarmer(self.log, progcmd)),
                    (['slimbench'], lambda: slimbench.SLiMBench(self.log, progcmd)),
                    (['comparimotif'], lambda: comparimotif.CompariMotif(self.log, progcmd)),
                    (['peptcluster'], lambda: peptcluster.PeptCluster(self.log, progcmd)),
                    (['peptalign'], lambda: peptcluster.PeptCluster(self.log, ['peptalign=T'] + progcmd)),
                ]
                for (prognames, build) in builders:
                    if prog in prognames:
                        self.obj['Prog'] = build()
                        break
                self.obj['Prog'].dict['Output']['help'] = mod[prog].__doc__
            elif prog in seqsuite.mod:  # Hand over to SeqSuite to build the program object
                seqsuiteobj = seqsuite.SeqSuite(self.log, self.cmd_list)
                self.obj['Prog'] = seqsuiteobj.setup()
                self.obj['ProgInfo'] = seqsuiteobj.obj['ProgInfo']
                self.obj['Prog'].dict['Output']['help'] = seqsuite.mod[prog].__doc__

            if self.obj['Prog']:
                return self.obj['Prog']  # Setup successful

            ### ~ [2] ~ Failure to recognise program ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            self.printLog('#ERR', 'Program "%s" not recognised.' % self.getStr('Name'))
            if self.i() < 0:
                return False  # Not interactive: cannot ask for another program
            #!# Try SeqSuite? #!#
            if rje.yesNo('Show SLiMSuite help with program options?'):
                extracmd = cmdHelp(cmd_list=['help'])[1:]
                if extracmd:
                    self.cmd_list += extracmd
                    self._cmdList()
                    # Extra commands changed the program name: start again with the new name
                    if prog != self.getStrLC('Name'):
                        return self.setup()
            newname = rje.choice('Give program name (Blank or CTRL+C to quit)')
            self.setStr({'Name': newname})
            if self.getStrLC('Name'):
                return self.setup()
            return False
        except KeyboardInterrupt:
            return False
        except SystemExit:
            raise
        except:
            self.errorLog('Problem during %s setup.' % self.prog())
            return False  # Setup failed
Esempio n. 28
0
 def difference(self, table1,
                table2):  ### Generates differences as new table
     '''
     Generates differences as new table, which is saved to "<difference table name>.tdt".
     >> table1:Table = iTunes database table to compare
     >> table2:Table = iTunes database table to compare
     The integer after the final "." of each table name decides which table is old and which is new (larger = new).
     Each entry of the copied new table gets a Status of New, Changed or Unchanged, and Plays/Skips/My Rating are
     replaced by the change since the old table where subtraction is possible. Old entries that were not paired with a
     new entry are added with Status Deleted.
     << No return value. Problems are reported through self.errorLog().
     '''
     try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         dfields = [
             'Name', 'Artist', 'Composer', 'Album', 'Album_Artist', 'Genre',
             'Time', 'Disc Number', 'Disc Count', 'Track Number',
             'Track Count', 'Year', 'Date Added', 'Plays', 'Last Played',
             'Skips', 'Last Skipped', 'My Rating', 'Location', 'Tracks',
             'Score'
         ]
         db = self.db()
         tabindex = '#Artist#|#Album#|#Track Number#|#Name#'
         try:
             age1 = int(table1.name().split('.')[-1])
             age2 = int(table2.name().split('.')[-1])
             table1.index(tabindex, make=True)
             table2.index(tabindex, make=True)
             if age1 < age2:
                 (oldtable, newtable) = (table1, table2)
             else:
                 (oldtable, newtable) = (table2, table1)
             diftable = db.copyTable(
                 newtable, '%s-%s' %
                 (oldtable.name(), newtable.name().split('.')[-1]))
             diftable.keepFields(dfields + [tabindex])
             diftable.addField('Status')
         except:
             self.errorLog('Cannot generate differences for %s and %s' %
                           (table1, table2))
             # Without old/new/difference tables there is nothing to process: stop here rather than carrying on
             # with undefined tables and logging a second, misleading error.
             return
         ### ~ [2] Process ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.printLog(
             '#NEW', '%s tracks in new iTunes export.' %
             rje.iStr(newtable.entryNum()))
         self.printLog(
             '#OLD', '%s tracks in old iTunes export.' %
             rje.iStr(oldtable.entryNum()))
         oldfiles = oldtable.datakeys()[0:]  # Copy of old keys: whittled down to those not paired with a new entry
         for entry in diftable.entries():
             ## ~ [2a] Find pair of entries ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             if entry['Location'] in oldfiles:
                 oldentry = oldtable.data(entry['Location'])
             elif entry[tabindex] in oldtable.index(tabindex):
                 matches = oldtable.indexEntries(tabindex, entry[tabindex])
                 oldentry = matches[0]
                 if len(matches) != 1:
                     self.printLog(
                         '#DUP',
                         'Duplicate entries for %s' % entry[tabindex])
                     # Prefer a duplicate that has not already been paired with another new entry
                     for ientry in matches:
                         if ientry['Location'] in oldfiles:
                             oldentry = ientry
                             break
             else:
                 oldentry = None
             ## ~ [2b] Generate Differences ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             if not oldentry:
                 entry['Status'] = 'New'
                 continue
             if oldentry['Location'] in oldfiles:
                 oldfiles.remove(oldentry['Location'])
             changed = False
             for field in ['Plays', 'Skips', 'My Rating']:
                 if entry[field] != oldentry[field]: changed = True
                 try:
                     entry[field] -= oldentry[field]
                 except Exception:
                     pass  # Keep new value - probably empty in old entry
             if changed: entry['Status'] = 'Changed'
             else: entry['Status'] = 'Unchanged'
         ### ~ [3] Add missing old entries ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         reportdel = rje.yesNo('Report deleted %s tracks?' %
                               diftable.name())
         for old in oldfiles:
             entry = diftable.addEntry(oldtable.data(old))
             entry['Status'] = 'Deleted'
             if reportdel:
                 self.printLog(
                     '#DEL', '%s: %s [%s]' %
                     (entry['Artist'], entry['Name'], entry['Album']))
         ### ~ [4] Finish ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         for status in rje.sortKeys(diftable.index('Status')):
             self.printLog(
                 '#STAT', '%s: %d tracks' %
                 (status.upper(), len(diftable.index('Status')[status])))
         self.printLog('#TRACK',
                       '%s tracks in total' % rje.iStr(diftable.entryNum()))
         self.deBug('?')
         # The combined index field was only needed for pairing entries: remove it from all tables before saving
         for table in [table1, table2, diftable]:
             table.dropField(tabindex)
         diftable.saveToFile('%s.tdt' % diftable.name())
     except:
         self.errorLog('%s.difference() error' % self)
Esempio n. 29
0
    def pairwiseAQ(
            self,
            seqlist=None,
            query=None,
            focus=[0, 0]):  ### Performs PAQ on seqlist, adding seq.info['PAQ']
        '''
        Performs PAQ on seqlist, adding seq.info['PAQ']
        >> seqlist:rje_seq.SeqList Object
        - NB. This object will itself have sequences removed from it, so beware!
        - A new info key will be added: PAQ = PAQ sequences with alignment Xs
        >> focus:list of range positions [X:Y] to look at. If Y=0 then [X:]. 
        '''
        ### <PAQ0> ### Setup
        try:
            _stage = '<0> Setup'
            haqlist = seqlist  # SeqList Object to store individually Xd sequences
            if not query:
                query = haqlist.obj['QuerySeq']
            if self.opt['NoQuery'] or not query:
                query = haqlist.seq[random.randint(0, haqlist.seqNum() - 1)]
                self.log.printLog(
                    '#QRY', 'Temp (random) query %s assigned for PAQ' %
                    query.shortName())
            #!# paqx = [False] * seqlist.seq[0].seqLen()    # List of whether a column of the alignment is bad (has an X) [True] or not [False]
            #!# - make this a method?!

            pwaq = {}  # Dictionary of lists of pairwise alignements
            block_align = {
            }  # Dictionary of whether residue in block of sequence that is well-aligned or not
            for seq in haqlist.seq:
                block_align[seq] = [False] * seq.seqLen()
                seq.info['PAQ'] = seq.info['Sequence'][0:]
                if seq.info.has_key('SAQX') and len(
                        seq.info['SAQX']
                ) == seq.seqLen(
                ):  #!# Should no longer be issues due to length changes following realignment
                    seq.info['Sequence'] = seq.info['SAQX'][0:]
                elif seq.info.has_key('SAQX'):
                    self.log.errorLog(
                        'Cannot use SAQX for %s in PAQ as wrong length.' %
                        seq.shortName(),
                        printerror=False)
                for otherseq in haqlist.seq:
                    pwaq[(seq, otherseq)] = [False] * seq.seqLen()

        ### <PAQ1> ### Directional Pairwise Comparisons of sequences
            _stage = '<1> Pairwise Comparisons'
            infotxt = 'PAQ%d: Pairwise Comparisons ...' % self.stat['PAQCyc']
            #print self.stat
            for seq in haqlist.seq:
                for otherseq in haqlist.seq:
                    myinfo = '%s %.1f%% %.1f%%   ' % (
                        infotxt,
                        (100.0 * haqlist.seq.index(seq) / haqlist.seqNum()),
                        (100.0 * haqlist.seq.index(otherseq) /
                         haqlist.seqNum()))
                    self.log.printLog('\r#PAQ',
                                      myinfo,
                                      log=False,
                                      newline=False)
                    for r in range(seq.seqLen()):
                        ar = seq.info['Sequence'][r]
                        ## <i> ## Look for PW aligned block
                        _stage = '<1-i> Pairwise Comparisons'
                        if ar not in ['-', 'X']:  # Start of test block
                            blen = 0  # Block length (PAQBlock) = AAs
                            win = 0  # Window length = all sequence
                            matchx = 0  # Score for residues in window
                            while blen < self.stat['PAQBlock'] and (
                                    r + win
                            ) < seq.seqLen(
                            ):  # This time we allow overshoots in both directions
                                ar = seq.info['Sequence'][r + win]
                                at = otherseq.info['Sequence'][r + win]
                                if 'X' in [ar, at]:  # Hit Bad Region: Abort
                                    break
                                else:  # Better region
                                    if ar != '-':
                                        blen += 1  # Increase Block
                                        matchx += self._saqCon(ar, at)
                                win += 1
                        ## <ii> ## Update pwaq if block good
                            _stage = '<1-ii> Pairwise Comparisons'
                            if matchx >= self.stat['PAQMatch']:
                                for w in range(win):
                                    if seq.info['Sequence'][r +
                                                            w] in ['-', 'X']:
                                        pwaq[(seq, otherseq)][r + w] = False
                                    else:
                                        pwaq[(seq, otherseq)][r + w] = True
            self.log.printLog('\r#PAQ',
                              '%s 100.0% 100.0%.   ' % infotxt,
                              log=False)

            ### <PAQ2> ### Link back to Query
            _stage = '<2> Linking to Query'
            ### <PAQ2a> ### Network of Pairwise Quality alignments
            _stage = '<2a> Linking to Query'
            #self.verbose(1,3,'PAQ%d: Linking Residues to Query (%s)' % (self.stat['PAQCyc'],query.shortName()),0)
            infotxt = 'PAQ%d: Linking Residues to Query (%s) ...' % (
                self.stat['PAQCyc'], query.shortName())
            for r in range(query.seqLen()):
                _stage = '<2a> Linking to Query'
                self.log.printLog('\r#PAQ',
                                  '%s %.1f%%' % (infotxt,
                                                 (100.0 * r / query.seqLen())),
                                  log=False,
                                  newline=False)
                qok = {
                }  # Dictionary of whether residue in seq OK, i.e. linked to query
                for seq in haqlist.seq:
                    qok[seq] = False
                qok[query] = True
                sok = [0, 1]  # List of OK sequence for residue
                while sok[-2] != sok[-1]:
                    ## <i> ## Match pairs, starting with query
                    _stage = '<2a-i> Linking to Query'
                    for seq in haqlist.seq:
                        if qok[seq]:
                            for otherseq in haqlist.seq:
                                if pwaq[(seq, otherseq)][r] or pwaq[(otherseq,
                                                                     seq)][r]:
                                    qok[otherseq] = True
                    ## <ii> ## Update sok
                    _stage = '<2a-ii> Linking to Query'
                    sok.append(0)
                    for seq in haqlist.seq:
                        if qok[seq]:
                            sok[-1] += 1
                            block_align[seq][r] = True
                _stage = '<2a-iii> Linking to Query'
                if sok[-1] == 1:  # Only query OK!
                    block_align[query][r] = False
            self.log.printLog('\r#PAQ', '%s 100.0%%' % infotxt, log=False)

            ### <PAQ2b> ### Allow for divergence (Conserved Anchors)
            _stage = '<2b> Anchors'
            if self.opt['Anchors']:
                infotxt = 'PAQ%d: Accounting for divergence within aligned regions ...' % self.stat[
                    'PAQCyc']
                ## <i> ## Setup gapped list
                gapped = [
                    False
                ] * query.seqLen()  # Whether column of alignment is gapped
                for seq in haqlist.seq:
                    self.log.printLog(
                        '\r#PAQ',
                        '%s %.1f%%  ' %
                        (infotxt,
                         (50.0 * haqlist.seq.index(seq) / haqlist.seqNum())),
                        log=False,
                        newline=False)
                    (start, end) = (0, seq.seqLen())
                    while seq.info['Sequence'][start] == '-':
                        start += 1
                    while seq.info['Sequence'][end - 1] == '-':
                        end -= 1
                    for r in range(start, end):
                        if seq.info['Sequence'][r] == '-':
                            gapped[r] = True
                ## <ii> ## Correction
                for seq in haqlist.seq:
                    self.log.printLog(
                        '\r#PAQ',
                        '%s %.1f%%  ' %
                        (infotxt,
                         (50 +
                          (50.0 * haqlist.seq.index(seq) / haqlist.seqNum()))),
                        log=False,
                        newline=False)
                    for r in range(seq.seqLen()):
                        if block_align[seq][r] or gapped[
                                r]:  # No need for correction
                            continue
                        # Move in both directions: if good residues (or sequence end) reached before gaps then reinstate
                        winf = 0
                        fwd = True
                        fok = False
                        winb = 0
                        bwd = True
                        bok = False
                        while fwd or bwd:
                            # End of seqs
                            if (r + winf) >= seq.seqLen():
                                fwd = False
                            if (r - winb) < 0:
                                bwd = False
                            # Gaps/OK
                            if fwd:
                                if gapped[r + winf]:
                                    fok = False
                                    fwd = False
                                elif block_align[seq][r + winf]:
                                    fwd = False
                                else:
                                    winf += 1
                            if bwd:
                                if gapped[r - winb]:
                                    bok = False
                                    bwd = False
                                elif block_align[seq][r - winb]:
                                    bwd = False
                                else:
                                    winb += 1
                        if fok and bok:  # Reinstate
                            for w in range(r - winb, r + winf + 1):
                                block_align[seq][w] = True
                self.log.printLog('\r#PAQ',
                                  '%s 100.0%%  ' % infotxt,
                                  log=False)

        ### <PAQ3> ### X out badly-aligned blocks
            _stage = '<3> Making bad sequence blocks'
            for seq in haqlist.seq:
                newseq = ''
                for r in range(seq.seqLen()):
                    if block_align[seq][r] or seq.info['Sequence'][r] == '-':
                        newseq += seq.info['Sequence'][r]
                    else:  # Bad residue
                        newseq += 'X'
                seq.info['Sequence'] = newseq[0:]
            #!# Add saving of data in 'datafull' option

        ### <PAQ4> ### Remove sequences and/or badly-aligned regions
            _stage = '<4> Removing sequences/regions'
            self.verbose(
                0, 4, 'PAQ%d: Removing bad sequences and/or dodgy regions...' %
                self.stat['PAQCyc'], 0)
            ## <PAQ4a> ## Process Query first - only interested in good regions within query
            if self.opt['NoQuery']:  # No preprocessing of Query
                self.verbose(0, 4, 'no Master Query processing...', 0)
            else:
                haqlist.mapX(
                    query, qtrim=True, focus=focus
                )  # Replaces other sequence ends and query X columns with Xs
                self.verbose(0, 4,
                             'Query (%s) processed...' % query.shortName(), 0)
            self.verbose(0, 3, '', 1)
            if self.opt['ManPAQ']:
                haqlist.saveFasta(seqfile='%s.manpaq.fas' %
                                  haqlist.info['Basefile'])

            ## <PAQ4b> ## Cycle through other sequences (worst first) until no more good residues are lost
            goodres = [0, self._getGood(haqlist.seq)
                       ]  # List of number of 'good' residues
            goodseq = [0, haqlist.seqNum()]
            while goodres[-1] != goodres[-2] or goodseq[-1] != goodseq[-2]:
                colgood = [
                    0
                ] * haqlist.seq[0].seqLen()  # Good residues per column
                for r in range(haqlist.seq[0].seqLen()):
                    for seq in haqlist.seq:
                        if seq.info['Sequence'][r] != '-' and seq.info[
                                'Sequence'][r] != 'X':
                            colgood[r] += 1
                ## <i> ## Compare relative loss of masking and losing each sequence
                keepx = {
                }  # Dictionary of seq:number of lost residues if seq kept
                losex = {
                }  # Dictionary of seq:number of lost residues if seq lost
                badkx = -1  # Biggest loss if kept
                badlx = -1  # Biggest loss if lost
                bads = None  # Worst sequence
                for seq in haqlist.seq:
                    if seq == query and self.opt['NoQuery'] == False:
                        continue  # Next sequence
                    # Calculate keepx and losex
                    keepx[seq] = 0
                    for r in range(seq.seqLen()):
                        if seq.info['Sequence'][r] == 'X':
                            keepx[seq] += colgood[r]
                        #?# In Perl HAQESAC there was an option to ignore Orphans in this calculation. Reinstate?
                    losex[seq] = self._getGood([seq])
                    # Update bads if worse
                    if keepx[seq] > badkx:
                        badkx = keepx[seq]
                        badlx = losex[seq]
                        bads = seq
                    elif keepx[seq] == badkx and losex[seq] < badlx:
                        badlx = losex[seq]
                        bads = seq
                ## <ii> ## Remove bad sequences and/or regions
                if badkx > 0:
                    if self.opt['ManPAQ']:
                        default = 'N'
                        if badkx * self.stat['PAQKeepLen'] > badlx * self.stat[
                                'PAQKeepSeq']:  # Lose sequence!
                            default = 'Y'
                        if rje.yesNo(
                                '%s worst: -%s aa if kept vs -%s aa if lost. Remove?'
                                % (bads.shortName(), rje.integerString(badkx),
                                   rje.integerString(badlx)), default):
                            seqlist.removeSeq(
                                text=
                                'PAQ%d: -%s aa if kept vs -%s aa if lost. (Manual decision.)'
                                %
                                (self.stat['PAQCyc'], rje.integerString(badkx),
                                 rje.integerString(badlx)),
                                seq=bads)
                        else:  # X out
                            haqlist.mapX(bads)
                    else:
                        self.verbose(
                            1, 3,
                            '%s worst: -%s aa if kept vs -%s aa if lost.' %
                            (bads.shortName(), rje.integerString(badkx),
                             rje.integerString(badlx)), 1)
                        #!# Add option for upweighting certain sequence type? (e.g. vs fragment or hypothetical?)
                        if badkx * self.stat['PAQKeepLen'] > badlx * self.stat[
                                'PAQKeepSeq']:  # Lose sequence!
                            seqlist.removeSeq(
                                text='PAQ%d: -%s aa if kept vs -%s aa if lost.'
                                %
                                (self.stat['PAQCyc'], rje.integerString(badkx),
                                 rje.integerString(badlx)),
                                seq=bads)
                        else:  # X out
                            haqlist.mapX(bads)
                ### <iii> ### Recalculate goodres
                goodres.append(self._getGood(haqlist.seq))
                goodseq.append(haqlist.seqNum())
                self.verbose(1, 3,
                             '%d -> %d "good" aa' % (goodres[-2], goodres[-1]),
                             1)

        ### <PAQ5> ### Reinstate UnX'd sequence:
            _stage = '<5> Replacing sequences'
            for seq in haqlist.seq:
                [seq.info['PAQ'], seq.info['Sequence']
                 ] = [seq.info['Sequence'], seq.info['PAQ']]
            if self.opt['ManPAQ'] and rje.checkForFile(
                    '%s.manpaq.fas' % haqlist.info['Basefile']):
                os.unlink('%s.manpaq.fas' % haqlist.info['Basefile'])

        except:
            self.log.errorLog(
                'rje_haq.py ~ Problem with pairwiseAQ %s.' % _stage, True)
Esempio n. 30
0
 def multiHAQ(self, secondrun=False):  ### Executes main HAQESAC runs
     '''
     Executes main HAQESAC runs: one HAQESAC run is attempted for each sequence returned by self.seqs().
     >> secondrun:bool [False] = Whether this call is the second round. After finishing a non-final round, the
        method calls itself once with secondrun=True.
     - The round counts as the final one (finalrun) when secondrun equals self.opt['MultiHAQ'].
     - A query is skipped when: (final round only) it was already seen in a previous HAQESAC dataset and AutoSkip
       or the user says so; its input *.fas file is missing; (non-final round without Force) an existing pickle
       is reported by rje.isYounger(); or the input file holds fewer than two sequences.
     - The HAQESAC run itself is not repeated when a *.png or *.anc.fas result exists and self.force() is False.
     - Returns None. Any uncaught problem is reported via self.errorLog(quitchoice=True).
     '''
     try:  ### ~ [0] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         finalrun = secondrun == self.opt['MultiHAQ']  # Whether this is the manual HAQESAC phase
         qryacc = self.obj['SeqList'].accList()  # Full list of Query accession numbers
         processed = []  # List of processed sequence accession numbers
         ### ~ [1] Perform HAQESAC runs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         for seq in self.seqs():
             ## ~ [1a] Check AutoSkip ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             acc = seq.info['AccNum']
             # Only prompt the user when running interactively (self.i() >= 0) and AutoSkip is off
             if finalrun and acc in processed and (
                     self.opt['AutoSkip'] or (self.i() >= 0 and rje.yesNo(
                         '%s already covered by previous HAQESAC. Skip?' % seq.shortName()))):
                 self.printLog('#SKIP', '%s already covered by previous HAQESAC: Skipped' % seq.shortName())
                 continue
             ## ~ [1b] Check Whether to run (re-runs and low sequence number) ~~~~~~~~~~~~~~~~~~ ##
             # NB. logfile is only referenced by the commented-out check further down
             logfile = rje.makePath('%s%s.log' % (self.info['HaqDir'], acc), wholepath=True)
             infile = rje.makePath('%s%s.fas' % (self.info['HaqDir'], acc), wholepath=True)
             pkfile = rje.makePath('%s%s.pickle' % (self.info['HaqDir'], acc), wholepath=True)
             pkzfile = rje.makePath('%s%s.pickle.gz' % (self.info['HaqDir'], acc), wholepath=True)
             if not os.path.exists(infile):
                 self.printLog('#SKIP', '%s input file %s not found: Skipped' % (seq.shortName(), infile))
                 continue
             if not finalrun and not self.opt['Force']:
                 # Gzipped pickle is checked first, then the plain pickle.
                 # NOTE(review): presumably rje.isYounger() returns the younger of the two files - confirm.
                 if rje.isYounger(pkzfile, infile) == pkzfile or rje.isYounger(pkfile, infile) == pkfile:
                     self.printLog('#SKIP', '%s run detected: Skipped' % seq.shortName())
                     continue
             inseqx = rje_seq.SeqCount(self, infile)
             if inseqx < 2:
                 self.printLog('#SKIP', 'Only one sequence found in %s: Skipped' % (infile))
                 continue
             ## ~ [1c] Pause if running in Chaser Mode and no Pickle ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             pickled = os.path.exists(pkfile) or os.path.exists('%s.gz' % pkfile)
             tm = 0  # Minutes to sleep: starts at 0 and grows by one each time around
             while secondrun and self.opt['Chaser'] and not pickled:
                 self.progLog('#WAIT', 'No %s pickle. Sleeping for %d min.' % (acc, tm))
                 time.sleep(60 * tm)
                 tm += 1
                 pickled = os.path.exists(pkfile) or os.path.exists('%s.gz' % pkfile)
                 if not pickled:
                     try:
                         rje.choice('Press <ENTER> to try again, or <CTRL+C> to Quit')
                     except:
                         # Bare except is deliberate: <CTRL+C> (KeyboardInterrupt) is the documented way to quit.
                         # Fixed: message has a single %s, so only acc may be supplied (was "% (acc, tm)",
                         # which raised TypeError instead of exiting cleanly).
                         self.printLog('#PICKLE', 'No %s pickle.' % acc)
                         self.printLog('\r#MULTI', 'Exiting multiHAQ "Chaser" run.')
                         return
             ## ~ [1d] Run HAQESAC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             runhaqesac = True
             # Either result file is taken as evidence of a completed run (each one found is logged)
             for ext in ('png', 'anc.fas'):
                 donefile = rje.makePath('%s%s.%s' % (self.info['HaqDir'], acc, ext), wholepath=True)
                 if not self.force() and rje.exists(donefile):
                     self.printLog('#SKIP', 'Found evidence of completed run: %s (force=F). Skipping.' % donefile)
                     runhaqesac = False
             #if not finalrun or self.opt['Force'] or rje.isYounger(logfile,nsfile) != logfile:
             if runhaqesac:
                 haqcmd = ['ini=haqesac.ini', 'seqin=%s.fas' % acc, 'query=%s' % acc, 'basefile=%s' % acc, 'newlog=F']
                 self.printLog('#HAQ', 'Running HAQESAC for %s - will have own log etc.' % seq.shortName(), log=False)
                 os.chdir(self.info['HaqDir'])  # HAQESAC is run from within the HAQESAC directory
                 info = haqesac.makeInfo()
                 haqcmd = rje.getCmdList(haqcmd, info=info)
                 out = rje.Out(cmd_list=haqcmd)  # Sets up Out object for controlling output to screen
                 out.printIntro(info)  # Prints intro text using details from Info object
                 haqlog = rje.setLog(info, out, haqcmd)  # Sets up Log object for controlling log file output
                 try:
                     haqesac.HAQESAC(log=haqlog, cmd_list=haqcmd).run(setobjects=True)
                 except:
                     # Return to the run path before asking, then let the outer handler deal with an abort
                     os.chdir(self.info['RunPath'])
                     if self.i() >= 0 and rje.yesNo('Problem with %s HAQESAC run. Abort?' % seq.shortName()):
                         raise KeyboardInterrupt
                 os.chdir(self.info['RunPath'])
                 if finalrun:
                     self.printLog('#HAQ', 'HAQESAC final round run for %s' % seq.shortName())
                 else:
                     self.printLog('#HAQ', 'HAQESAC first round run for %s' % seq.shortName())
             ## ~ [1e] Update ScreenQry ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             if not self.opt['ScreenQry'] or not finalrun: continue
             qacclist = []  # Other queries found in this query's dataset
             for qacc in rje_seq.SeqList(self.log, ['seqin=%s' % infile, 'autoload=T', 'autofilter=F']).accList():
                 if qacc in qryacc and qacc != acc: qacclist.append(qacc)
                 if qacc in qryacc and qacc not in processed: processed.append(qacc)
             self.printLog('#QRY', '%d other queries found in %s: [%s]' % (len(qacclist), infile, '; '.join(qacclist)))
             self.printLog('#QRY', '%d of %d queries processed' % (len(processed), self.seqNum()))
         ### ~ [2] MultiHAQ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if not finalrun:
             self.printLog('#MULTI', 'Executing second round of multiHAQ')
             self.multiHAQ(True)
     except:
         self.errorLog('Major problem with MultiHAQ.multiHAQ', quitchoice=True)
Esempio n. 31
0
    def singleSeqAQ(self, seqlist, focus=[0, -1]):  ### Performs SAQ on seqlist, adding seq.info['SAQ']
        '''
        Performs SAQ on seqlist, adding seq.info['SAQ'].
        >> seqlist:rje_seq.SeqList Object
        - NB. This object will itself have sequences removed from it, so beware!
        - A new info key will be added: SAQX = SAQ sequences with individual Xs
        - A new info key will be added: SAQ = SAQ sequences with alignment Xs
        >> focus:list of range positions [X:Y] to look at. If Y=0 then [X:].
        - focus is only passed on to seqlist.mapX() for the query.
        - NOTE(review): the default is a shared list object; confirm mapX() never modifies it in place.
        Returns None. Any exception is reported via self.log.errorLog() together with the stage reached.
        '''
        ### <SAQ1> ### Setup
        try:
            _stage = '<1> Setup'
            haqlist = seqlist  # Same SeqList object (alias, not a copy): sequences get Xd and removed in place
            query = haqlist.obj['QuerySeq']
            if self.opt['NoQuery']:
                query = None
            badres = [-1, 0]  # List of how many bad residues in total dataset (one entry per cycle)
            block_align = {}  # Dictionary of whether residue in block of sequence that is well-aligned or not
            res_align = {}  # Dictionary of whether residue of sequence is well-aligned or not
            res_gap = {}  # Dictionary of whether residue of sequence is a gap or not
            gap_align = {}  # Dictionary of whether residue of sequence is a gap in a well-aligned block or not
            for seq in haqlist.seq:
                # Note! Sequence is modified and SAQ not, then they are swapped at end!
                seq.info['SAQ'] = seq.info['Sequence'][0:]
                block_align[seq] = [False] * seq.seqLen()
                res_align[seq] = [False] * seq.seqLen()
                res_gap[seq] = [False] * seq.seqLen()
                gap_align[seq] = [False] * seq.seqLen()

            ### <SAQ2> ### Repeated cycles of defining well- and badly-aligned blocks
            #X#self.deBug(self.stat)
            _stage = '<2> BlockID'
            while badres[-1] != badres[-2]:  # Change in number of bad residues
                total_res = 0
                badres.append(0)  # badres[-1] is the current number of bad residues
                firstcycle = len(badres) == 3  # First cycle looks at everything, later ones at good blocks only
                infotxt = 'SAQ%d-%d: Calculating "bad" residues ...' % (self.stat['SAQCyc'], len(badres) - 2)
                for seqx, seq in enumerate(haqlist.seq):
                    # Progress only: enumerate avoids a list search for every sequence
                    myinfo = '%s %.1f%%' % (infotxt, (100.0 * seqx / haqlist.seqNum()))
                    self.log.printLog('\r#SAQ', myinfo, log=False, newline=False)
                    #self.verbose(0,3,'\r%45s' % myinfo,0)

                    ## <SAQ2a> ## For each sequence, mark residues as aligned or gapped
                    _stage = '<2a> Mark Residues'
                    for r in range(seq.seqLen()):
                        gap_align[seq][r] = False
                        res_align[seq][r] = False
                        # After first cycle, look only at well-aligned blocks
                        # (well-aligned for sequence not whole alignment)
                        if block_align[seq][r] or firstcycle:
                            a = seq.info['Sequence'][r]
                            res_gap[seq][r] = False
                            if a == '-':
                                res_gap[seq][r] = True
                                gap_align[seq][r] = True
                            else:  # 'X' handled by self._saqCon
                                conx = 0  # Matches with good regions of otherseqs (*excluding self*)
                                for otherseq in haqlist.seq[0:]:
                                    if otherseq == seq:  # > so self not counted!
                                        continue
                                    if len(otherseq.info['Sequence']) != len(seq.info['Sequence']):
                                        self.log.errorLog('Sequence lengths do not match - should be aligned!',
                                                          printerror=False)
                                        raise ValueError
                                    if block_align[otherseq][r] or firstcycle:
                                        conx += self._saqCon(a, otherseq.info['Sequence'][r])
                                if conx >= self.stat['SAQCon']:
                                    res_align[seq][r] = True

                    ## <SAQ2b> ## Marked regions of well-aligned residues for each sequence
                    ## <i> ## Clear first
                    _stage = '<2b-i> Mark Regions'
                    for r in range(seq.seqLen()):
                        block_align[seq][r] = False
                    ## <ii> ## Recalculate
                    _stage = '<2b-ii> Mark Regions'
                    for r in range(seq.seqLen()):
                        _stage = '<2b-ii> Blocks'
                        if res_align[seq][r]:  # Start of potential block
                            blen = 0  # Block length (SAQBlock) = AAs
                            win = 0  # Window length = all sequence
                            matchx = 1  # Good residues in window (first residue must be good!) (SAQMatch)
                            while blen < self.stat['SAQBlock'] and matchx < self.stat['SAQMatch']:
                                win += 1
                                # Hit Bad Region (sequence end or X): Abort
                                if (r + win) >= seq.seqLen() or seq.info['Sequence'][r + win] == 'X':
                                    break
                                if gap_align[seq][r + win]:  # Decent gap: widens window but not block
                                    continue
                                blen += 1  # Increase Block
                                if res_align[seq][r + win]:  # Good residue
                                    matchx += 1
                            # A break above always leaves matchx < SAQMatch, so r + w stays within the sequence
                            if matchx >= self.stat['SAQMatch']:
                                for w in range((win + 1)):
                                    block_align[seq][r + w] = True
                    ## <iii> ## Update bad residue count
                    for r in range(seq.seqLen()):
                        _stage = '<2b-iii> Mark Regions'
                        if not block_align[seq][r] and not res_gap[seq][r]:  # Bad residue
                            badres[-1] += 1
                        if not res_gap[seq][r]:
                            total_res += 1
                myinfo = '%s 100.0%%' % infotxt
                myinfo += ' => %s bad of %s total residues' % (rje.integerString(badres[-1]),
                                                               rje.integerString(total_res))
                self.log.printLog('\r#SAQ', myinfo)
                #self.verbose(0,3,'\r%45s' % myinfo,0)
                if badres[-1] == total_res:
                    self.log.errorLog('All residues marked as bad in SAQ!', printerror=False, quitchoice=True)
                # Now have all residues in all sequences marked as good (block_align=True) or bad (block_align=False)

            ### <SAQ3> ### X out badly-aligned blocks
            _stage = '<3> X-Out'
            self.log.printLog('#SAQ',
                              'SAQ%d-%d: Masking "bad" residues ...' % (self.stat['SAQCyc'], len(badres) - 2),
                              log=False, newline=False)
            #self.verbose(0,3,'SAQ%d-%d: Masking "bad" residues ...' % (self.stat['SAQCyc'],len(badres)-2),0)
            for seq in haqlist.seq:
                masked = []  # Residues of the masked sequence, joined once at the end
                for r in range(seq.seqLen()):
                    res = seq.info['Sequence'][r]
                    if block_align[seq][r] or res == '-':  #!# Was backwards? res_gap[seq][r] == False:
                        masked.append(res)
                    else:  # Bad residue
                        masked.append('X')
                newseq = ''.join(masked)
                seq.info['Sequence'] = newseq
                seq.info['SAQX'] = newseq  # Stores Xd sequences for individuals for use in PAQ
            #!# Add saving of data in 'datafull' option

            ### <SAQ4> ### Remove sequences and/or badly-aligned regions
            _stage = '<4> Removal'
            self.log.printLog('\r#SAQ',
                              'SAQ%d-%d: Removing bad sequences and/or dodgy regions...' %
                              (self.stat['SAQCyc'], len(badres) - 2),
                              log=False, newline=False)
            #self.verbose(0,3,'\rSAQ%d-%d: Removing bad sequences and/or dodgy regions...' % (self.stat['SAQCyc'],len(badres)-2),0)
            ## <SAQ4a> ## Process Query first - only interested in good regions within query
            _stage = '<4a> Query Removal'
            if self.opt['NoQuery'] or query is None:  # No preprocessing of Query
                self.verbose(0, 4, 'no Master Query processing...', 0)
            else:
                # Replaces other sequence ends and query X columns with Xs
                haqlist.mapX(query, qtrim=True, focus=focus)
                self.verbose(0, 4, 'Query (%s) processed...' % query.shortName(), 0)
            self.verbose(0, 3, '', 1)
            if self.opt['ManSAQ']:
                haqlist.saveFasta(seqfile='%s.mansaq.fas' % haqlist.info['Basefile'])

            ## <SAQ4b> ## Cycle through other sequences (worst first) until no more good residues or sequences are lost
            _stage = '<4b> Seq Removal'
            goodres = [0, self._getGood(haqlist.seq)]  # List of number of 'good' residues
            goodseq = [0, haqlist.seqNum()]  # List of number of sequences
            while goodres[-1] != goodres[-2] or goodseq[-1] != goodseq[-2]:
                colgood = [0] * haqlist.seq[0].seqLen()  # Good residues per column
                for r in range(haqlist.seq[0].seqLen()):
                    for seq in haqlist.seq:
                        if seq.info['Sequence'][r] != '-' and seq.info['Sequence'][r] != 'X':
                            colgood[r] += 1
                ## <i> ## Compare relative loss of masking and losing each sequence
                keepx = {}  # Dictionary of seq:number of lost residues if seq kept
                losex = {}  # Dictionary of seq:number of lost residues if seq lost
                badkx = -1  # Biggest loss if kept
                badlx = -1  # Biggest loss if lost
                bads = None  # Worst sequence
                for seq in haqlist.seq:
                    if seq == query and self.opt['NoQuery'] == False:
                        continue  # Query itself is never a candidate for removal
                    # Calculate keepx and losex
                    keepx[seq] = 0
                    for r in range(seq.seqLen()):
                        if seq.info['Sequence'][r] == 'X':
                            keepx[seq] += colgood[r]
                    losex[seq] = self._getGood([seq])
                    # Update bads if worse (ties on keepx are broken by the smaller losex)
                    if keepx[seq] > badkx:
                        badkx = keepx[seq]
                        badlx = losex[seq]
                        bads = seq
                    elif keepx[seq] == badkx and losex[seq] < badlx:
                        badlx = losex[seq]
                        bads = seq
                ## <ii> ## Remove bad sequences and/or regions
                if badkx > 0:
                    if self.opt['ManSAQ']:
                        default = 'N'
                        if badkx * self.stat['SAQKeepLen'] > badlx * self.stat['SAQKeepSeq']:  # Lose sequence!
                            default = 'Y'
                        if rje.yesNo('%s worst: -%s aa if kept vs -%s aa if lost. Remove?' %
                                     (bads.shortName(), rje.integerString(badkx), rje.integerString(badlx)),
                                     default):
                            haqlist.removeSeq(
                                text='SAQ%d: -%s aa if kept vs -%s aa if lost. (Manual decision.)' %
                                (self.stat['SAQCyc'], rje.integerString(badkx), rje.integerString(badlx)),
                                seq=bads)
                        else:  # X out
                            haqlist.mapX(bads)
                    else:
                        self.verbose(1, 3,
                                     '%s worst: -%s aa if kept vs -%s aa if lost.' %
                                     (bads.shortName(), rje.integerString(badkx), rje.integerString(badlx)), 1)
                        #!# Add option for upweighting certain sequence type? (e.g. vs fragment or hypothetical?)
                        if badkx * self.stat['SAQKeepLen'] > badlx * self.stat['SAQKeepSeq']:  # Lose sequence!
                            haqlist.removeSeq(
                                text='SAQ%d: -%s aa if kept vs -%s aa if lost.' %
                                (self.stat['SAQCyc'], rje.integerString(badkx), rje.integerString(badlx)),
                                seq=bads)
                        else:  # X out
                            haqlist.mapX(bads)
                ### <iii> ### Recalculate goodres
                goodres.append(self._getGood(haqlist.seq))
                goodseq.append(haqlist.seqNum())
                #X#self.verbose(1,3,'%d -> %d "good" aa' % (goodres[-2],goodres[-1]),1)

            ### <SAQ5> ### Reinstate UnX'd sequence:
            # Fixed: stage label was still '<4b> Seq Removal', so errors here were reported under the wrong stage
            _stage = '<5> Replacing sequences'
            for seq in haqlist.seq:
                # Swap: 'Sequence' gets the original residues back and 'SAQ' keeps the masked version
                seq.info['SAQ'], seq.info['Sequence'] = seq.info['Sequence'], seq.info['SAQ']
            if self.opt['ManSAQ'] and rje.checkForFile('%s.mansaq.fas' % haqlist.info['Basefile']):
                os.unlink('%s.mansaq.fas' % haqlist.info['Basefile'])

        except:
            self.log.errorLog('Problem with singleSeqAQ() %s.' % _stage, quitchoice=True)
Esempio n. 32
0
    def singleSeqAQ(self,seqlist,focus=[0,-1]):     ### Performs SAQ on seqlist, adding seq.info['SAQ']
        '''
        Performs SAQ on seqlist, adding seq.info['SAQ'].
        >> seqlist:rje_seq.SeqList Object
        - NB. This object will itself have sequences removed from it, so beware!
        - A new info key will be added: SAQX = SAQ sequences with individual Xs
        - A new info key will be added: SAQ = SAQ sequences with aligment Xs
        >> focus:list of range positions [X:Y] to look at. If Y=0 then [X:].
        '''
        ### <SAQ1> ### Setup
        try:
            _stage = '<1> Setup'
            haqlist = seqlist   # SeqList Object to store individually Xd sequences
            query = haqlist.obj['QuerySeq']
            if self.opt['NoQuery']:
                query = None
            badres = [-1,0]     # List of how many bad residues in total dataset
            block_align = {}    # Dictionary of whether residue in block of sequence that is well-aligned or not
            res_align = {}      # Dictionary of whether residue of sequence is well-aligned or not
            res_gap = {}        # Dictionary of whether residue of sequence is a gap or not
            gap_align = {}      # Dictionary of whether residue of sequence is a gap in a well-aligned block or not
            for seq in haqlist.seq:
                seq.info['SAQ'] = seq.info['Sequence'][0:]      # Note! Sequence is modified and SAQ not, then they are swapped at end!
                block_align[seq] = [False] * seq.seqLen()
                res_align[seq] = [False] * seq.seqLen()
                res_gap[seq] = [False] * seq.seqLen()
                gap_align[seq] = [False] * seq.seqLen()

        ### <SAQ2> ### Repeated cycles of defining well- and badly-aligned blocks
            #X#self.deBug(self.stat)
            _stage = '<2> BlockID'
            while badres[-1] != badres[-2]:     # Change in number of bad residues
                total_res = 0
                badres.append(0)    # badres[-1] is the current number of bad residues
                infotxt = 'SAQ%d-%d: Calculating "bad" residues ...' % (self.stat['SAQCyc'],len(badres)-2)
                for seq in haqlist.seq:
                    myinfo = '%s %.1f%%' % (infotxt,(100.0 * haqlist.seq.index(seq) / haqlist.seqNum()))
                    self.log.printLog('\r#SAQ',myinfo,log=False,newline=False)
                    #self.verbose(0,3,'\r%45s' % myinfo,0)

                    ## <SAQ2a> ## For each sequence, mark residues as aligned or gapped
                    _stage = '<2a> Mark Residues'
                    for r in range(seq.seqLen()):
                        gap_align[seq][r] = False
                        res_align[seq][r] = False
                        if block_align[seq][r] or len(badres) == 3:     # After first cycle, look only at well-aligned blocks (well-aligned for sequence not whole alignment)
                            a = seq.info['Sequence'][r]
                            res_gap[seq][r] = False
                            if a == '-':
                                res_gap[seq][r] = True
                                gap_align[seq][r] = True
                            else:   # 'X' handled by self._saqCon
                                conx = 0  # Matches with good regions of otherseqs (*including self*)
                                for otherseq in haqlist.seq[0:]:
                                    if otherseq == seq:     # > so self not counted!
                                        continue
                                    if len(otherseq.info['Sequence']) != len(seq.info['Sequence']):
                                        self.log.errorLog('Sequence lengths do not match - should be aligned!',printerror=False)
                                        raise ValueError
                                    if (block_align[otherseq][r] or len(badres) == 3):
                                        conx += self._saqCon(a, otherseq.info['Sequence'][r])
                                #if seq == query and r > 590:
                                #    print seq.shortName(),r,conx,'vs',self.stat['SAQCon'],
                                if conx >= self.stat['SAQCon']:    
                                    res_align[seq][r] = True
                        #if seq == query and r > 590:
                        #    print r, res_align[seq][r]

                    ## <SAQ2b> ## Marked regions of well-aligned residues for each sequence
                    _stage = '<2b> Mark Regions'
                    ## <i> ## Clear first
                    _stage = '<2b-i> Mark Regions'
                    for r in range(seq.seqLen()):
                        block_align[seq][r] = False
                    ## <ii> ## Recalculate
                    _stage = '<2b-ii> Mark Regions'
                    for r in range(seq.seqLen()):
                        _stage = '<2b-ii> Blocks'
                        if res_align[seq][r]:   # Start of potential block
                            blen = 0    # Block length (SAQBlock) = AAs
                            win = 0     # Window length = all sequence
                            matchx = 1  # Good residues in window (first residue must be good!) (SAQMatch)
                            while blen < self.stat['SAQBlock'] and matchx < self.stat['SAQMatch']:
                                win += 1
                                if (r + win) >= seq.seqLen() or seq.info['Sequence'][r+win] == 'X':     # Hit Bad Region: Abort
                                    break
                                else:   # Better region
                                    if gap_align[seq][r+win]:   # Decent gap
                                        continue
                                    else:
                                        blen += 1   # Increase Block
                                        if res_align[seq][r+win]:   # Good residue
                                            matchx += 1
                            #if seq == query and r > 590:
                            #    print seq.shortName(),r,matchx,'vs',self.stat['SAQMatch'],
                            if matchx >= self.stat['SAQMatch']:
                                for w in range((win+1)):
                                    block_align[seq][r+w] = True
                        #if seq == query and r > 590:
                        #    print r, block_align[seq][r]
                    ## <iii> ## Update bad residue count
                    for r in range(seq.seqLen()):
                        _stage = '<2b-iii> Mark Regions'
                        #print seq.shortName(), r, seq.seqLen(), block_align[seq][r], res_gap[seq][r], badres[-1]   # Bad residue
                        if not block_align[seq][r] and not res_gap[seq][r]:   # Bad residue
                            badres[-1] += 1
                        if not res_gap[seq][r]:
                            total_res += 1
                myinfo = '%s 100.0%%' % infotxt
                myinfo += ' => %s bad of %s total residues' % (rje.integerString(badres[-1]),rje.integerString(total_res))
                self.log.printLog('\r#SAQ',myinfo)
                #self.verbose(0,3,'\r%45s' % myinfo,0)
                if badres[-1] == total_res:
                    self.log.errorLog('All residues marked as bad in SAQ!',printerror=False,quitchoice=True)
                # Now have all residues in all sequences marked as good (block_align=True) or bad (block_align=False)

        ### <SAQ3> ### X out badly-aligned blocks
            _stage = '<3> X-Out'
            self.log.printLog('#SAQ','SAQ%d-%d: Masking "bad" residues ...' % (self.stat['SAQCyc'],len(badres)-2),log=False,newline=False)
            #self.verbose(0,3,'SAQ%d-%d: Masking "bad" residues ...' % (self.stat['SAQCyc'],len(badres)-2),0)
            for seq in haqlist.seq:
                newseq = ''
                for r in range(seq.seqLen()):
                    if block_align[seq][r] or seq.info['Sequence'][r] == '-':   #!# Was backwards? res_gap[seq][r] == False:
                        newseq += seq.info['Sequence'][r]
                    else: # Bad residue
                        newseq += 'X'
                seq.info['Sequence'] = newseq[0:]
                seq.info['SAQX'] = newseq[0:]       # Stores Xd sequences for individuals for use in PAQ
            #!# Add saving of data in 'datafull' option

        ### <SAQ4> ### Remove sequences and/or badly-aligned regions
            _stage = '<4> Removal'
            self.log.printLog('\r#SAQ','SAQ%d-%d: Removing bad sequences and/or dodgy regions...' % (self.stat['SAQCyc'],len(badres)-2),log=False,newline=False)
            #self.verbose(0,3,'\rSAQ%d-%d: Removing bad sequences and/or dodgy regions...' % (self.stat['SAQCyc'],len(badres)-2),0)
            ## <SAQ4a> ## Process Query first - only interested in good regions within query
            _stage = '<4a> Query Removal'
            if self.opt['NoQuery'] or query == None:  # No preprocessing of Query
                self.verbose(0,4,'no Master Query processing...',0)
            else:
                haqlist.mapX(query, qtrim=True, focus=focus) # Replaces other sequence ends and query X columns with Xs
                self.verbose(0,4,'Query (%s) processed...' % query.shortName(),0)
            self.verbose(0,3,'',1)
            if self.opt['ManSAQ']:
                haqlist.saveFasta(seqfile='%s.mansaq.fas' % haqlist.info['Basefile'])

            ## <SAQ4b> ## Cycle through other sequences (worst first) until no more good residues or sequences are lost
            _stage = '<4b> Seq Removal'
            goodres = [0, self._getGood(haqlist.seq)]   # List of number of 'good' residues
            goodseq = [0, haqlist.seqNum()]
            while goodres[-1] != goodres[-2] or goodseq[-1] != goodseq[-2]:
                colgood = [0] * haqlist.seq[0].seqLen()    # Good residues per column
                for r in range(haqlist.seq[0].seqLen()):
                    for seq in haqlist.seq:
                        if seq.info['Sequence'][r] != '-' and seq.info['Sequence'][r] != 'X':
                            colgood[r] += 1
                ## <i> ## Compare relative loss of masking and losing each sequence
                keepx = {}  # Dictionary of seq:number of lost residues if seq kept
                losex = {}  # Dictionary of seq:number of lost residues if seq lost
                badkx = -1  # Biggest loss if kept
                badlx = -1  # Biggest loss if lost
                bads = None # Worst sequence
                for seq in haqlist.seq:
                    if seq == query and self.opt['NoQuery'] == False:
                        continue    # Next sequence
                    # Calculate keepx and losex
                    keepx[seq] = 0
                    for r in range(seq.seqLen()):
                        if seq.info['Sequence'][r] == 'X':
                            keepx[seq] += colgood[r]
                    losex[seq] = self._getGood([seq])
                    # Update bads if worse
                    if keepx[seq] > badkx:
                        badkx = keepx[seq]
                        badlx = losex[seq]
                        bads = seq
                    elif keepx[seq] == badkx and losex[seq] < badlx:
                        badlx = losex[seq]
                        bads = seq
                ## <ii> ## Remove bad sequences and/or regions
                if badkx > 0:
                    if self.opt['ManSAQ']:
                        default = 'N'
                        if badkx * self.stat['SAQKeepLen'] > badlx * self.stat['SAQKeepSeq']:   # Lose sequence!
                            default = 'Y'
                        if rje.yesNo('%s worst: -%s aa if kept vs -%s aa if lost. Remove?' % (bads.shortName(),rje.integerString(badkx),rje.integerString(badlx)),default):
                            seqlist.removeSeq(text='SAQ%d: -%s aa if kept vs -%s aa if lost. (Manual decision.)' % (self.stat['SAQCyc'],rje.integerString(badkx),rje.integerString(badlx)),seq=bads)
                        else:   # X out
                            haqlist.mapX(bads)
                    else:
                        self.verbose(1,3,'%s worst: -%s aa if kept vs -%s aa if lost.' % (bads.shortName(),rje.integerString(badkx),rje.integerString(badlx)),1)
                        #!# Add option for upweighting certain sequence type? (e.g. vs fragment or hypothetical?)
                        if badkx * self.stat['SAQKeepLen'] > badlx * self.stat['SAQKeepSeq']:   # Lose sequence!
                            haqlist.removeSeq(text='SAQ%d: -%s aa if kept vs -%s aa if lost.' % (self.stat['SAQCyc'],rje.integerString(badkx),rje.integerString(badlx)),seq=bads)
                        else:   # X out
                            haqlist.mapX(bads)
                ### <iii> ### Recalculate goodres
                goodres.append(self._getGood(haqlist.seq))
                goodseq.append(haqlist.seqNum())
                #X#self.verbose(1,3,'%d -> %d "good" aa' % (goodres[-2],goodres[-1]),1)

            ### <SAQ5> ### Reinstate UnX'd sequence:
            _stage = '<4b> Seq Removal'
            for seq in haqlist.seq:
                #print seq.info
                [seq.info['SAQ'],seq.info['Sequence']] = [seq.info['Sequence'],seq.info['SAQ']]
            if self.opt['ManSAQ'] and rje.checkForFile('%s.mansaq.fas' % haqlist.info['Basefile']):
                os.unlink('%s.mansaq.fas' % haqlist.info['Basefile'])

        except:
            self.log.errorLog('Problem with singleSeqAQ() %s.' % _stage, quitchoice=True)
Esempio n. 33
0
 def difference(self,table1,table2): ### Generates differences as new table
     '''
     Generates differences between two iTunes export tables as a new table and saves it to "<tablename>.tdt".
     >> table1:Table = iTunes database table to compare
     >> table2:Table = iTunes database table to compare
     The text after the last '.' in each table name is read as an integer; the table with the smaller value is
     treated as the old export and the other as the new export (ties: table1 is new). The difference table is a
     copy of the new table in which every entry gets a 'Status':
     - New = no matching old entry found (by Location key, else by Artist/Album/Track Number/Name index).
     - Changed/Unchanged = matched; Plays, Skips and My Rating are replaced by (new - old) where subtractable.
     - Deleted = old entry that no new entry was matched to (added to the difference table).
     << No return value. Problems are reported through self.errorLog() rather than raised.
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         dfields = ['Name','Artist','Composer','Album','Album_Artist','Genre','Time','Disc Number','Disc Count','Track Number','Track Count','Year','Date Added','Plays','Last Played','Skips','Last Skipped','My Rating','Location','Tracks','Score']
         db = self.db()
         tabindex = '#Artist#|#Album#|#Track Number#|#Name#'
         try:
             # int()/str.split replace the deprecated string.atoi()/string.split() module functions
             age1 = int(table1.name().split('.')[-1])
             age2 = int(table2.name().split('.')[-1])
             table1.index(tabindex,make=True)
             table2.index(tabindex,make=True)
             if age1 < age2: oldtable = table1; newtable = table2
             else: newtable = table1; oldtable = table2
             diftable = db.copyTable(newtable,'%s-%s' % (oldtable.name(),newtable.name().split('.')[-1]))
             diftable.keepFields(dfields+[tabindex])
             diftable.addField('Status')
         except:
             self.errorLog('Cannot generate differences for %s and %s' % (table1,table2))
             # Stop here: the tables needed below may not exist, and carrying on would only raise a second,
             # misleading error (or process a half-built difference table).
             return
         ### ~ [2] Process ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.printLog('#NEW','%s tracks in new iTunes export.' % rje.iStr(newtable.entryNum()))
         self.printLog('#OLD','%s tracks in old iTunes export.' % rje.iStr(oldtable.entryNum()))
         oldfiles = oldtable.datakeys()[0:]  # Old table keys, in table order (used for ordered 'Deleted' reporting)
         unmatched = set(oldfiles)           # Old keys not yet paired with a new entry (fast membership/removal)
         for entry in diftable.entries():
             ## ~ [2a] Find pair of entries ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             if entry['Location'] in unmatched: oldentry = oldtable.data(entry['Location'])
             elif entry[tabindex] in oldtable.index(tabindex):
                 candidates = oldtable.indexEntries(tabindex,entry[tabindex])
                 oldentry = candidates[0]    # Default to first index hit
                 if len(candidates) != 1:
                     self.printLog('#DUP','Duplicate entries for %s' % entry[tabindex])
                     # Prefer a duplicate that has not already been paired with another new entry
                     for ientry in candidates:
                         if ientry['Location'] in unmatched: oldentry = ientry; break
             else: oldentry = None
             ## ~ [2b] Generate Differences ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             if not oldentry:
                 entry['Status'] = 'New'
                 continue
             unmatched.discard(oldentry['Location'])     # Old entry is now accounted for
             changed = False
             for field in ['Plays','Skips','My Rating']:
                 if entry[field] != oldentry[field]: changed = True
                 try: entry[field] -= oldentry[field]
                 except TypeError: pass    # Values cannot be subtracted (probably empty in old entry): keep new value
             if changed: entry['Status'] = 'Changed'
             else: entry['Status'] = 'Unchanged'
         ### ~ [3] Add missing old entries ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         reportdel = rje.yesNo('Report deleted %s tracks?' % diftable.name())
         for old in oldfiles:
             if old not in unmatched: continue   # Paired with a new entry above
             entry = diftable.addEntry(oldtable.data(old))
             entry['Status'] = 'Deleted'
             if reportdel: self.printLog('#DEL','%s: %s [%s]' % (entry['Artist'],entry['Name'],entry['Album']))
         ### ~ [4] Finish ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         for status in rje.sortKeys(diftable.index('Status')):
             self.printLog('#STAT','%s: %d tracks' % (status.upper(),len(diftable.index('Status')[status])))
         self.printLog('#TRACK','%s tracks in total' % rje.iStr(diftable.entryNum()))
         self.deBug('?')     # NOTE(review): looks like a leftover debugging hook - confirm whether still wanted
         # Remove the temporary matching index field from the input tables and the output before saving
         for table in [table1,table2,diftable]: table.dropField(tabindex)
         diftable.saveToFile('%s.tdt' % diftable.name())
     except: self.errorLog('%s.difference() error' % self)   # Catch-all kept: method reports errors rather than raising
Esempio n. 34
0
def badasp(out,mainlog,cmd_list,tree=None): ### Main BADASP Method
    '''
    Main BADASP Method. Automated run if interactive < 1
    <1> Load Sequences and Tree
    <2> Define Subfamilies
    <3> GASP Ancestral Sequence Prediction
    <4> Peform Functional Specificity and Sequence Conservation Calculations
    <5> Output Results
    '''
    try:    ### <0> ### Setup
        _seqfile = None
        _treefile = None
        append_file = None
        basefile = None
        for cmd in cmd_list:
            if cmd.find('seqin=') == 0:
                _seqfile = cmd[len('seqin='):]
                if _seqfile[-4] == '.':
                    _seqfile = _seqfile[:-4]
            if cmd.find('useanc=') == 0:
                _seqfile = cmd[len('useanc='):]
                if _seqfile[-8:] == '.anc.fas':
                    _seqfile = _seqfile[:-8]
            if cmd.find('nsfin=') == 0:
                _treefile = cmd[len('nsfin='):]
            if cmd.find('append=') == 0:
                append_file = cmd[len('append='):]
            if cmd.find('basefile=') == 0:
                basefile = cmd[len('basefile='):]
        if _seqfile and os.path.exists('%s.grp' % _seqfile):
            cmd_list.append('group=%s.grp' % _seqfile)
        if _seqfile and _treefile == None:
            if rje.checkForFile('%s.nwk' % _seqfile): _treefile = '%s.nwk' % _seqfile
            else: _treefile = '%s.nsf' % _seqfile
            out.verbose(0,2,'Looking for treefile %s.' % _treefile,1)
            if rje.checkForFile(_treefile):
                cmd_list.append('nsfin=%s' % _treefile)
                
        if tree == None:
            mainlog.verbose(0,1,'Tree: %s' % cmd_list,2)
            tree = rje_tree.Tree(log=mainlog,cmd_list=cmd_list)
            #tree._setupFromCmd()
        if tree.stat['MinFamNum'] < 2:
            tree.stat['MinFamNum'] = 2

            ### <1> ### Load Sequences and Tree
        while out.stat['Interactive'] > 0 or tree.obj['SeqList'] == None:
            tree = rje_tree.treeMenu(out,mainlog,['root=yes']+cmd_list,tree)
            if tree.obj['SeqList'] and tree.opt['Rooted']:
                break
            else:
                print '\n ** Must have loaded sequences and a rooted tree. ** \n'
                if out.stat['Interactive'] < 0 or rje.yesNo('Quit BADASP?',default='N'):
                    sys.exit()

        basename = tree.obj['SeqList'].info['Name']
        if basename[-4:] == '.fas':
            basename = basename[:-4]
        if basename[-4:] == '.anc':
            basename = basename[:-4]
        if basefile:
            basename = basefile
                                
    except SystemExit:
        raise
    except:
        mainlog.errorLog('Major Error in badasp loading sequences and tree',True)

    try:    ### <2> ### Define Subfamilies
        while out.stat['Interactive'] > 0 or tree.groupNum() < 2:
            tree.treeGroup(callmenu=True)
            if tree.groupNum() >= 2:
                break
            else:
                mainlog.errorLog('Must have at least two subfamilies for specificity analyses.',printerror=False)
                if out.stat['Interactive'] < 0 or rje.yesNo('Continue without specificity analyses?'):
                    cmd_list.append('funcspec=')
                    break
                elif rje.yesNo('Abort BADASP?'):
                    sys.exit()
    except SystemExit:
        raise
    except:
        mainlog.errorLog('Major Error in BADASP subfamilies',True)

    try:    ### <3> ### GASP Ancestral Sequence Prediction
        if tree.node[-1].obj['Sequence'] == None:   # No ancseq loaded
            while out.stat['Interactive'] > 0 and rje.yesNo('Use %s for output filenames?' % basename) == False:
                basename = rje.choice('FILEname (FILE.anc.fas, FILE.anc.nsf, FILE.txt)?: ', default=basename)
            mygasp = rje_ancseq.Gasp(tree=tree,ancfile=basename,cmd_list=cmd_list,log=mainlog)
            out.verbose(0,2,'%s' % mygasp.details(),1)
            if out.stat['Interactive'] > 0:
                if rje.yesNo('Use these parameters?') == False:
                    mygasp.edit()
            mygasp.gasp()
    except:
        mainlog.errorLog('Major Error in BADASP GASP',True)
        
    try:    ### <4> ### Peform Functional Specificity and Sequence Conservation Calculations
        _stage = '<4> Specificity/Conservation Analyses'
        aaprop = rje_aaprop.AAPropMatrix(log=mainlog,cmd_list=cmd_list)
        query = tree.obj['SeqList'].obj['QuerySeq']
        ## <a> ## Chosen Methods
        _stage = '<4a> Specificity/Conservation Analyses - Chosen Methods'
        funcspec = rje_specificity.methodlist   # ['BAD','BADN','BADX']
        seqcon = rje_conseq.methodlist # ['info']
        for cmd in cmd_list:
            if cmd.find('funcspec=') == 0:
                funcspec = cmd[9:].split(',')
            if cmd.find('seqcon=') == 0:
                seqcon = cmd[len('seqcon='):].split(',')
        if 'all' in funcspec:
            funcspec = rje_specificity.methodlist
        if 'all' in seqcon:
            seqcon = rje_conseq.methodlist
        for method in ['BADX','BADN','QPCon_Mean','QPCon_Abs','QPCon_Mean_All']:
            while method in funcspec and query == None:
                if rje.yesNo('Method %s needs query but none given. Drop %s from specificity methods?' % (method,method)):
                    funcspec.remove(method)
                    break
                for seq in tree.obj['SeqList'].seq:
                    if rje.yesNo('Method %s needs query but none given. Use sequence 1 (%s)?' % (method,seq.shortName()),default='N'):
                        query = seq
                        tree.obj['SeqList'].obj['Query'] = seq
                        break
            while method in seqcon and query == None:
                if rje.yesNo('Method %s needs query but none given. Drop %s from conservation methods?' % (method,method)):
                    seqcon.remove(method)
                    break
                for seq in tree.obj['SeqList'].seq:
                    if rje.yesNo('Method %s needs query but none given. Use sequence 1 (%s)?' % (method,seq.shortName()),default='N'):
                        query = seq
                        tree.obj['SeqList'].obj['Query'] = seq
                        break
                
        qname = query
        if query:
            qname = query.info['Name']
        out.verbose(0,3,'\nQuery = %s' % qname,2)

        ## <b> ## Spec Calculations
        _stage = '<4b> Specificity Calculations'
        specmatrix = rje_specificity.FuncSpec(log=mainlog,cmd_list=cmd_list,tree=tree,aaprop=aaprop)
        specmatrix.calcScore(query=query,methods=funcspec)

        ## <c> ## Conservation Calculations
        _stage = '<4c> Specificity/Conservation Analyses - Conservation Calculations'
        conseq = rje_conseq.SeqStat(log=mainlog,cmd_list=cmd_list,tree=tree,aaprop=aaprop)
        conseq.calcScore(query=query,methods=seqcon)   ### Sends appropriate seqlist to self.calcScore()

        ## <d> ## Special Case: QPCon vs All seqs
        _stage = '<4d> Specificity/Conservation Analyses - QPCon vs All'
        qpconall = []
        #if 'QPCon_Abs_All' in seqcon and query:
        #    qpconall.append('QPCon_Abs')
        if 'QPCon_Mean_All' in seqcon and query:
            qpconall.append('QPCon_Mean')
        for qp in qpconall:
            conseq.score['%s_All' % qp] = conseq.score[qp] 
            if conseq.alnwin.has_key(qp):
                conseq.alnwin['%s_All' % qp] = conseq.alnwin[qp] 
            if conseq.qrywin.has_key(qp):
                conseq.qrywin['%s_All' % qp] = conseq.qrywin[qp] 
            if conseq.rank.has_key(qp):
                conseq.rank['%s_All' % qp] = conseq.rank[qp] 
            if conseq.alnrankwin.has_key(qp):
                conseq.alnrankwin['%s_All' % qp] = conseq.alnrankwin[qp] 
            if conseq.qryrankwin.has_key(qp):
                conseq.qryrankwin['%s_All' % qp] = conseq.qryrankwin[qp] 
        
        _stage = '<4d> Specificity/Conservation Analyses - FamQP'
        famqp = []
        if 'QPCon_Mean' in seqcon:
            famqp.append('QPCon_Mean')
        if 'QPCon_Abs' in seqcon:
            famqp.append('QPCon_Abs')
        if len(famqp) > 0 and query:    #!# And subfam option?
            qseq = []
            for fam in tree.subfam:
                for node in tree._nodeClade(fam):
                    if query == node.obj['Sequence']:
                        for qnode in tree._nodeClade(fam):
                            qseq.append(qnode.obj['Sequence'])
            conseq.calcScore(query=query,seqlist=qseq,methods=famqp)   ### Sends appropriate seqlist to self.calcScore()

    except:
        mainlog.errorLog('Major Error in BADASP Specificity Analysis (%s):' % _stage,True)
        
    try:    ### <5> ### Full Output Results
        _stage = '<5> Full Output'
        # This output is in a tab- or comma-delimited file for easy manipulation or viewing with other programs.
        # (1) statistics for a given residue;
        # (2) statistics for a given window size across
        # - (a) the whole alignment,    (node=None)
        # - (b) the Query protein of interest (if given) and    (node=QueryNode)
        # - (c) the ancestral sequence of each subfamily;   (node=ancnode)
        # (3) Predicted ancestral sequences at
        # - (a) the root and
        # - (b) the ancestor of each subfamily.
        delimit = rje.getDelimit(cmd_list)

        ## <a> ## Setup        
        _stage = '<5a> Output - Setup'
        rankout = specmatrix.opt['Rank']
        #tree._regenerateSeqList(tree.obj['SeqList'],tree.node)
        root = tree.node[-1].obj['Sequence']      #!# At some point, make sure this is the most ancient duplication!
        out.verbose(0,3,'\nBADASP Results Output (%s.badasp) ...' % basename,0)

        ## <b> ## Header
        _stage = '<5b> Output - Header'
        _header = True
        if append_file:
            if rje.checkForFile(append_file):
               _header = False
            BADASP = open(append_file, 'a')
        else:
            BADASP = open('%s.badasp' % basename, 'w')
            BADASP.write("BADASP Output: %s\n" % (time.asctime(time.localtime(time.time()))))
            BADASP.write('%s\n\n' % cmd_list)        
        header = ['aln_pos','anc_aa'] # Aln Pos and AA
        alnlen = 0
        statlist = funcspec + seqcon
        _stage = '<5b-i> Output - Header Query'
        if query:
            header += ['qry_pos','qry_aa']  # Qry Pos and AA
        _stage = '<5b-ii> Output - Header Subfam'
        for f in range(len(tree.subfam)):
            header += ['fam%d_pos' % (f+1),'fam%d_aa' % (f+1)]   # Subfam Pos and AA
        for func in statlist:
            _stage = '<5b-iii> Output - Header %s' % func
            statobj = statObj(method=func,objlist=[specmatrix,conseq])
            fs = func.lower()
            alnlen = len(statobj.score[func])                
            header.append(fs)                   # Score
            if rankout:
                header.append('%s_rank' % fs)   # Rank
            if statobj.stat['WinSize'] > 1:
                header.append('%s_alnwin' % fs)     # Full align window
                if rankout:
                    header.append('%s_alnrankwin' % fs)   # Rank
                if query:
                    header.append('%s_qrywin' % fs) # Qry window
                    if rankout:
                        header.append('%s_qryrankwin' % fs)   # Rank
                if func in funcspec:
                    for f in range(len(tree.subfam)):
                        header.append('%s_fam%d_win' % (fs,f+1))  # Subfam windows
                        if rankout:
                            header.append('%s_fam%d_rankwin' % (fs,f+1))  # Subfam windows                    
        #if _header:
        BADASP.write('%s\n' % string.join(header, delimit))
        out.verbose(1,3,'%s...' % string.join(header, delimit),0)

        ## <c> ## Stats
        _stage = '<5c> Stats'
        qr = 0  # Qry pos
        fr = [0] * len(tree.subfam) # List of subfam positions
        aa = '' # Root aa
        qa = '' # Qry aa
        fa = [''] * len(tree.subfam) # List of subfam aas
        for r in range(alnlen):
            # <i> # Positions and aas
            _stage = '<5c-i> Output - Stats, positions & aas'
            aa = root.info['Sequence'][r]
            if query:
                qa = query.info['Sequence'][r]
                if qa != '-':
                    qr += 1
            for f in range(len(tree.subfam)):
                fa[f] = tree.subfam[f].obj['Sequence'].info['Sequence'][r]
                if fa[f] != '-':
                    fr[f] += 1

            # <ii> # Positions and AAs ii
            _stage = '<5c-ii> Output - Pos & AA ii'
            line = ['%d' % (r+1), aa]    # Aln Pos and AA
            if query:
                if qa == '-':               
                    line += ['-',qa]  # Qry Pos and AA
                else:               
                    line += ['%d' % qr,qa]  # Qry Pos and AA
            for f in range(len(tree.subfam)):
                if fa[f] == '-':
                    line += ['-',fa[f]]   # Subfam Pos and AA
                else:
                    line += ['%d' % fr[f],fa[f]]   # Subfam Pos and AA

            # <iii> # Stats                        
            _stage = '<5c-iii> Output - Stats'
            for func in statlist:
                statobj = statObj(method=func,objlist=[specmatrix,conseq])
                fs = func.lower()
                line.append(str(statobj.score[func][r]))   # Score
                if rankout:
                    line.append(str(statobj.rank[func][r]))   # Rank
                if specmatrix.stat['WinSize'] > 1:
                    line.append(str(statobj.alnwin[func][r]))     # Full align window
                    if rankout:
                        line.append(str(statobj.alnrankwin[func][r]))   # Rank
                    if query:
                        line.append(str(statobj.qrywin[func][r])) # Qry window
                        if rankout:
                            line.append(str(statobj.qryrankwin[func][r]))   # Rank
                    if func in funcspec:
                        for f in range(len(tree.subfam)):
                            line.append(str(statobj.famwin[func][tree.subfam[f]][r]))  # Subfam windows
                            if rankout:
                                line.append(str(statobj.famrankwin[func][tree.subfam[f]][r]))   # Subfam windows                    
            # <iv> # Writing
            _stage = '<5c-iv> Output - Writing'
            BADASP.write('%s\n' % string.join(line, delimit))
        BADASP.close()
        out.verbose(0,2,'Done!',2)

    except:
        mainlog.errorLog('Fatal Error in BADASP Full output (%s):' % _stage,True)
        BADASP.write('%s\n' % string.join(line, delimit))
        BADASP.close()
                        
    try:    ### <6> ### Partial Results Output 
        _stage = '<6> Partial Output'

        ## <a> ## Setup        
        _stage = '<6a> Output - Setup'
        # statlist & alnlen from above
        _part_append = False
        if out.stat['Interactive'] > 0 and rje.yesNo('Output additional, filtered results?',default='N'):
            partfile = rje.choice('Name for partial results file?:','%s.partial.badasp' % basename,confirm=True)
            if rje.checkForFile(partfile) and rje.yesNo('File %s exists. Append file without headers?' % partfile):
                _part_append = True
        else:
            return
        if rje.yesNo('Filter output columns?',default='N'):
            if rje.yesNo('Output query details (pos,aa & win)?') == False:
                query = None
            f = 1
            for fam in tree.subfam[0:]:
                if rje.yesNo('Output subfam %d (%s) details (pos,aa & win)?' % (f,fam.info['CladeName'])) == False:
                    tree.subfam.remove(fam)
                f += 1
            for func in statlist[0:]:
                if rje.yesNo('Output %s results?' % func) == False:
                    statlist.remove(func)

        alnout = [True] * alnlen
        if rje.yesNo('Filter Rows by Results VALUES?'):
            out.verbose(0,0,'Initial Defaults are minmum values. Accept intital default for no filtering of given Stat.',1)
            for stat in statlist:
                ### Filter by value? ###
                statobj = statObj(method=stat,objlist=[specmatrix,conseq])
                scores = statobj.score[stat][0:]
                scores.sort()
                cutoff = rje.getFloat('Min. value for %s?:' % stat,default='%f' % scores[0],confirm=True)
                for r in range(alnlen):
                    if statobj.score[stat][r] < cutoff:
                        alnout[r] = False
        if rankout and rje.yesNo('Filter Rows by Results RANKS?'):
            out.verbose(0,0,'Ranks range from 0 (low) to 1 (high).',1)
            for stat in statlist:
                ### Filter by Rank? ###
                statobj = statObj(method=stat,objlist=[specmatrix,conseq])
                cutoff = rje.getFloat('Min. rank for %s?:' % stat,default='0.0',confirm=True)
                for r in range(alnlen):
                    if statobj.rank[stat][r] < cutoff:
                        alnout[r] = False
                
        out.verbose(0,3,'\nBADASP Partial Results Output (%s) ...' % partfile,0)

        ## <b> ## Header
        _stage = '<6b> Partial Output - Header'
        if _part_append:
            BADASP = open(partfile, 'a')
        else:
            BADASP = open(partfile, 'w')
            BADASP.write("Partial BADASP Output: %s\n" % (time.asctime(time.localtime(time.time()))))
            BADASP.write('%s\n\n' % cmd_list)        
        header = ['aln_pos','anc_aa'] # Aln Pos and AA
        _stage = '<6b-i> Partial Output - Header Query'
        if query:
            header += ['qry_pos','qry_aa']  # Qry Pos and AA
        _stage = '<6b-ii> Partial Output - Header Subfam'
        for f in range(len(tree.subfam)):
            header += ['fam%d_pos' % (f+1),'fam%d_aa' % (f+1)]   # Subfam Pos and AA
        for func in statlist:
            _stage = '<6b-iii> Partial Output - Header %s' % func
            statobj = statObj(method=func,objlist=[specmatrix,conseq])
            fs = func.lower()
            header.append(fs)                   # Score
            if rankout:
                header.append('%s_rank' % fs)   # Rank
            if statobj.stat['WinSize'] > 1:
                header.append('%s_alnwin' % fs)     # Full align window
                if rankout:
                    header.append('%s_alnrankwin' % fs)   # Rank
                if query:
                    header.append('%s_qrywin' % fs) # Qry window
                    if rankout:
                        header.append('%s_qryrankwin' % fs)   # Rank
                if func in funcspec:
                    for f in range(len(tree.subfam)):
                        header.append('%s_fam%d_win' % (fs,f+1))  # Subfam windows
                        if rankout:
                            header.append('%s_fam%d_rankwin' % (fs,f+1))  # Subfam windows                    
        #if not _part_append:
        BADASP.write('%s\n' % string.join(header, delimit))
        out.verbose(1,3,'%s...' % string.join(header, delimit),0)

        ## <c> ## Stats
        _stage = '<6c> Stats'
        qr = 0  # Qry pos
        fr = [0] * len(tree.subfam) # List of subfam positions
        aa = '' # Root aa
        qa = '' # Qry aa
        fa = [''] * len(tree.subfam) # List of subfam aas
        for r in range(alnlen):
            if alnout[r] == False:
                continue
            # <i> # Positions and aas
            _stage = '<6c-i> Partial Output - Stats, positions & aas'
            aa = root.info['Sequence'][r]
            if query:
                qa = query.info['Sequence'][r]
                if qa != '-':
                    qr += 1
            for f in range(len(tree.subfam)):
                fa[f] = tree.subfam[f].obj['Sequence'].info['Sequence'][r]
                if fa[f] != '-':
                    fr[f] += 1

            # <ii> # Positions and AAs ii
            _stage = '<6c-ii> Partial Output - Pos & AA ii'
            line = ['%d' % (r+1), aa]    # Aln Pos and AA
            if query:
                if qa == '-':               
                    line += ['-',qa]  # Qry Pos and AA
                else:               
                    line += ['%d' % qr,qa]  # Qry Pos and AA
            for f in range(len(tree.subfam)):
                if fa[f] == '-':
                    line += ['-',fa[f]]   # Subfam Pos and AA
                else:
                    line += ['%d' % fr[f],fa[f]]   # Subfam Pos and AA

            # <iii> # Stats                        
            _stage = '<6c-iii> Partial Output - Stats'
            for func in statlist:
                statobj = statObj(method=func,objlist=[specmatrix,conseq])
                fs = func.lower()
                line.append(str(statobj.score[func][r]))   # Score
                if rankout:
                    line.append(str(statobj.rank[func][r]))   # Rank
                if specmatrix.stat['WinSize'] > 1:
                    line.append(str(statobj.alnwin[func][r]))     # Full align window
                    if rankout:
                        line.append(str(statobj.alnrankwin[func][r]))   # Rank
                    if query:
                        line.append(str(statobj.qrywin[func][r])) # Qry window
                        if rankout:
                            line.append(str(statobj.qryrankwin[func][r]))   # Rank
                    if func in funcspec:
                        for f in range(len(tree.subfam)):
                            line.append(str(statobj.famwin[func][tree.subfam[f]][r]))  # Subfam windows
                            if rankout:
                                line.append(str(statobj.famrankwin[func][tree.subfam[f]][r]))   # Subfam windows
            # <iv> # Writing
            _stage = '<6c-iv> Partial Output - Writing'
            BADASP.write('%s\n' % string.join(line, delimit))
        BADASP.close()
        out.verbose(0,2,'Done!',2)
        
    except:
        mainlog.errorLog('Fatal Error in BADASP Partial output (%s):' % _stage,True)
        BADASP.write('%s\n' % string.join(line, delimit))
        BADASP.close()
Esempio n. 35
0
    def farmHAQ(self):  ### Uses SLiMFarmer to farm out the HAQESAC runs
        '''
        Uses SLiMFarmer to farm out the HAQESAC runs.
        The batch file <HaqDir>haqesac.bat is passed to SLiMFarmer as its "subjobs" (farm=batch, qsub=F, i=-1), once
        per farming round: two rounds ("First round" and "Second round") when self.opt['MultiHAQ'] is set, otherwise
        a single "Complete run". Each round changes into HaqDir before farming and back to RunPath afterwards.
        << returns True once every round has been farmed. On any error the working directory is reset to RunPath
           and the error is logged with quitchoice=True; no explicit value is returned in that case.
        '''
        try:### ~ [0] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            batfile = os.path.abspath(rje.makePath('%shaqesac.bat' % self.info['HaqDir'],wholepath=True))
            self.printLog('#FARM',batfile)
            if not rje.exists(batfile): raise IOError('Cannot find %s' % batfile)
            # Absolute paths are used because the working directory is changed before SLiMFarmer is run
            farmcmd = ['subjobs=%s' % batfile,'farm=batch','qsub=F','i=-1','runpath=%s' % os.path.abspath(self.info['HaqDir'])]
            if self.opt['MultiHAQ']:
                haqfarm = ['First round','Second round']
            else: haqfarm = ['Complete run']

            ### ~ [1] Peform HAQESAC runs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            for farmrun in haqfarm:
                self.printLog('#CHDIR','Changing directory for %s farming: %s' % (farmrun,self.info['HaqDir']))
                os.chdir(self.info['HaqDir'])
                farmer = slimfarmer.SLiMFarmer(self.log,self.cmd_list+farmcmd)
                farmer.slimFarm()
                os.chdir(self.info['RunPath'])
                self.printLog('#CHDIR','Changed directory post-farming: %s' % self.info['RunPath'])
                self.printLog('#FARM','HAQESAC %s farming complete.' % farmrun)
            #!# Add identifying and skipping of partial runs.
            # (The per-sequence skipping code that used to follow the return was unreachable and relied on names
            #  that are not defined in this method, so it has been removed rather than left as dead code.)
            return True

        except:
            # Make sure a failure part-way through a round does not leave the process inside HaqDir
            os.chdir(self.info['RunPath'])
            self.errorLog('Major problem with MultiHAQ.farmHAQ',quitchoice=True)
Esempio n. 36
0
    def slimJimMapping(self):  ### Generate SLiMJIM PNGs for all sequences
        """
        Generate SLiMJIM PNG visualisations for every alignment in the input sequence file.

        The input sequences are split into consecutive groups, a new group starting at each sequence whose name has
        "Motifs" as its second word. Within a group the code treats sequence [0] as the motif sequence, [1] as the
        masked sequence and [2] as the query, followed by any further sequences. For each group, <basefile>.aln.tdt
        and <basefile>.rel.tdt are (re)generated and R is called via the shell with the "sfmap2png" argument.

        Errors for an individual alignment are logged and counted without stopping the run. Nothing is returned.
        """
        try:  ### ~ [1] ~ Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            mapseq = {}  # Dictionary of {dataset:[seqs]}
            scmd = ["autoload=T", "seqnr=F", "accnr=F", "replacechar=F"]
            mseq = rje_seq.SeqList(self.log, self.cmd_list + scmd)  #!# Removed ['minregion=3']+ #!#
            while mseq.seq:
                ## ~ [1a] ~ Read in all sequences for one spoke ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                pseq = [mseq.seq.pop(0)]  # Pseq = list of sequences for this protein
                while mseq.seq:
                    nextname = mseq.seq[0].info["Name"]
                    if nextname.find("Motifs") > 0 and nextname.split()[1] == "Motifs":
                        break  # Next protein
                    pseq.append(mseq.seq.pop(0))
                ## ~ [1b] ~ Update relevant sequence dictionary ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                mapseq[pseq[0].shortName()] = pseq[0:]
            self.printLog("#ALN", "%d distinct alignments identified" % len(mapseq))
            ### ~ [2] ~ Make SLiMJIM visualisations for each protein  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            ex = 0  # Number of errors
            for mapping in rje.sortKeys(mapseq):
                try:
                    ## ~ [2a] ~ Rename sequences ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                    # Load this alignment's sequences BEFORE deriving any names from them: reading pseq first
                    # would pick up the previous alignment and write the output under the wrong base name.
                    pseq = mapseq[mapping][0:]
                    basefile = pseq[0].shortName()
                    if self.interactive() > 0 and not rje.yesNo(basefile):
                        continue
                    qryname = pseq[2].shortName()
                    # The "R" label of the first sequence is its short name minus the "<query name>" prefix and
                    # the one character that follows it
                    pseq[0].info["R"] = pseq[0].shortName()[len(qryname) + 1 :]
                    pseq[1].info["R"] = "Masked"
                    for seq in pseq[2:]:
                        seq.info["R"] = seq.info["ID"]
                    ## ~ [2b] ~ Setup new SeqList, strip Query gaps, calculate RelCons ~~~~~~~~~~~~~~~~ ##
                    seqfile = "%s.aln.tdt" % basefile
                    if os.path.exists(seqfile):
                        os.unlink(seqfile)
                    rseq = rje_seq.SeqList(self.log, self.cmd_list + scmd + ["autoload=F"])
                    rseq.seq = pseq
                    rseq.obj["QuerySeq"] = pseq[2]
                    rseq.tidyQueryGaps()
                    rseq.saveR(rseq.seq, seqfile, name="R")
                    rseq.seq = pseq[2:]  # relCons is run without the first two (motif and masked) sequences
                    relfile = "%s.rel.tdt" % basefile
                    if os.path.exists(relfile):
                        os.unlink(relfile)
                    rseq.relCons(relfile)
                    self.deBug(rseq.obj["QuerySeq"].cmd_list)
                    ## ~ [2c] ~ Call R to generate graphics ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                    # NOTE(review): the command is built as a shell string; basefile comes from sequence names,
                    # so names containing quotes or shell metacharacters would break (or alter) this command.
                    rcmd = '%s --no-restore --no-save --args "sfmap2png" "%s"' % (self.info["RPath"], basefile)
                    rslimjim = "%srje.r" % self.info["Path"]
                    rcmd += ' < "%s" > "%s.r.tmp.txt" 2>&1' % (rslimjim, basefile)
                    self.printLog("#RSLIM", rcmd)
                    problems = os.popen(rcmd).read()
                    if problems:
                        self.errorLog(problems, printerror=False)
                    pngx = len(glob.glob("%s*png" % basefile))
                    self.printLog("#PNG", "%d PNG files made for %s" % (pngx, basefile))
                    # Keep the captured R output only when no PNG was produced
                    if pngx and os.path.exists("%s.r.tmp.txt" % basefile):
                        os.unlink("%s.r.tmp.txt" % basefile)
                except:
                    self.errorLog('SLiMJIM visualisation error for "%s"' % mapping)
                    ex += 1
            self.printLog("#SLIMJIM", "Generation of SLiMJIMs complete. %d Problems." % ex)

        except:
            self.errorLog(rje_zen.Zen().wisdom())
Esempio n. 37
0
def runMain():
    '''
    Main run method: loads a protein alignment and its tree, then runs GASP and/or makes an InDel tree.

    Commands read directly from the command list returned by setupProgram():
    - seqin=FILE : alignment to load ['infile.fas']
    - nsfin=FILE : tree file [<basefile>.nsf]; asked for interactively if the file does not exist
    - indeltree=FILE : if given, an InDel tree is written to FILE

    GASP ("Gapped Ancestral Sequence Prediction") is performed when no indeltree was requested or when the last
    tree node has no sequence. Each stage logs its own failure and re-raises; the outermost handlers report the
    problem without propagating it, and an end-of-run line is logged whenever the log object was set up.
    '''
    # Pre-set so the handlers and final log call below stay safe even if setupProgram() itself fails
    info = out = mainlog = None
    try:
        ### <0>  ### Basic Setup of Program
        [info, out, mainlog, cmd_list] = setupProgram()

        ### <1> ### Load Data
        ##  <a>  ## Read in Sequences
        try:
            out.verbose(1, 3, 'Loading sequences...', 0)
            seqfile = 'infile.fas'
            nsfin = None
            for cmd in cmd_list:
                if cmd.startswith('seqin='):
                    seqfile = cmd[len('seqin='):]
                if cmd.startswith('nsfin='):
                    nsfin = cmd[len('nsfin='):]
            basefile = seqfile
            if seqfile[-4:] in ('.fas', '.phy', '.aln'):
                basefile = seqfile[:-4]
            # seqin=None and autoload=F are appended so the SeqList does not load anything itself:
            # the alignment is loaded explicitly by loadSeqs() below
            seqs = rje_seq.SeqList(
                log=mainlog,
                cmd_list=['i=0'] + cmd_list +
                ['autofilter=F', 'autoload=F', 'seqin=None'])
            out.verbose(1, 3, "from %s" % seqfile, 1)
            if not seqs.loadSeqs(seqfile=seqfile, seqtype='protein', aln=True):
                # Explicit exception: a bare "raise" has nothing to re-raise at this point
                raise IOError('Failed to load sequences from %s' % seqfile)
            seqfile = seqs.info['Name']
            basefile = rje.baseFile(seqfile)
            mainlog.printLog(
                '#SEQ', "%s protein sequences read from %s\n" %
                (str(seqs.seqNum()), seqfile), 1)
            mainlog.printLog(
                '#SEQ', "Alignment = %s. (%d aa)\n" %
                (seqs.opt['Aligned'], seqs.seq[0].seqLen()), 1)
        except:
            mainlog.errorLog("Fatal run Exception during Sequence Input\n")
            raise
        ##  <b>  ## Read in Tree
        try:
            if not nsfin:
                nsfin = basefile + '.nsf'
            while not os.path.exists(nsfin):
                if out.stat['Interactive'] >= 0:
                    nsfin = rje.choice(
                        text=
                        'Input tree file "%s" not found. Input filename? (Blank to exit.)'
                        % nsfin)
                    if nsfin == '':
                        raise KeyboardInterrupt
                else:
                    # NOTE(review): every other call here uses mainlog.errorLog directly; confirm that
                    # mainlog really has a .log attribute.
                    mainlog.log.errorLog(
                        'File %s not found. Cannot load tree!' % nsfin,
                        printerror=False,
                        quitchoice=True)
                    raise IOError('File %s not found. Cannot load tree!' % nsfin)
            cmd_list.append('nsfin=' + nsfin)
            out.verbose(1, 3, 'Loading tree from %s...' % nsfin, 1)
            mytree = rje_tree.Tree(log=mainlog,
                                   cmd_list=['root=yes'] + cmd_list)
            mytree.mapSeq(seqlist=seqs)
            mytree.textTree()
            if mytree.opt['ReRooted']:
                mytree.saveTree(filename='%s.nsf' % basefile)
        except KeyboardInterrupt:
            mainlog.errorLog("User terminated.\n")
            raise
        except:
            mainlog.errorLog("Fatal run Exception during Tree Input\n")
            raise

        ### <2> ### GASP
        try:
            ## <a> ## InDel Tree Setup
            indeltree = None
            for cmd in cmd_list:
                if cmd.startswith('indeltree='):
                    indeltree = cmd[len('indeltree='):]

            ## <b> ## GASP
            if indeltree is None or mytree.node[-1].obj[
                    'Sequence'] is None:  # Perform GASP
                out.verbose(0, 2, '', 3)
                mainlog.printLog('#SEQ',
                                 'GASP: Gapped Ancestral Sequence Prediction',
                                 1)
                if basefile == 'infile':
                    basefile = 'gasp'
                mygasp = rje_ancseq.Gasp(tree=mytree,
                                         ancfile='%s' % basefile,
                                         cmd_list=cmd_list,
                                         log=mainlog)
                out.verbose(0, 2, '%s' % mygasp.details(), 1)
                if out.stat['Interactive'] > 0:
                    if rje.yesNo('Use these parameters?') == False:
                        mygasp.edit()
                mygasp.gasp()
                out.verbose(0, 1, "\n\nGASP run completed OK!", 2)

            ## <c> ## InDel Tree
            if indeltree:
                mytree.indelTree(filename=indeltree)

        except KeyboardInterrupt:
            mainlog.errorLog("User terminated.\n")
            raise
        except:
            mainlog.errorLog("Fatal run Exception during GASP\n")
            raise

        ### <X> ### End
    except KeyboardInterrupt:
        if mainlog is not None:
            mainlog.errorLog("User terminated.\n")
    except:
        # Single-argument print(...) gives the same output as the old print statement and parses everywhere
        print("Unexpected error: %s" % (sys.exc_info()[0],))
    if mainlog is not None:
        mainlog.printLog(
            '#LOG',
            "%s V:%s End: %s\n" % (info.program, info.version,
                                   time.asctime(time.localtime(time.time()))), 1)
Esempio n. 38
0
def menu(callobj,
         headtext='',
         menulist=[],
         choicetext='Please select:',
         changecase=True,
         default='',
         jointxt=' = ',
         confirm=False):  ### Main Menu method
    '''
    Main Menu method.
    >> callobj:Object for which attributes are to be read and altered. Also controls interactivity and log.
    >> headtext:str [''] = Introductory text for menu system.
    >> menulist:list [] = List of menu item tuples (edit code,description,optiontype,optionkey)
        - e.g. ('0','Sequence file','info','Name') would edit callobj.info['Name'])
        - If optiontype == 'return' then menu will return the value given in optionkey
        - If optiontype == '' then description will be printed as a breaker
        - If optiontype == 'infile' then callobj.info['Name'] would be changed using rje.getFileName(mustexist=True)
        - If optiontype == 'outfile' then callobj.info['Name'] would be changed using rje.getFileName(confirm=True)
        - If optiontype == 'showtext' then optionkey should contain text to be printed with verbose
        - If optiontype == 'addcmd' then commands can be added.
        - 2-tuples (code,description) are expanded to (code,description,'return',code)
        - 3-tuples (code,description,optiontype) are expanded to (code,description,optiontype,code)
    >> choicetext:str ['Please select:'] = Text to display for choice option
    >> changecase:boolean [True] = change all choices and codes to upper text
    >> default:str [''] = What to return if nothing selected.
    >> jointxt:str [' = '] = What to join code and description with when listing options.
    >> confirm:bool [False] = Whether to confirm selection.
    << returns optionkey if appropriate, else True. An empty menulist returns True straight away. If the user
       interrupts and chooses to exit the menu, default is returned.
    '''
    try:  ### ~ [0] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        ## ~ [0a] Fill out partial (return) tuples ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
        newlist = []
        for mtuple in menulist:
            if len(mtuple) == 2: newlist.append(mtuple + ('return', mtuple[0]))
            elif len(mtuple) == 3: newlist.append(mtuple + (mtuple[0], ))
            else: newlist.append(mtuple)
        menulist = newlist  # Rebinds the local name only: the caller's list (and the default) is never mutated
        ## ~ [0b] Choice Dictionary ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
        choicedict = {}
        for (code, desc, vtype, key) in menulist:
            if not vtype: continue  # Breaker lines cannot be selected
            if changecase: choicedict[code.upper()] = (vtype, key)
            else: choicedict[code] = (vtype, key)
        ## ~ [0c] Setup Header Text ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
        headlines = headtext.split('\n')  # Always at least one (possibly empty) line
        maxlen = max([len(line) for line in headlines])
        border = '#' * (maxlen + 10)  # Ten extra characters match the '# #> ' and ' <# #' decorations
        headlist = [border]
        for line in headlines:
            headlist.append('# #> %s <# #' % line.ljust(maxlen))
        headlist.append(border)
        ### ~ [1] Main Menu Loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        while menulist:
            ## ~ [1a] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            # The menu text is rebuilt on every pass so that edited values are shown up to date
            mtxt = '\n%s' % '\n'.join(headlist)
            while mtxt[-2:] != '\n\n':
                mtxt += '\n'
            for (code, desc, vtype, key) in menulist:
                if vtype and (code or desc):
                    if code and desc: mtxt += '%s%s%s' % (code, jointxt, desc)
                    elif code: mtxt += code
                    elif desc: mtxt += desc
                    if vtype in [
                            'info', 'list', 'opt', 'stat', 'int', 'str',
                            'bool', 'num'
                    ]:
                        mtxt += ': %s' % callobj.getAttribute(
                            vtype, key, default='#!#ERROR#!#')
                    elif vtype in ['infile', 'outfile']:
                        mtxt += ': %s' % callobj.getAttribute(
                            'info', key, default='#!#ERROR#!#')
                else:
                    mtxt += desc
                mtxt += '\n'
            ## ~ [1b] Give Choices ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            print(mtxt)
            while mtxt:  # Keep asking until a recognised choice breaks out (redisplay) or returns
                try:  ## ~ Input user choice ~~~ ##
                    choice = rje.choice(choicetext,
                                        default=default,
                                        confirm=confirm)
                    if changecase: choice = choice.upper()
                    ## ~ Process user choice ~ ##
                    if choice in choicedict:
                        (vtype, key) = choicedict[choice]
                        if vtype in ['str', 'info']:
                            callobj.setInfo({
                                key:
                                callobj._editChoice(key, callobj.getStr(key))
                            })
                        if vtype in ['num', 'stat']:
                            callobj.setStat({
                                key:
                                callobj._editChoice(key,
                                                    callobj.getNum(key),
                                                    numeric=True)
                            })
                        if vtype == 'int':
                            callobj.setStat({
                                key:
                                int(
                                    callobj._editChoice(key,
                                                        callobj.getInt(key),
                                                        numeric=True))
                            })
                        if vtype in ['bool', 'opt']:
                            callobj.setOpt({key: not callobj.getBool(key)})  # Booleans are simply toggled
                        if vtype == 'list':
                            callobj.list[key] = callobj._editChoice(
                                key, callobj.list[key]).split()
                        if vtype == 'infile':
                            callobj.setInfo({
                                key:
                                rje.getFileName('%s File Name?' % key,
                                                callobj.getStr(key))
                            })
                        if vtype == 'outfile':
                            callobj.setInfo({
                                key:
                                rje.getFileName('%s File Name?' % key,
                                                callobj.getStr(key),
                                                mustexist=False,
                                                confirm=True)
                            })
                        if vtype == 'showtext':
                            callobj.verbose(-1, -1, key)
                            break
                        if vtype == 'addcmd':
                            prevcmd = callobj.cmd_list
                            # NOTE(review): `out` is not defined anywhere in this function, so this call relies
                            # on a module-level `out` existing - confirm, along with passing `choice` (a string)
                            # as the second argument.
                            callobj.cmd_list = rje.inputCmds(out, choice)
                            callobj.printLog(
                                '#CMD',
                                'User Added commands: %s' % callobj.cmd_list)
                            callobj._cmdList()
                            callobj.cmd_list = prevcmd + callobj.cmd_list
                            break
                        if vtype in [
                                'info', 'list', 'opt', 'stat', 'infile',
                                'outfile', 'str', 'bool', 'int', 'num'
                        ]:
                            callobj.printLog('#%s' % vtype.upper(),
                                             'User edited %s parameter' % key)
                            break
                        elif vtype == 'return':
                            return key
                    # Reached for unknown codes and for codes whose option type is not handled above
                    print('Choice "%s" not recognised!\n' % choice)
                except KeyboardInterrupt:
                    if rje.yesNo('Terminate program?'): raise SystemExit
                    if rje.yesNo('Exit menu and proceed?'): return default
        ### End ###
        return True
    except KeyboardInterrupt:
        raise
    except SystemExit:
        raise
    except:
        if callobj:
            callobj.errorLog('Major disaster in rje_menu.menu()',
                             quitchoice=True)
        else:
            raise
Esempio n. 39
0
 def readHMMPFamSearch(self,resfile=None,readaln=False):  ### Reads HMM PFam Search Results into objects    
     '''
     Reads HMM Search Results into objects.
     One search object is made per domain (model) name found in the "Parsed for domains:" tables, with one hit and
     one alignment (SbjStart, SbjEnd, Expect, BitScore) per table row. If self.opt['CleanRes'] is set, the results
     file is replaced by a reduced version containing only sequences with hits plus a trailer line recording the
     number of sequences in the original file.
     >> resfile:str = Results File (set as self.info['OutFile'])
     >> readaln:boolean = whether to bother reading Alignments into objects [False] !!! Currently always False !!!
     << returns True if the file was read, else False (missing file, not an hmmpfam results file, or any error).
     '''
     RESFILE = None      # Defined before the try so that the error handler can always close it
     try:
         ### Setup ###
         if not resfile or not os.path.exists(resfile):
             self.log.errorLog('Results file "%s" missing!' % resfile,printerror=False)
             return False
         ## Regular expressions (raw strings, built once rather than for every line) ##
         re_query = r'^Query sequence:\s+(\S+)'
         #Model           Domain  seq-f seq-t    hmm-f hmm-t      score  E-value
         #--------        ------- ----- -----    ----- -----      -----  -------
         #Lep_receptor_Ig   1/1      24   114 ..     1   103 []   158.4  1.7e-44
         # ... else ...
         #         [no hits above thresholds]
         re_dom = r'^(\S+)\s+\S+\s+(\d+)\s+(\d+)\D.+\s+(\S+)\s+(\S+)\s*$'
         re_end = r'End of rje_hmm reduced results file: (\d+) sequences in original'
         ## Search dictionary as results come back per sequence, not per HMM! ##
         pfam = {}   # Dictionary of {PFam name:search}
         hitx = 0    # Total number of hits
         ### Read in Search results ###
         RESFILE = open(resfile,'r')
         line = RESFILE.readline()
         if line.find('hmmpfam') != 0:
             RESFILE.close(); RESFILE = None     # Must be closed before the file can safely be deleted
             self.errorLog('File "%s" does not appear to be an hmmpfam results file' % resfile,printerror=False)
             if rje.yesNo('Delete incorrect results file? (Check that hmmpfam=T is right!)',default='N'):
                 os.unlink(resfile)
                 self.printLog('#DEL','Dodgy results file "%s" deleted.' % resfile)
             return False
         hitname = None
         hx = 0; seqx = 0    # Sequences read from this file; sequence count of the original (from any trailer)
         newres = [rje.chomp(line)]; newresout = True; newresfile = '%s.partial' % resfile
         if os.path.exists(newresfile): os.unlink(newresfile)
         def appendPartial(lines):   # Appends text to the reduced file, closing the handle straight away
             PARTIAL = open(newresfile,'a')
             try: PARTIAL.write('\n'.join(lines))
             finally: PARTIAL.close()
         while line:
             self.progLog('\r#RES','Reading %s: %s Seqs; %s Domains; %s Hits' % (resfile,rje.integerString(hx),rje.integerString(len(pfam)),rje.integerString(hitx)))
             line = rje.chomp(line)
             ## New Sequence ##
             if rje.matchExp(re_query,line):
                 # Flush the previous sequence's lines, but only if it had hits (or is the file header)
                 if newres and newresout and self.opt['CleanRes']: appendPartial(newres)
                 newres = ['',line]; newresout = False
                 hitname = rje.matchExp(re_query,line)[0]; hx += 1
             ## One Line Data for hits ##
             elif line.find('Parsed for domains:') == 0:
                 # Keep the two table header lines, then read the first potential data row
                 newres += [line,rje.chomp(RESFILE.readline()),rje.chomp(RESFILE.readline())]
                 line = rje.chomp(RESFILE.readline()); newres.append(line)
                 domdata = rje.matchExp(re_dom,line)
                 while domdata:
                     newresout = True
                     (dom,start,end,score,evalue) = domdata
                     if dom not in pfam:
                         pfam[dom] = self._addSearch()
                         pfam[dom].info['Name'] = dom
                     hit = pfam[dom]._addHit()
                     hit.info['Name'] = hitname
                     aln = hit._addAln()
                     aln.setStat({'SbjStart':int(start),'SbjEnd':int(end),'Expect':float(evalue),'BitScore':float(score)})
                     hitx += 1
                     self.progLog('\r#RES','Reading %s: %s Seqs; %s Domains; %s Hits' % (resfile,rje.integerString(hx),rje.integerString(len(pfam)),rje.integerString(hitx)))
                     line = rje.chomp(RESFILE.readline()); newres.append(line)
                     domdata = rje.matchExp(re_dom,line)
             ## End of Protein ##
             elif line[:2] == '//': hitname = None; newres.append(line)
             ## Trailer of a previously reduced file: recover the original sequence count ##
             # (The trailer is not copied into newres, so a re-reduced file ends up with exactly one trailer.)
             elif rje.matchExp(re_end,line):
                 seqx = int(rje.matchExp(re_end,line)[0])
             elif newres: newres.append(line)
             line = RESFILE.readline()
         RESFILE.close(); RESFILE = None     # Finished reading: release before the file is replaced below
         if newres and newresout and self.opt['CleanRes']: appendPartial(newres)
         if not seqx: seqx = hx
         if self.opt['CleanRes']:
             appendPartial(['','End of rje_hmm reduced results file: %d sequences in original' % seqx])
             os.unlink(resfile)
             os.rename(newresfile,resfile)
             self.printLog('\r#RED','Results file %s replaced with reduced version (%s Hits only)' % (resfile,rje.integerString(hitx)))
         self.printLog('\r#RES','Reading %s complete: %s Seqs; %s Domains; %s Hits' % (resfile,rje.integerString(seqx),rje.integerString(len(pfam)),rje.integerString(hitx)))
         return True
     except:
         self.log.errorLog('Calamity during readHMMSearch(%s)' % (resfile))
         if RESFILE is not None: RESFILE.close()
         return False
Esempio n. 40
0
    def readPELM(self): ### Reads phosphoELM into classes. Extracts UniProt data if available for Species etc.
        '''
        Loads the phosphoELM table named by self.info['PELM'] and populates this object from it.
        - self.dict['PhosphoSites'] gains {acc:{pos:{'aa':code}}} entries; any disagreement between rows, or between
          a row's residue code and its sequence, is reported to the log as a warning.
        - One sequence per accession is added to a new SeqList. The name is built from the matching UniProt entry
          where one can be used, and is "<acc>_UNK__<acc>" otherwise.
        - The SeqList and UniProt objects are stored in self.obj['SeqList'] and self.obj['UniProt'].
        - Sequences are written to self.info['PELMFas'] unless that file exists and the user declines to overwrite.
        Nothing is returned. Any exception is logged via self.log.errorLog rather than raised.
        '''
        try:### ~ [1] Load phosphoELM table, keyed on accession and position ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            pelmdata = rje.dataDict(self,self.info['PELM'],mainkeys=['acc','position'])
            accseq = {}     # {acc:sequence string} at first; values become sequence objects in [3b]

            ### ~ [2] Populate the PhosphoSites dictionary ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            phosdict = self.dict['PhosphoSites']
            for pkey in pelmdata:
                ## ~ [2a] Split key into accession and integer position ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                (acc,posstr) = string.split(pkey)
                pos = string.atoi(posstr)
                if acc not in phosdict: phosdict[acc] = {}
                accsites = phosdict[acc]
                if pos not in accsites: accsites[pos] = {}
                site = accsites[pos]
                entry = pelmdata[pkey]
                ## ~ [2b] Consistency checks against data already seen for this accession ~~~~~~~~~ ##
                entryseq = entry['sequence']
                if acc in accseq:
                    if accseq[acc] != entryseq:
                        self.log.printLog('#ERR','Warning. Sequence mismatch for %s' % acc)
                else: accseq[acc] = entryseq
                code = entry['code']
                if 'aa' in site:
                    if site['aa'] != code:
                        self.log.printLog('#ERR','Warning. PhosphoSite mismatch for %s at pos %d: %s not %s' % (acc,pos,code,site['aa']))
                else: site['aa'] = code
                residue = accseq[acc][(pos-1):pos]      # Positions are 1-based; slicing avoids IndexError
                if code != residue:
                    self.log.printLog('#ERR','Warning. PhosphoSeq mismatch for %s at pos %d: %s not %s' % (acc,pos,code,residue))

            ### ~ [3] Build sequence objects and replace the sequence strings with them ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            ## ~ [3a] Helper objects ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            accessions = rje.sortKeys(accseq)
            uniprot = rje_uniprot.UniProt(self.log,self.cmd_list)
            entries = uniprot.accDict(accessions)       # {acc:UniProtEntry}
            seqlist = rje_seq.SeqList(self.log,self.cmd_list+['seqin=None'])
            ## ~ [3b] One sequence object per accession ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            #!# Look out for splice variants! (There are some!) - Copy UniProt and change sequence & AccNum #!#
            for acc in accessions:
                sequence = accseq[acc]
                try:
                    # Splice-variant suffixes (after '-') are dropped for the UniProt lookup
                    uniseq = entries[string.split(acc,'-')[0]].obj['Sequence']
                    desc = uniseq.info['Description']
                    name = '%s__%s %s' % (uniseq.info['ID'],acc,desc)
                    if sequence != uniseq.info['Sequence']:
                        self.log.printLog('#WARNING','Sequence mismatch for UniProt entry %s' % acc)
                except:
                    # Any failure above (e.g. no UniProt entry) falls back to a placeholder name
                    self.log.errorLog('Problem with %s' % acc)
                    name = '%s_UNK__%s' % (acc,acc)             #!# Add sequences where UniProt missing #!#
                accseq[acc] = seqlist._addSeq(name,sequence)
            ## ~ [3c] Optional filtering: drop accessions whose sequence did not survive ~~~~~~~~~~ ##
            if self.opt['FilterSeq']:
                seqlist.autoFilter()
                for acc in accessions:
                    if accseq[acc] not in seqlist.seq: del accseq[acc]
                accessions = rje.sortKeys(accseq)
            ## ~ [3d] Save sequences for BLASTing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            fasfile = self.info['PELMFas']
            savefas = not os.path.exists(fasfile)
            if not savefas: savefas = self.stat['Interactive'] < 0     # Non-interactive runs overwrite silently
            if not savefas: savefas = rje.yesNo('%s exists: overwrite?' % fasfile)
            if savefas: seqlist.saveFasta(seqfile=fasfile)
            self.obj['SeqList'] = seqlist
            self.obj['UniProt'] = uniprot
        except: self.log.errorLog('Problem during PhosphoSeq.readPELM')
Esempio n. 41
0
    def pairwiseAQ(self,seqlist=None,query=None,focus=[0,0]):     ### Performs PAQ on seqlist, adding seq.info['PAQ']
        '''
        Performs PAQ on seqlist, adding seq.info['PAQ']
        >> seqlist:rje_seq.SeqList Object
        - NB. This object will itself have sequences removed from it, so beware!
        - A new info key will be added: PAQ = PAQ sequences with alignment Xs
        >> query:Sequence object to link residues back to. If not given, seqlist.obj['QuerySeq'] is used. If that is
           also empty, or the NoQuery option is set, a random sequence from seqlist is used instead.
        >> focus:list of range positions [X:Y] to look at. If Y=0 then [X:].
        Stages: <0> setup; <1> directional pairwise block comparisons; <2> linking residues to the query (plus
        optional Anchors correction); <3> X-ing out unlinked residues; <4> iteratively removing or masking the worst
        sequence; <5> swapping the masked sequence into seq.info['PAQ'] and restoring seq.info['Sequence'].
        Nothing is returned. Any exception is logged with the current stage. NB. In that case the <5> swap has not
        happened, so seq.info['Sequence'] may be left holding partially masked data.
        '''
        ### <PAQ0> ### Setup
        try:
            _stage = '<0> Setup'
            haqlist = seqlist   # SeqList Object to store individually Xd sequences
            if not query:
                query = haqlist.obj['QuerySeq']
            if self.opt['NoQuery'] or not query:
                query = haqlist.seq[random.randint(0,haqlist.seqNum()-1)]
                self.log.printLog('#QRY','Temp (random) query %s assigned for PAQ' % query.shortName())
            #!# paqx = [False] * seqlist.seq[0].seqLen()    # List of whether a column of the alignment is bad (has an X) [True] or not [False]
            #!# - make this a method?!

            pwaq = {}    # Dictionary of lists of pairwise alignements
            block_align = {}    # Dictionary of whether residue in block of sequence that is well-aligned or not
            for seq in haqlist.seq:
                block_align[seq] = [False] * seq.seqLen()
                seq.info['PAQ'] = seq.info['Sequence'][0:]  # Keep the unmasked sequence; swapped back in <PAQ5>
                # 'in' replaces the Python-2-only dict.has_key() with identical meaning
                if 'SAQX' in seq.info and len(seq.info['SAQX']) == seq.seqLen():   #!# Should no longer be issues due to length changes following realignment
                    seq.info['Sequence'] = seq.info['SAQX'][0:]
                elif 'SAQX' in seq.info:
                    self.log.errorLog('Cannot use SAQX for %s in PAQ as wrong length.' % seq.shortName(),printerror=False)
                for otherseq in haqlist.seq:
                    pwaq[(seq,otherseq)] = [False] * seq.seqLen()

            ### <PAQ1> ### Directional Pairwise Comparisons of sequences
            _stage = '<1> Pairwise Comparisons'
            infotxt = 'PAQ%d: Pairwise Comparisons ...' % self.stat['PAQCyc']
            #print self.stat
            for seq in haqlist.seq:
                for otherseq in haqlist.seq:
                    myinfo = '%s %.1f%% %.1f%%   ' % (infotxt,(100.0 * haqlist.seq.index(seq) / haqlist.seqNum()),(100.0 * haqlist.seq.index(otherseq) / haqlist.seqNum()))
                    self.log.printLog('\r#PAQ',myinfo,log=False,newline=False)
                    for r in range(seq.seqLen()):
                        ar = seq.info['Sequence'][r]
                        ## <i> ## Look for PW aligned block
                        _stage = '<1-i> Pairwise Comparisons'
                        if ar not in ['-','X']: # Start of test block
                            blen = 0    # Block length (PAQBlock) = AAs
                            win = 0     # Window length = all sequence
                            matchx = 0  # Score for residues in window
                            while blen < self.stat['PAQBlock'] and (r+win) < seq.seqLen():     # This time we allow overshoots in both directions
                                ar = seq.info['Sequence'][r+win]
                                at = otherseq.info['Sequence'][r+win]
                                if 'X' in [ar,at]:     # Hit Bad Region: Abort
                                    break
                                else:   # Better region
                                    if ar != '-':
                                        blen += 1   # Increase Block
                                        matchx += self._saqCon(ar,at)
                                win += 1
                            ## <ii> ## Update pwaq if block good
                            _stage = '<1-ii> Pairwise Comparisons'
                            if matchx >= self.stat['PAQMatch']:
                                for w in range(win):
                                    if seq.info['Sequence'][r+w] in ['-','X']:
                                        pwaq[(seq,otherseq)][r+w] = False
                                    else:
                                        pwaq[(seq,otherseq)][r+w] = True
            # Literal percent signs must be doubled: unescaped, '% 100.0%' is parsed as one padded '%' conversion
            self.log.printLog('\r#PAQ','%s 100.0%% 100.0%%.   ' % infotxt,log=False)

            ### <PAQ2> ### Link back to Query
            _stage = '<2> Linking to Query'
            ### <PAQ2a> ### Network of Pairwise Quality alignments
            _stage = '<2a> Linking to Query'
            #self.verbose(1,3,'PAQ%d: Linking Residues to Query (%s)' % (self.stat['PAQCyc'],query.shortName()),0)
            infotxt = 'PAQ%d: Linking Residues to Query (%s) ...' % (self.stat['PAQCyc'],query.shortName())
            for r in range(query.seqLen()):
                _stage = '<2a> Linking to Query'
                self.log.printLog('\r#PAQ','%s %.1f%%' % (infotxt,(100.0 * r / query.seqLen())),log=False,newline=False)
                qok = {}    # Dictionary of whether residue in seq OK, i.e. linked to query
                for seq in haqlist.seq:
                    qok[seq] = False
                qok[query] = True
                sok = [0,1] # List of OK sequence for residue
                # Repeat until the number of linked sequences stops changing between passes
                while sok[-2] != sok[-1]:
                    ## <i> ## Match pairs, starting with query
                    _stage = '<2a-i> Linking to Query'
                    for seq in haqlist.seq:
                        if qok[seq]:
                            for otherseq in haqlist.seq:
                                if pwaq[(seq,otherseq)][r] or pwaq[(otherseq,seq)][r]:
                                    qok[otherseq] = True
                    ## <ii> ## Update sok
                    _stage = '<2a-ii> Linking to Query'
                    sok.append(0)
                    for seq in haqlist.seq:
                        if qok[seq]:
                            sok[-1] += 1
                            block_align[seq][r] = True
                _stage = '<2a-iii> Linking to Query'
                if sok[-1] == 1:    # Only query OK!
                    block_align[query][r] = False
            self.log.printLog('\r#PAQ','%s 100.0%%' % infotxt,log=False)

            ### <PAQ2b> ### Allow for divergence (Conserved Anchors)
            _stage = '<2b> Anchors'
            if self.opt['Anchors']:
                infotxt = 'PAQ%d: Accounting for divergence within aligned regions ...' % self.stat['PAQCyc']
                ## <i> ## Setup gapped list
                gapped = [False] * query.seqLen()   # Whether column of alignment is gapped
                for seq in haqlist.seq:
                    self.log.printLog('\r#PAQ','%s %.1f%%  ' % (infotxt,(50.0 * haqlist.seq.index(seq) / haqlist.seqNum())),log=False,newline=False)
                    (start,end) = (0,seq.seqLen())
                    # Trim terminal gaps. The start/end bounds stop an all-gap (or empty) sequence from running
                    # off the end of the string or wrapping round to negative indices.
                    while start < end and seq.info['Sequence'][start] == '-':
                        start += 1
                    while end > start and seq.info['Sequence'][end-1] == '-':
                        end -=1
                    for r in range(start,end):
                        if seq.info['Sequence'][r] == '-':
                            gapped[r] = True
                ## <ii> ## Correction
                for seq in haqlist.seq:
                    self.log.printLog('\r#PAQ','%s %.1f%%  ' % (infotxt,(50 + (50.0 * haqlist.seq.index(seq) / haqlist.seqNum()))),log=False,newline=False)
                    for r in range(seq.seqLen()):
                        if block_align[seq][r] or gapped[r]:    # No need for correction
                            continue
                        # Move in both directions: if good residues (or sequence end) reached before gaps then reinstate
                        # NOTE(review): fok and bok start False and are only ever assigned False below, so the
                        # "Reinstate" branch can never run as written. The comment above suggests they were meant
                        # to end up True when a good residue or sequence end is reached. Left unchanged because
                        # enabling it would alter PAQ results (and the reinstated range would need bounds checks)
                        # - confirm intended behaviour before changing.
                        winf = 0
                        fwd = True
                        fok = False
                        winb = 0
                        bwd = True
                        bok = False
                        while fwd or bwd:
                            # End of seqs
                            if (r + winf) >= seq.seqLen():
                                fwd = False
                            if (r - winb) < 0:
                                bwd = False
                            # Gaps/OK
                            if fwd:
                                if gapped[r+winf]:
                                    fok = False
                                    fwd = False
                                elif block_align[seq][r+winf]:
                                    fwd = False
                                else:
                                    winf += 1
                            if bwd:
                                if gapped[r-winb]:
                                    bok = False
                                    bwd = False
                                elif block_align[seq][r-winb]:
                                    bwd = False
                                else:
                                    winb += 1
                        if fok and bok: # Reinstate
                            for w in range(r-winb,r+winf+1):
                                block_align[seq][w] = True
                self.log.printLog('\r#PAQ','%s 100.0%%  ' % infotxt,log=False)

            ### <PAQ3> ### X out badly-aligned blocks
            _stage = '<3> Making bad sequence blocks'
            for seq in haqlist.seq:
                newseq = ''
                for r in range(seq.seqLen()):
                    if block_align[seq][r] or seq.info['Sequence'][r] == '-':
                        newseq += seq.info['Sequence'][r]
                    else: # Bad residue
                        newseq += 'X'
                seq.info['Sequence'] = newseq[0:]
            #!# Add saving of data in 'datafull' option

            ### <PAQ4> ### Remove sequences and/or badly-aligned regions
            _stage = '<4> Removing sequences/regions'
            self.verbose(0,4,'PAQ%d: Removing bad sequences and/or dodgy regions...' % self.stat['PAQCyc'],0)
            ## <PAQ4a> ## Process Query first - only interested in good regions within query
            if self.opt['NoQuery']:  # No preprocessing of Query
                self.verbose(0,4,'no Master Query processing...',0)
            else:
                haqlist.mapX(query, qtrim=True, focus=focus) # Replaces other sequence ends and query X columns with Xs
                self.verbose(0,4,'Query (%s) processed...' % query.shortName(),0)
            self.verbose(0,3,'',1)
            if self.opt['ManPAQ']:
                haqlist.saveFasta(seqfile='%s.manpaq.fas' % haqlist.info['Basefile'])

            ## <PAQ4b> ## Cycle through other sequences (worst first) until no more good residues are lost
            goodres = [0, self._getGood(haqlist.seq)]   # List of number of 'good' residues
            goodseq = [0, haqlist.seqNum()]
            while goodres[-1] != goodres[-2] or goodseq[-1] != goodseq[-2]:
                colgood = [0] * haqlist.seq[0].seqLen()    # Good residues per column
                for r in range(haqlist.seq[0].seqLen()):
                    for seq in haqlist.seq:
                        if seq.info['Sequence'][r] != '-' and seq.info['Sequence'][r] != 'X':
                            colgood[r] += 1
                ## <i> ## Compare relative loss of masking and losing each sequence
                keepx = {}  # Dictionary of seq:number of lost residues if seq kept
                losex = {}  # Dictionary of seq:number of lost residues if seq lost
                badkx = -1  # Biggest loss if kept
                badlx = -1  # Biggest loss if lost
                bads = None # Worst sequence
                for seq in haqlist.seq:
                    if seq == query and self.opt['NoQuery'] == False:
                        continue    # Next sequence
                    # Calculate keepx and losex
                    keepx[seq] = 0
                    for r in range(seq.seqLen()):
                        if seq.info['Sequence'][r] == 'X':
                            keepx[seq] += colgood[r]
                        #?# In Perl HAQESAC there was an option to ignore Orphans in this calculation. Reinstate?
                    losex[seq] = self._getGood([seq])
                    # Update bads if worse
                    if keepx[seq] > badkx:
                        badkx = keepx[seq]
                        badlx = losex[seq]
                        bads = seq
                    elif keepx[seq] == badkx and losex[seq] < badlx:
                        badlx = losex[seq]
                        bads = seq
                ## <ii> ## Remove bad sequences and/or regions
                if badkx > 0:
                    if self.opt['ManPAQ']:
                        default = 'N'
                        if badkx * self.stat['PAQKeepLen'] > badlx * self.stat['PAQKeepSeq']:   # Lose sequence!
                            default = 'Y'
                        if rje.yesNo('%s worst: -%s aa if kept vs -%s aa if lost. Remove?' % (bads.shortName(),rje.integerString(badkx),rje.integerString(badlx)),default):
                            seqlist.removeSeq(text='PAQ%d: -%s aa if kept vs -%s aa if lost. (Manual decision.)' % (self.stat['PAQCyc'],rje.integerString(badkx),rje.integerString(badlx)),seq=bads)
                        else:   # X out
                            haqlist.mapX(bads)
                    else:
                        self.verbose(1,3,'%s worst: -%s aa if kept vs -%s aa if lost.' % (bads.shortName(),rje.integerString(badkx),rje.integerString(badlx)),1)
                        #!# Add option for upweighting certain sequence type? (e.g. vs fragment or hypothetical?)
                        if badkx * self.stat['PAQKeepLen'] > badlx * self.stat['PAQKeepSeq']:   # Lose sequence!
                            seqlist.removeSeq(text='PAQ%d: -%s aa if kept vs -%s aa if lost.' % (self.stat['PAQCyc'],rje.integerString(badkx),rje.integerString(badlx)),seq=bads)
                        else:   # X out
                            haqlist.mapX(bads)
                ### <iii> ### Recalculate goodres
                goodres.append(self._getGood(haqlist.seq))
                goodseq.append(haqlist.seqNum())
                self.verbose(1,3,'%d -> %d "good" aa' % (goodres[-2],goodres[-1]),1)

            ### <PAQ5> ### Reinstate UnX'd sequence:
            _stage = '<5> Replacing sequences'
            for seq in haqlist.seq:
                # Swap: 'PAQ' receives the X-masked version, 'Sequence' gets back the copy stored in <PAQ0>
                [seq.info['PAQ'],seq.info['Sequence']] = [seq.info['Sequence'],seq.info['PAQ']]
            if self.opt['ManPAQ'] and rje.checkForFile('%s.manpaq.fas' % haqlist.info['Basefile']):
                os.unlink('%s.manpaq.fas' % haqlist.info['Basefile'])

        except:
            self.log.errorLog('rje_haq.py ~ Problem with pairwiseAQ %s.' % _stage, True)
Esempio n. 42
0
    def farmHAQ(self):  ### Uses SLiMFarmer to farm out the HAQESAC runs
        '''
        Uses SLiMFarmer to farm out the HAQESAC runs.
        - Requires the batch file "haqesac.bat" inside self.info['HaqDir']; raises (and logs) IOError if missing.
        - Runs one farming pass ("Complete run"), or two ("First round", "Second round") when the MultiHAQ option
          is set. Each pass changes into HaqDir, runs SLiMFarmer.slimFarm() and changes back to self.info['RunPath'].
        << Returns True if all passes complete. On any exception the working directory is reset to RunPath, the
           error is logged (with quit choice) and None is returned.
        '''
        try:  ### ~ [0] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            batfile = os.path.abspath(
                rje.makePath('%shaqesac.bat' % self.info['HaqDir'],
                             wholepath=True))
            self.printLog('#FARM', batfile)
            if not rje.exists(batfile):
                raise IOError('Cannot find %s' % batfile)
            # Absolute paths are used because the working directory changes during farming
            farmcmd = [
                'subjobs=%s' % batfile, 'farm=batch', 'qsub=F', 'i=-1',
                'runpath=%s' % os.path.abspath(self.info['HaqDir'])
            ]
            if self.opt['MultiHAQ']:
                haqfarm = ['First round', 'Second round']
            else:
                haqfarm = ['Complete run']

            ### ~ [1] Peform HAQESAC runs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            for farmrun in haqfarm:
                self.printLog(
                    '#CHDIR', 'Changing directory for %s farming: %s' %
                    (farmrun, self.info['HaqDir']))
                os.chdir(self.info['HaqDir'])
                farmer = slimfarmer.SLiMFarmer(self.log,
                                               self.cmd_list + farmcmd)
                farmer.slimFarm()
                os.chdir(self.info['RunPath'])
                self.printLog(
                    '#CHDIR', 'Changed directory post-farming: %s' %
                    self.info['RunPath'])
                self.printLog('#FARM',
                              'HAQESAC %s farming complete.' % farmrun)
            #!# Add identifying and skipping of partial runs.
            return True

        except:
            # Make sure a failure part-way through farming does not leave the process inside HaqDir
            os.chdir(self.info['RunPath'])
            self.errorLog('Major problem with MultiHAQ.farmHAQ',
                          quitchoice=True)
Esempio n. 43
0
    def readPELM(self):  ### Reads phosphoELM into classes. Extracts UniProt data if available for Species etc.
        '''
        Reads the phosphoELM file given by self.info['PELM'] and converts it into dictionaries and objects.
        - Each (accession, position) row is recorded in self.dict['PhosphoSites'][acc][pos]['aa']. Conflicting
          sequences or residue codes produce log warnings but do not stop processing.
        - A SeqList holding one sequence per accession is created and stored in self.obj['SeqList']. Sequence names
          use the UniProt ID and description when the entry can be read, else "<acc>_UNK__<acc>".
        - The UniProt object used for the lookups is stored in self.obj['UniProt'].
        - The sequences are saved to self.info['PELMFas'], asking before overwriting when running interactively.
        Returns nothing. All exceptions are caught and reported through self.log.errorLog.
        '''
        try:  ### ~ [1] Setup & Read File into Data Dictionary ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            table = rje.dataDict(self, self.info['PELM'], mainkeys=['acc', 'position'])
            sequences = {}  # Acc:Sequence string, replaced by Acc:Sequence object in [3b]

            ### ~ [2] Generate PhosphoSites dictionary ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            sites = self.dict['PhosphoSites']
            for rowkey in table:
                ## ~ [2a] Accession and 1-based position from the combined key ~~~~~~~~~~~~~~~~~~~~ ##
                (acc, position) = string.split(rowkey)
                position = string.atoi(position)
                if acc not in sites:
                    sites[acc] = {}
                if position not in sites[acc]:
                    sites[acc][position] = {}
                ## ~ [2b] Store row data, warning about anything inconsistent ~~~~~~~~~~~~~~~~~~~~~ ##
                row = table[rowkey]
                rowseq = row['sequence']
                if acc not in sequences:
                    sequences[acc] = rowseq
                elif rowseq != sequences[acc]:
                    self.log.printLog('#ERR', 'Warning. Sequence mismatch for %s' % acc)
                aa = row['code']
                sitedata = sites[acc][position]
                if 'aa' not in sitedata:
                    sitedata['aa'] = aa
                elif aa != sitedata['aa']:
                    mismatch = (acc, position, aa, sitedata['aa'])
                    self.log.printLog(
                        '#ERR',
                        'Warning. PhosphoSite mismatch for %s at pos %d: %s not %s' % mismatch)
                seqaa = sequences[acc][(position - 1):position]  # Slice, so out-of-range gives '' not an error
                if aa != seqaa:
                    mismatch = (acc, position, aa, seqaa)
                    self.log.printLog(
                        '#ERR',
                        'Warning. PhosphoSeq mismatch for %s at pos %d: %s not %s' % mismatch)

            ### ~ [3] Make sequence objects and update PhosphoSites keys ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            ## ~ [3a] Setup objects ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            sortedacc = rje.sortKeys(sequences)
            unihandler = rje_uniprot.UniProt(self.log, self.cmd_list)
            unientries = unihandler.accDict(sortedacc)  # Dictionary of {acc:UniProtEntry}
            newseqlist = rje_seq.SeqList(self.log, self.cmd_list + ['seqin=None'])
            ## ~ [3b] Add one sequence for each AccNum and update the sequences dictionary ~~~~~~~~ ##
            #!# Look out for splice variants! (There are some!) - Copy UniProt and change sequence & AccNum #!#
            for acc in sortedacc:  #!# Make accdict of {acc:Seq} using unidict and seqlist #!#
                aaseq = sequences[acc]
                try:
                    baseacc = string.split(acc, '-')[0]  # Lookup uses the accession without any '-N' suffix
                    unientry = unientries[baseacc]
                    description = unientry.obj['Sequence'].info['Description']
                    seqname = '%s__%s %s' % (unientry.obj['Sequence'].info['ID'], acc, description)
                    if unientry.obj['Sequence'].info['Sequence'] != aaseq:
                        self.log.printLog('#WARNING', 'Sequence mismatch for UniProt entry %s' % acc)
                except:
                    # Failure anywhere above means the UniProt data is unusable: log and use a placeholder name
                    self.log.errorLog('Problem with %s' % acc)
                    seqname = '%s_UNK__%s' % (acc, acc)  #!# Add sequences where UniProt missing #!#
                sequences[acc] = newseqlist._addSeq(seqname, aaseq)
            ## ~ [3c] Filtering of sequences ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            if self.opt['FilterSeq']:
                newseqlist.autoFilter()
                for acc in sortedacc:
                    if sequences[acc] in newseqlist.seq:
                        continue
                    sequences.pop(acc)  # Sequence was removed by the filter
                sortedacc = rje.sortKeys(sequences)
            ## ~ [3d] Save sequences for BLASTing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            outfile = self.info['PELMFas']
            # Existing output is kept only if the run is interactive and the user declines the overwrite
            keepold = (os.path.exists(outfile)
                       and not self.stat['Interactive'] < 0
                       and not rje.yesNo('%s exists: overwrite?' % outfile))
            if not keepold:
                newseqlist.saveFasta(seqfile=outfile)
            self.obj['SeqList'] = newseqlist
            self.obj['UniProt'] = unihandler
        except:
            self.log.errorLog('Problem during PhosphoSeq.readPELM')
Esempio n. 44
0
 def multiHAQ(self,secondrun=False):     ### Executes main HAQESAC runs
     '''
     Executes main HAQESAC runs, one per query sequence returned by self.seqs().
     >> secondrun:bool [False] = Whether this is the second pass over the queries. The pass for which secondrun
         equals self.opt['MultiHAQ'] is treated as the final round; a non-final pass ends by calling multiHAQ(True).
     Problems are reported through self.errorLog(quitchoice=True) rather than raised to the caller.
     '''
     try:### ~ [0] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         finalrun = secondrun == self.opt['MultiHAQ']    # Whether this is the manual HAQESAC phase
         qryacc = self.obj['SeqList'].accList()          # Full list of Query accession numbers
         processed = []                                  # List of processed sequence accession numbers
         haqdir = self.info['HaqDir']
         ### ~ [1] Perform HAQESAC runs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         for seq in self.seqs():
             ## ~ [1a] Check AutoSkip ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             acc = seq.info['AccNum']
             if finalrun and acc in processed and (self.opt['AutoSkip'] or (self.i() >=0 and rje.yesNo('%s already covered by previous HAQESAC. Skip?' % seq.shortName()))):
                 self.printLog('#SKIP','%s already covered by previous HAQESAC: Skipped' % seq.shortName()); continue
             ## ~ [1b] Check Whether to run (re-runs and low sequence number) ~~~~~~~~~~~~~~~~~~ ##
             logfile = rje.makePath('%s%s.log' % (haqdir,acc),wholepath=True)     # Currently unused below
             infile = rje.makePath('%s%s.fas' % (haqdir,acc),wholepath=True)
             pkfile = rje.makePath('%s%s.pickle' % (haqdir,acc),wholepath=True)
             pkzfile = rje.makePath('%s%s.pickle.gz' % (haqdir,acc),wholepath=True)
             if not os.path.exists(infile): self.printLog('#SKIP','%s input file %s not found: Skipped' % (seq.shortName(),infile)); continue
             # First-round runs are skipped when rje.isYounger() returns the (zipped) pickle rather than the input
             if not finalrun and not self.opt['Force'] and rje.isYounger(pkzfile,infile) == pkzfile:
                 self.printLog('#SKIP','%s run detected: Skipped' % seq.shortName()); continue
             if not finalrun and not self.opt['Force'] and rje.isYounger(pkfile,infile) == pkfile:
                 self.printLog('#SKIP','%s run detected: Skipped' % seq.shortName()); continue
             inseqx = rje_seq.SeqCount(self,infile)
             if inseqx < 2: self.printLog('#SKIP','Only one sequence found in %s: Skipped' % (infile)); continue
             ## ~ [1c] Pause if running in Chaser Mode and no Pickle ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             pickled = os.path.exists(pkfile) or os.path.exists('%s.gz' % pkfile); tm = 0
             while secondrun and self.opt['Chaser'] and not pickled:
                 self.progLog('#WAIT','No %s pickle. Sleeping for %d min.' % (acc,tm))
                 time.sleep(60*tm); tm += 1      # Wait grows by one minute per attempt (first check is immediate)
                 pickled = os.path.exists(pkfile) or os.path.exists('%s.gz' % pkfile)
                 if not pickled:
                     try: rje.choice('Press <ENTER> to try again, or <CTRL+C> to Quit')
                     except:
                         # Fixed: message has a single placeholder, so only acc is formatted into it
                         self.printLog('#PICKLE','No %s pickle.' % acc)
                         self.printLog('\r#MULTI','Exiting multiHAQ "Chaser" run.'); return
             ## ~ [1d] Run HAQESAC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             runhaqesac = True
             pngfile = rje.makePath('%s%s.png' % (haqdir,acc),wholepath=True)
             if not self.force() and rje.exists(pngfile):
                 self.printLog('#SKIP','Found evidence of completed run: %s (force=F). Skipping.' % pngfile)
                 runhaqesac = False
             ancfile = rje.makePath('%s%s.anc.fas' % (haqdir,acc),wholepath=True)
             if not self.force() and rje.exists(ancfile):
                 self.printLog('#SKIP','Found evidence of completed run: %s (force=F). Skipping.' % ancfile)
                 runhaqesac = False
             if runhaqesac:
                 haqcmd = ['ini=haqesac.ini','seqin=%s.fas' % acc, 'query=%s' % acc, 'basefile=%s' % acc, 'newlog=F']
                 self.printLog('#HAQ','Running HAQESAC for %s - will have own log etc.' % seq.shortName(),log=False)
                 os.chdir(haqdir)                # HAQESAC is run from inside the HAQESAC directory
                 info = haqesac.makeInfo()
                 haqcmd = rje.getCmdList(haqcmd,info=info)
                 out = rje.Out(cmd_list=haqcmd)                      # Sets up Out object for controlling output to screen
                 out.printIntro(info)                                # Prints intro text using details from Info object
                 haqlog = rje.setLog(info,out,haqcmd)                # Sets up Log object for controlling log file output
                 try: haqesac.HAQESAC(log=haqlog, cmd_list=haqcmd).run(setobjects=True)
                 except:
                     # Return to the run path before asking, so that an abort leaves the working directory restored
                     os.chdir(self.info['RunPath'])
                     if self.i() >= 0 and rje.yesNo('Problem with %s HAQESAC run. Abort?' % seq.shortName()): raise KeyboardInterrupt
                 os.chdir(self.info['RunPath'])
                 if finalrun: self.printLog('#HAQ','HAQESAC final round run for %s' % seq.shortName())
                 else: self.printLog('#HAQ','HAQESAC first round run for %s' % seq.shortName())
             ## ~ [1e] Update ScreenQry ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             if not self.opt['ScreenQry'] or not finalrun: continue
             qacclist = []   # Other queries found in this query's input file
             for qacc in rje_seq.SeqList(self.log,['seqin=%s' % infile,'autoload=T','autofilter=F']).accList():
                 if qacc in qryacc and qacc != acc: qacclist.append(qacc)
                 if qacc in qryacc and qacc not in processed: processed.append(qacc)
             self.printLog('#QRY','%d other queries found in %s: [%s]' % (len(qacclist),infile,'; '.join(qacclist)))
             self.printLog('#QRY','%d of %d queries processed' % (len(processed),self.seqNum()))
         ### ~ [2] MultiHAQ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if not finalrun: self.printLog('#MULTI','Executing second round of multiHAQ'); self.multiHAQ(True)
     except: self.errorLog('Major problem with MultiHAQ.multiHAQ',quitchoice=True)
Esempio n. 45
0
 def mapHit(self,seq,hits,hitdict,method):     ### Tries to map seq onto hitseq and returns hit if successful
     '''
     Tries to map seq onto one of the hits and returns that hit if successful.
     >> seq:tuple = Query sequence as a (name,sequence) tuple
     >> hits:list = List of hit dictionaries in rough order of goodness (keyed by 'Hit'; 'Length' is used for display)
     >> hitdict:dict = Dictionary of {hitname:stats}. Each entry must hold a 'Seq' (name,sequence) tuple and is
         given a 'Data' dictionary of extracted name details by this method.
     >> method:str = Mapping method to use
     << returns the mapped hit dictionary, or None if no mapping was made or a problem was logged.
     '''
     try:### ~ [0] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         (name,sequence) = seq
         data = rje_sequence.extractNameDetails(name,self)
         data['Sequence'] = sequence
         data['ShortName'] = string.split(name)[0]
         for hit in hitdict:
             (hname,hsequence) = (hitdict[hit]['Seq'][0],hitdict[hit]['Seq'][1])
             hitdict[hit]['Data'] = rje_sequence.extractNameDetails(hname,self)
             hitdict[hit]['Data']['Sequence'] = hsequence
             hitdict[hit]['Data']['ShortName'] = string.split(hname)[0]

         def confirmMap(hitdata):    # Mapping is automatic unless interactivity is 2+, when the user is asked
             return self.i() < 2 or rje.yesNo('Map %s to %s?' % (data['ShortName'],hitdata['ShortName']))

         ### SkipGene ###
         if method == 'id':
             genematch = rje.matchExp(r'^(\S+)_\S+',data['ID'])
             # Fixed: matchExp returns a tuple of groups, so compare the captured gene, not the whole tuple
             if genematch and genematch[0] in self.list['SkipGene']:
                 return None
         ### Name, AccNum, Sequence and ID ###
         field = method_info[method]
         if field in ['Name', 'AccNum', 'Sequence', 'ID']:
             for hit in hits:
                 hitdata = hitdict[hit['Hit']]['Data']
                 if hitdata[field] == data[field] and confirmMap(hitdata):
                     return hit
         ### DescAcc ###
         if method == 'descacc':
             for hit in hits:
                 hitdata = hitdict[hit['Hit']]['Data']
                 # Query accession number must appear as a distinct word within the hit name
                 if rje.matchExp(r'\W(%s)\W' % data['AccNum'],hitdata['Name']) and confirmMap(hitdata):
                     return hit
         ### GABLAM ###
         if method != 'gablam': return None
         focus = self.str['MapFocus'][:1].upper() + self.str['MapFocus'][1:].lower()
         gstat = gstat_type[self.str['MapStat'].lower()]
         statkey = '%s_%s' % (focus,gstat)
         possibles = []  # List of Hits that meet MinMap criterion
         for hit in hits:
             hitname = hit['Hit']
             hitdata = hitdict[hitname]['Data']
             if self.getNum('AutoMap') > 0.0 and hitdict[hitname][statkey] >= self.getNum('AutoMap'):
                 # A declined AutoMap hit is not added to the manual options
                 if confirmMap(hitdata):
                     return hit
             elif hitdict[hitname][statkey] >= self.getNum('MinMap'):
                 possibles.append(hit)
         ### Manual GABLAM Choice ###
         if self.i() < 0 or not possibles: return None
         possibles.reverse()     # Best option is listed last, as choice <1>
         print('\nMapping options for %s:\n' % data['ShortName'])
         for (p,hit) in enumerate(possibles):
             hitname = hit['Hit']
             hitdata = hitdict[hitname]['Data']
             hstat = hitdict[hitname]
             # Single print per option: same text as the original chained trailing-comma print statements
             optline = '<%d> %s (%d aa) =\t' % (len(possibles)-p,hitdata['Name'],hit['Length'])
             optline += '%.1f%% Qry Len, ' % (100.0 * hit['Length'] / len(sequence))
             optline += '%.1f%% ID (%.1f%% Sim, %.1f%% Cov.) ' % (hstat['Hit_ID'],hstat['Hit_Sim'],hstat['Hit_Len'])
             optline += '(Qry: %.1f%% ID (%.1f%% Sim, %.1f%% Cov.)' % (hstat['Query_ID'],hstat['Query_Sim'],hstat['Query_Len'])
             print(optline)
         print('<0> No mapping.\n')
         ## Choice ##
         while 1:    # Repeat until a valid choice is made and (where required) confirmed
             choice = rje.getInt('Select sequence to replace %s?' % data['ShortName'],default=1,confirm=True)
             if choice == 0: # No mapping
                 if self.i() < 2 or rje.yesNo('No GABLAM mapping for %s?' % (data['ShortName'])): return None
             elif choice > 0 and choice <= len(possibles):
                 hit = possibles[len(possibles) - choice]
                 if confirmMap(hitdict[hit['Hit']]['Data']): return hit
     except:
         self.errorLog('Problem during SeqMapper.mapHit(%s)' % method,quitchoice=True)
         return None
Esempio n. 46
0
    def slimJimMapping(self):  ### Generate SLiMJIM PNGs for all sequences
        '''
        Generate SLiMJIM PNGs for each alignment in the input sequence file.
        The input is split into one group per protein: a new group starts at each sequence whose name has
        'Motifs' as its second word. For each group the sequences are renamed for R, saved as *.aln.tdt and
        *.rel.tdt files and R is called to make the PNG files. Errors for individual groups are logged and counted.
        '''
        try:  ### ~ [1] ~ Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            mapseq = {}  # Dictionary of {dataset:[seqs]}
            scmd = ['autoload=T', 'seqnr=F', 'accnr=F', 'replacechar=F']
            mseq = rje_seq.SeqList(self.log, self.cmd_list + scmd)  #!# Removed ['minregion=3']+ #!#
            while mseq.seq:
                ## ~ [1a] ~ Read in all sequences for one protein ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                pseq = [mseq.seq.pop(0)]  # pseq = list of sequences for this protein
                while mseq.seq:
                    nextname = mseq.seq[0].info['Name']
                    if nextname.find('Motifs') > 0 and string.split(nextname)[1] == 'Motifs':
                        break  # Next protein
                    pseq.append(mseq.seq.pop(0))
                ## ~ [1b] ~ Update relevant sequence dictionary ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                mapseq[pseq[0].shortName()] = pseq[0:]
            self.printLog('#ALN', '%d distinct alignments identified' % len(mapseq))
            ### ~ [2] ~ Make SLiMJIM visualisations for each protein  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            ex = 0  # Number of errors
            for mapping in rje.sortKeys(mapseq):
                try:
                    ## ~ [2a] ~ Rename sequences ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                    # Fixed: load this mapping's sequences BEFORE deriving basefile/qryname from them.
                    # (Previously these were taken from whichever group pseq still held from the last iteration.)
                    pseq = mapseq[mapping][0:]
                    basefile = pseq[0].shortName()
                    if self.interactive() > 0 and not rje.yesNo(basefile):
                        continue
                    qryname = pseq[2].shortName()   # Third sequence is used as the query
                    pseq[0].info['R'] = pseq[0].shortName()[len(qryname) + 1:]
                    pseq[1].info['R'] = 'Masked'
                    for seq in pseq[2:]:
                        seq.info['R'] = seq.info['ID']
                    ## ~ [2b] ~ Setup new SeqList, strip Query gaps, calculate RelCons ~~~~~~~~~~~~~~~~ ##
                    seqfile = '%s.aln.tdt' % basefile
                    if os.path.exists(seqfile): os.unlink(seqfile)
                    rseq = rje_seq.SeqList(self.log, self.cmd_list + scmd + ['autoload=F'])
                    rseq.seq = pseq
                    rseq.obj['QuerySeq'] = pseq[2]
                    rseq.tidyQueryGaps()
                    rseq.saveR(rseq.seq, seqfile, name='R')
                    rseq.seq = pseq[2:]     # Relative conservation uses the query onwards only
                    relfile = '%s.rel.tdt' % basefile
                    if os.path.exists(relfile): os.unlink(relfile)
                    rseq.relCons(relfile)
                    self.deBug(rseq.obj['QuerySeq'].cmd_list)
                    ## ~ [2c] ~ Call R to generate graphics ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                    rcmd = '%s --no-restore --no-save --args "sfmap2png" "%s"' % (self.info['RPath'], basefile)
                    rslimjim = '%srje.r' % self.info['Path']
                    rcmd += ' < "%s" > "%s.r.tmp.txt" 2>&1' % (rslimjim, basefile)
                    self.printLog('#RSLIM', rcmd)
                    RPIPE = os.popen(rcmd)
                    try: problems = RPIPE.read()
                    finally: RPIPE.close()      # Close the pipe explicitly rather than leaving it to garbage collection
                    if problems: self.errorLog(problems, printerror=False)
                    pngx = len(glob.glob('%s*png' % basefile))
                    self.printLog('#PNG', '%d PNG files made for %s' % (pngx, basefile))
                    # The R output file is only kept when no PNG files were made
                    if pngx and os.path.exists('%s.r.tmp.txt' % basefile):
                        os.unlink('%s.r.tmp.txt' % basefile)
                except:
                    self.errorLog('SLiMJIM visualisation error for "%s"' % mapping)
                    ex += 1
            self.printLog('#SLIMJIM', 'Generation of SLiMJIMs complete. %d Problems.' % ex)

        except:
            self.errorLog(rje_zen.Zen().wisdom())
Esempio n. 47
0
	def run(self):		### Main Run method
		'''
		Main Run method. Performs one of three tasks, checked in this order:
		1. self.opt['SLiMDisc']: hands over to self.slimDisc() and returns its result.
		2. self.opt['Teiresias']: runs TEIRESIAS (unless existing output is being reused), reads the patterns,
		   calculates information content and length stats and saves delimited results (two tables if MySQL).
		3. self.info['Info'] set: loads motifs from that file and outputs an information content score for each.
		<< returns False if the Info input file is missing, else None (or the self.slimDisc() return value).
		Any exception is logged and then re-raised.
		'''
		try:
			### SLiMDisc Run ###
			if self.opt['SLiMDisc']:
				return self.slimDisc()
			
			### TEIRESIAS ###
			if self.opt['Teiresias']:
				## Setup ##
				seqlist = rje_seq.SeqList(self.log,self.cmd_list)
				infile = '%s.teiresias.fas' % rje.baseFile(seqlist.info['Name'],True)
				outfile = '%s.teiresias.out' % rje.baseFile(seqlist.info['Name'],True)
				run_teiresias = True
				if rje.isYounger(outfile,infile) == outfile:
					# Existing output is reused unless the user (interactive runs only) asks to regenerate
					if self.stat['Interactive'] < 1 or not rje.yesNo('%s and %s exist already. Regenerate?' % (infile,outfile),'N'):
						run_teiresias = False
				## Run TEIRESIAS ##
				if run_teiresias:
					seqlist.saveFasta(seqfile=infile,name='Teiresias')	### Saves sequences in fasta format
					command = rje.makePath(self.info['TeiresiasPath'],True)
					command += ' -i%s -o%s %s' % (infile,outfile,self.info['TeiresiasOpt'])
					self.log.printLog('#CMD',command)
					os.system(command)
				## Read Results ##
				self.verbose(0,2,'Reading TEIRESIAS output from %s...' % outfile,1)
				self.list['Pattern'] = []
				re_pattern = r'^(\d+)\s+(\d+)\s+(\S+)\s+(\d.+\d)$'
				RESULTS = open(outfile,'r')
				line = RESULTS.readline()
				while line:
					patmatch = rje.matchExp(re_pattern,line)
					if patmatch: # New pattern
						self.addTeiresiasPattern(patmatch)
					elif len(line) > 3 and line[0] != '#':
						self.log.errorLog('Did not recognise line: %s' % line,False,False)
					line = RESULTS.readline()
				RESULTS.close()
				patx = len(self.list['Pattern'])
				self.log.printLog('#PAT','%s TEIRESIAS patterns read from %s.' % (rje.integerString(patx),outfile))
				## Calculate Information Content ##
				aafreq = seqlist.aaFreq()
				self.verbose(0,3,'Calculating Information Content & Length stats...',0)
				occx = 0
				for pattern in self.list['Pattern']:
					pattern.stat['Info'] = self.calculateScore(pattern.info['Pattern'],aafreq)
					pattern._makeLength()
					occx += 1
					rje.progressPrint(self,occx,patx/100,patx/10)
				self.verbose(0,1,'...Done!',2)
				## Prepare Results ##
				delimit = rje.getDelimit(self.cmd_list)
				if self.info['Name'] == 'None':
					self.info['Name'] = '%s.teiresias.%s' % (rje.baseFile(seqlist.info['Name'],True),rje.delimitExt(delimit))
				if self.opt['MySQL']:	# Two tables
					patfile = os.path.splitext(self.info['Name'])
					occfile = '%s.occ%s' % (patfile[0],patfile[1])
					patfile = '%s.patterns%s' % (patfile[0],patfile[1])
					if self.opt['Append']:
						PATFILE = open(patfile,'a')
						OCCFILE = open(occfile,'a')
					else:
						PATFILE = open(patfile,'w')
						rje.writeDelimit(PATFILE,['pattern','tot_occ','seq_occ','info','len','fix','wild'],delimit)
						# Fixed: new occurrence table is written afresh ('w'), matching the pattern table
						OCCFILE = open(occfile,'w')
						rje.writeDelimit(OCCFILE,['seq_id','pos','pattern','pat_match'],delimit)
				else:
					if self.opt['Append']:
						RESFILE = open(self.info['Name'],'a')
					else:
						# Fixed: single results table goes to self.info['Name'] (patfile is only set for MySQL output)
						RESFILE = open(self.info['Name'],'w')
						rje.writeDelimit(RESFILE,['Sequence Name','Position','Pattern','Match','Total Occurrences','Num Sequences','Information Content','Length','Fixed','Wildcard'],delimit)
				## Save Results ##
				occx = 0
				for pattern in self.list['Pattern']:
					patstats = []
					for stat in ['OccCount','SeqCount','Info','Length','Fixed','Wildcards']:
						patstats.append('%d' % pattern.stat[stat])
					patstats[2] = '%.3f' % pattern.stat['Info']	# Info is output to 3 d.p. rather than as an integer
					if self.opt['MySQL']:	# Two tables
						rje.writeDelimit(PATFILE,[pattern.info['Pattern']] + patstats,delimit)
					for occ in rje.sortKeys(pattern.occ):
						seq = seqlist.seq[occ]
						for pos in pattern.occ[occ]:
							match = seq.info['Sequence'][pos:(pos+pattern.stat['Length'])]
							outlist = [seq.shortName(),'%d' % pos,pattern.info['Pattern'],match]
							if self.opt['MySQL']:	# Two tables
								rje.writeDelimit(OCCFILE,outlist,delimit)
							else:
								rje.writeDelimit(RESFILE,outlist+patstats,delimit)
							occx += 1
				if self.opt['MySQL']:	# Two tables
					PATFILE.close()
					OCCFILE.close()
					self.log.printLog('#OUT','%s patterns output to %s.' % (rje.integerString(patx),patfile))
					self.log.printLog('#OUT','%s pattern occurrences output to %s.' % (rje.integerString(occx),occfile))
				else:
					RESFILE.close()
					self.log.printLog('#OUT','%s occurrences of %s patterns output to %s.' %
									  (rje.integerString(occx),rje.integerString(patx),self.info['Name']))

			### InfoContent ###
			elif self.info['Info'] != 'None':
				## Setup ##
				alphabet = rje_seq.alph_protx 
				if not os.path.exists(self.info['Info']):
					self.log.errorLog('Input file %s missing!' % self.info['Info'],False,False)
					return False
				else:
					mypresto = presto.Presto(self.log,self.cmd_list)
					mypresto.loadMotifs(file=self.info['Info'],clear=True)
				seqlist = rje_seq.SeqList(self.log,self.cmd_list+['autoload=T'])
				if seqlist.seqNum() > 0:
					aafreq = seqlist.aaFreq(alphabet=None,fromfile=None,loadfile=None,total=False)  ### Returns dictionary of AA (& gap etc.) frequencies
				else:
					# No sequences loaded: use equal frequencies across the alphabet
					aafreq = {}
					for aa in alphabet:
						aafreq[aa] = 1.0 / len(alphabet)
				alphabet = aafreq.keys()
				maxinfo = 0 	# Sum of freq * log2(freq) over the alphabet, passed on to calculateInformationContent()
				for aa in alphabet:
					maxinfo +=  (aafreq[aa] * math.log(aafreq[aa],2))
				## Output ##
				delimit = rje.getDelimit(self.cmd_list)
				ext = rje.delimitExt(delimit)
				outfile = '%s.info.%s' % (rje.baseFile(self.info['Info'],True,['.txt','.%s' % ext]),ext)
				if self.opt['Append']:
					OUTFILE = open(outfile,'a')
				else:
					OUTFILE = open(outfile,'w')
					rje.writeDelimit(OUTFILE,['motif','pattern','info'],delimit)
				
				## Calculate Information Scores ##
				for motif in mypresto.motif:
					self.verbose(2,4,motif.info['Sequence'],0)
					pattern = string.replace(motif.info['Sequence'],'X','.')
					elements = string.split(pattern,'-')
					pattern = ''
					for el in elements:
						if el.find('.{') == 0:	# Ambiguous spacer length - compress
							pattern += '.'
						else:
							pattern += el
					self.verbose(2,2,'=> %s' % pattern,1)
					motif.stat['Info'] = self.calculateInformationContent(pattern,aafreq,maxinfo,self.stat['InfoGapPen'])
					self.verbose(0,3,'%s (%s) = %.2f' % (motif.info['Name'],pattern,motif.stat['Info']),1)
					## Output ##
					rje.writeDelimit(OUTFILE,[motif.info['Name'],pattern,'%.2f' % motif.stat['Info']],delimit)
				
				## Finish ##
				OUTFILE.close()
		except:
			self.log.errorLog('Error in run().',printerror=True,quitchoice=False)
			raise	# Delete this if method error not terrible
Esempio n. 48
0
 def readHMMPFamSearch(self,resfile=None,readaln=False):  ### Reads HMM PFam Search Results into objects
     '''
     Reads hmmpfam Search Results into objects. Results come back per sequence, so one search object is made
     per PFam domain (via self._addSearch()) and each domain line adds a hit and alignment to it.
     If self.opt['CleanRes'] is set, the results file is replaced by a reduced version containing only the
     sequences with hits, plus a footer recording the number of sequences in the original file.
     >> resfile:str = Results File (set as self.info['OutFile'])
     >> readaln:boolean = whether to bother reading Alignments into objects [False] !!! Currently always False !!!
     << returns True if the file was read, else False (missing file, non-hmmpfam file or error).
     '''
     try:
         ### Setup ###
         if not resfile or not os.path.exists(resfile):
             self.log.errorLog('Results file "%s" missing!' % resfile,printerror=False)
             return False
         ## Regular expressions ##
         re_query = r'^Query sequence:\s+(\S+)'
         #Model           Domain  seq-f seq-t    hmm-f hmm-t      score  E-value
         #--------        ------- ----- -----    ----- -----      -----  -------
         #Lep_receptor_Ig   1/1      24   114 ..     1   103 []   158.4  1.7e-44
         # ... else ...
         #         [no hits above thresholds]
         re_dom = r'\s+'.join([r'^(\S+)', r'\S+', r'(\d+)', r'(\d+)\D.+', r'(\S+)', r'(\S+)\s*$'])
         re_end = r'End of rje_hmm reduced results file: (\d+) sequences in original'
         ## Search dictionary as results come back per sequence, not per HMM! ##
         pfam = {}  # Dictionary of {PFam name:search}
         hitx = 0  # Total number of hits
         ### Check file type ###
         HEADCHECK = open(resfile, 'r')
         try: firstline = HEADCHECK.readline()
         finally: HEADCHECK.close()
         if firstline.find('hmmpfam') != 0:
             self.errorLog('File "%s" does not appear to be an hmmpfam results file' % resfile,printerror=False)
             if rje.yesNo('Delete incorrect results file? (Check that hmmpfam=T is right!)',default='N'):
                 os.unlink(resfile)
                 self.printLog('#DEL','Dodgy results file "%s" deleted.' % resfile)
             return False
         ### Read in Search results ###
         hitname = None
         hx = 0      # Number of query sequences read from file
         seqx = 0    # Number of sequences in original file (read from footer of reduced files)
         newresfile = '%s.partial' % resfile
         if os.path.exists(newresfile): os.unlink(newresfile)

         def appendReduced(text):    # Appends text to the partial reduced results file and closes it again
             NEWRES = open(newresfile, 'a')
             try: NEWRES.write(text)
             finally: NEWRES.close()

         RESFILE = open(resfile, 'r')
         try:
             line = RESFILE.readline()
             # NOTE(review): the first line is seeded into newres and then processed again by the loop below,
             # so it looks like it is written twice to the reduced file - confirm whether this is intended.
             newres = [rje.chomp(line)]      # Lines of current results section
             newresout = True                # Whether current section should be output to reduced file
             while line:
                 self.progLog('\r#RES', 'Reading %s: %s Seqs; %s Domains; %s Hits' %
                              (resfile, rje.integerString(hx), rje.integerString(len(pfam)), rje.integerString(hitx)))
                 line = rje.chomp(line)
                 qrymatch = rje.matchExp(re_query, line)
                 ## New Sequence ##
                 if qrymatch:
                     # Previous section is only kept if it was the file header or had hits
                     if newres and newresout and self.opt['CleanRes']:
                         appendReduced('\n'.join(newres))
                     newres = ['', line]
                     newresout = False
                     hitname = qrymatch[0]
                     hx += 1
                 ## One Line Data for hits ##
                 elif line.find('Parsed for domains:') == 0:
                     # Skip the two table header lines, then read the first data line
                     newres += [line, rje.chomp(RESFILE.readline()), rje.chomp(RESFILE.readline())]
                     line = rje.chomp(RESFILE.readline())
                     newres.append(line)
                     dommatch = rje.matchExp(re_dom, line)
                     while dommatch:
                         newresout = True
                         (dom, start, end, score, evalue) = dommatch
                         if dom not in pfam:
                             pfam[dom] = self._addSearch()
                             pfam[dom].info['Name'] = dom
                         hit = pfam[dom]._addHit()
                         hit.info['Name'] = hitname
                         aln = hit._addAln()
                         aln.setStat({'SbjStart': int(start), 'SbjEnd': int(end),
                                      'Expect': float(evalue), 'BitScore': float(score)})
                         hitx += 1
                         self.progLog('\r#RES', 'Reading %s: %s Seqs; %s Domains; %s Hits' %
                                      (resfile, rje.integerString(hx), rje.integerString(len(pfam)), rje.integerString(hitx)))
                         line = rje.chomp(RESFILE.readline())
                         newres.append(line)
                         dommatch = rje.matchExp(re_dom, line)
                 ## End of Protein ##
                 elif line[:2] == '//':
                     hitname = None
                     newres.append(line)
                 ## Footer of previously reduced file ##
                 elif rje.matchExp(re_end, line):
                     # Fixed: pattern now uses (\d+), so the original sequence count is actually recovered
                     # and the old footer is not copied into the new reduced file.
                     seqx = int(rje.matchExp(re_end, line)[0])
                 elif newres:
                     newres.append(line)
                 line = RESFILE.readline()
         finally: RESFILE.close()    # Must be closed before the file is deleted/replaced below
         if newres and newresout and self.opt['CleanRes']:
             appendReduced('\n'.join(newres))
         if not seqx: seqx = hx
         if self.opt['CleanRes']:
             appendReduced('\nEnd of rje_hmm reduced results file: %d sequences in original' % seqx)
             os.unlink(resfile)
             os.rename(newresfile, resfile)
             self.printLog('\r#RED','Results file %s replaced with reduced version (%s Hits only)'
                           % (resfile, rje.integerString(hitx)))
         self.printLog('\r#RES', 'Reading %s complete: %s Seqs; %s Domains; %s Hits' %
                       (resfile, rje.integerString(seqx), rje.integerString(len(pfam)), rje.integerString(hitx)))
         return True
     except:
         self.log.errorLog('Calamity during readHMMSearch(%s)' % (resfile))
         return False
Esempio n. 49
0
def menu(callobj,headtext='',menulist=[],choicetext='Please select:',changecase=True,default=''):   ### Main Menu method
    '''
    Main Menu method. Prints a boxed header followed by one line per menu item, prompts for a choice and acts on it.
    The menu is redrawn after each successful edit/action and only ends when a 'return' item is chosen, the user
    opts out after a keyboard interrupt, or an error occurs. An empty menulist returns True straight away.
    >> callobj:Object for which attributes are to be read and altered. Also controls interactivity and log.
    >> headtext:str [''] = Introductory text for menu system.
    >> menulist:list [] = List of menu item tuples (edit code,description,optiontype,optionkey). Read only: never modified.
        - e.g. ('0','Sequence file','info','Name') would edit callobj.info['Name'])
        - If optiontype == 'return' then menu will return the value given in optionkey
        - If optiontype == '' then description will be printed as a breaker (cannot be selected)
        - If optiontype in 'str'/'info', 'num'/'stat' or 'int' then the value is edited with callobj._editChoice()
        - If optiontype in 'bool'/'opt' then the boolean value is toggled
        - If optiontype == 'list' then the callobj._editChoice() result is split on whitespace into callobj.list
        - If optiontype == 'infile' then callobj.info['Name'] would be changed using rje.getFileName()
        - If optiontype == 'outfile' then callobj.info['Name'] would be changed using rje.getFileName(mustexist=False,confirm=True)
        - If optiontype == 'showtext' then optionkey should contain text to be printed with verbose
        - If optiontype == 'addcmd' then commands can be added.
    >> choicetext:str ['Please select:'] = Text to display for choice option
    >> changecase:boolean [True] = change all choices and codes to upper text
    >> default:str [''] = Default choice handed to rje.choice(). If not empty, it is also the value returned when the
        user chooses to exit the menu after a keyboard interrupt.
    << returns optionkey if appropriate, else True. Falls through (returning None) if an error was caught and passed
        to callobj.errorLog(). KeyboardInterrupt is re-raised if the user chooses to terminate.
    '''
    try:### ~ [0] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        attrtypes = ('info','list','opt','stat','int','str','bool','num')   # Shown using their own attribute type
        filetypes = ('infile','outfile')                                    # Stored (and shown) as 'info' attributes
        ## ~ [0a] Choice Dictionary ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
        choicedict = {}     # Maps (optionally upper-cased) edit code onto (optiontype,optionkey)
        for (code,desc,opttype,key) in menulist:
            if not opttype: continue    # Breaker lines are display-only
            if changecase: code = code.upper()
            choicedict[code] = (opttype,key)
        ## ~ [0b] Setup Header Text ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
        headlines = headtext.split('\n')
        maxlen = max([len(line) for line in headlines])
        border = '#' * (maxlen + 10)    # '# #> ' and ' <# #' add five characters to each side of the text
        headlist = [border] + ['# #> %s <# #' % line.ljust(maxlen) for line in headlines] + [border]
        # The header never changes between redraws so it is built once. It always ends with the '#' border, hence
        # exactly two newlines are needed to leave a blank line before the first menu item.
        menuhead = '\n%s\n\n' % '\n'.join(headlist)
        ### ~ [1] Main Menu Loop ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
        while menulist:
            ## ~ [1a] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            # Menu text is rebuilt on every pass so that it displays the current (possibly just edited) values
            mtxt = menuhead
            for (code,desc,opttype,key) in menulist:
                if opttype and (code or desc):
                    mtxt += '<%s> %s' % (code,desc)
                    if opttype in attrtypes:
                        mtxt += ': %s' % callobj.getAttribute(opttype,key,default='#!#ERROR#!#')
                    elif opttype in filetypes:
                        mtxt += ': %s' % callobj.getAttribute('info',key,default='#!#ERROR#!#')
                else: mtxt += desc
                mtxt += '\n'
            ## ~ [1b] Give Choices ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            print(mtxt)     # Single-argument parenthesised print behaves the same under Python 2 and 3
            while True:     # Keep asking until a choice is acted upon: break = redraw menu; return = leave menu
                try:## ~ Input user choice ~~~ ##
                    choice = rje.choice(choicetext,default=default)
                    if changecase: choice = choice.upper()
                    ## ~ Process user choice ~ ##
                    if choice in choicedict:
                        (opttype,key) = choicedict[choice]
                        if opttype in ('str','info'):
                            callobj.setInfo({key:callobj._editChoice(key,callobj.getStr(key))})
                        elif opttype in ('num','stat'):
                            callobj.setStat({key:callobj._editChoice(key,callobj.getNum(key),numeric=True)})
                        elif opttype == 'int':
                            callobj.setStat({key:int(callobj._editChoice(key,callobj.getInt(key),numeric=True))})
                        elif opttype in ('bool','opt'):
                            callobj.setOpt({key: not callobj.getBool(key)})
                        elif opttype == 'list':
                            callobj.list[key] = callobj._editChoice(key,callobj.list[key]).split()
                        elif opttype == 'infile':
                            callobj.setInfo({key: rje.getFileName('%s File Name?' % key,callobj.getStr(key))})
                        elif opttype == 'outfile':
                            callobj.setInfo({key: rje.getFileName('%s File Name?' % key,callobj.getStr(key),mustexist=False,confirm=True)})
                        elif opttype == 'showtext':
                            callobj.verbose(-1,-1,key)
                            break
                        elif opttype == 'addcmd':
                            prevcmd = callobj.cmd_list
                            # NOTE(review): 'out' is not defined anywhere in this function. Unless the module has a
                            # global named 'out', this line raises NameError (caught by the outer handler below).
                            # Confirm what rje.inputCmds() expects as its first argument and pass that instead.
                            callobj.cmd_list = rje.inputCmds(out,prevcmd)
                            callobj.printLog('#CMD','User Added commands: %s' % callobj.cmd_list)
                            callobj._cmdList()      # Called while cmd_list holds only the new commands
                            callobj.cmd_list = prevcmd + callobj.cmd_list
                            break
                        elif opttype == 'return': return key
                        if opttype in attrtypes or opttype in filetypes:
                            callobj.printLog('#%s' % opttype.upper(),'User edited %s parameter' % key)
                            break
                    # Reached for unknown codes and for codes mapped to an unsupported optiontype
                    print('Choice "%s" not recognised!\n' % choice)
                except KeyboardInterrupt:
                    if rje.yesNo('Terminate program?'): raise
                    if rje.yesNo('Exit menu and proceed?'):
                        if default: return default
                        return True
                    # Otherwise: carry on and ask for a choice again
        ### End ###
        return True
    except KeyboardInterrupt: raise
    except:     # Deliberately broad (as elsewhere in this code): anything else is logged via callobj when available
        if callobj: callobj.errorLog('Major disaster in rje_menu.menu()',quitchoice=True)
        else: raise