Esempio n. 1
0
 def loadXRef(self):  ### Load Identifier XRef Data
     '''
     Load identifier cross-reference data as the 'XRef' database table.
     A previously saved '<Basefile>.xref.tdt' is re-used unless self.opt['Force'] is set. Otherwise the
     self.info['XRef'] file is loaded, recognised long headers are renamed and the table is saved.
     << returns the table object from addTable(), or False if rje.checkForFile() rejects the XRef file.
     Any exception is logged and then re-raised.
     '''
     try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         savefile = '%s.xref.tdt' % self.info['Basefile']
         if rje.exists(savefile) and not self.opt['Force']:
             # Saved table is keyed on '#', unlike the raw file, which is keyed on all fields
             return self.db().addTable(savefile, mainkeys=['#'], datakeys='All', name='XRef')
         if not rje.checkForFile(self.info['XRef']):
             return False
         # Long source header -> short field name
         newheads = {
             'Ensembl Gene ID': 'EnsG',
             'Ensembl Protein ID': 'EnsP',
             'Associated Gene Name': 'Gene',
             'Associated Gene DB': 'GeneDB',
             'UniProt/SwissProt ID': 'UniprotID',
             'UniProt/SwissProt Accession': 'UniProt',
             'SGD Gene': 'SGD'
         }
         ### ~ [2] Load data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         xref = self.db().addTable(self.info['XRef'], mainkeys='All', datakeys='All', name='XRef')
         for (oldhead, newhead) in newheads.items():
             if oldhead in xref.fields():  # Only rename headers actually present
                 xref.renameField(oldhead, newhead)
         xref.saveToFile(savefile)
         return xref
     except:
         self.errorLog(rje_zen.Zen().wisdom())
         raise  # Delete this if method error not terrible
Esempio n. 2
0
 def setup(self):    ### Main class setup method.
     '''
     Main class setup method: sets the basefile and 'SaveDis' output name, stores the peptide list as text
     and creates the 'AADis' and 'PeptDis' distance matrix objects.
     << returns True if setup completed, or False if any error was raised (the error is logged).
     '''
     try:### ~ [0] Setup File names etc. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         unset = ['', 'none']
         if self.getStr('SaveDis').lower() in unset:
             # Base name precedence: existing basefile > Peptides file name > 'peptides'
             base = 'peptides'
             if rje.checkForFile(self.getStr('Peptides')):
                 base = rje.baseFile(self.getStr('Peptides'))
             if self.baseFile().lower() not in unset:
                 base = self.baseFile()
             self.baseFile(base)
             savedis = '%s.%s.%s' % (base, self.getStr('PeptDis'), self.getStr('PeptCluster'))
             self.setStr({'SaveDis': savedis})
         # The extension is appended whether or not SaveDis was generated above
         if self.getStr('OutMatrix') in ['tdt', 'csv', 'png', 'phylip']:
             extension = '.%s' % self.getStr('OutMatrix')[:3]
         else:
             extension = '.txt'
         self.str['SaveDis'] += extension
         self.dict['Output']['peptides'] = string.join(self.list['Peptides'], '\n')
         ### ~ [1] Setup Distance Matrix ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         for (objkey, title) in [('AADis', 'Pairwise AA distances'), ('PeptDis', 'Pairwise peptide distances')]:
             self.obj[objkey] = rje_dismatrix.DisMatrix(self.log, ['nsf2nwk=T'] + self.cmd_list)
             self.obj[objkey].info['Name'] = title
         ### ~ [2] Optional loading of AA Distance Matrix ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if self.getStr('AADis').lower() not in unset:
             self.obj['AADis'].loadMatrix(self.getStr('AADis'))
         else:
             # No matrix file given: populate AA distances from the AA property matrix differences
             aaprop = rje_aaprop.AAPropMatrix(self.log, self.cmd_list)
             self.obj['AAProp'] = aaprop
             aadis = self.obj['AADis']
             for aa in aaprop.pdif:
                 aadis.addDis(aa[0], aa[1], aaprop.pdif[aa])
         return True
     except:
         self.errorLog('Problem during %s setup.' % self)
         return False  # Setup failed
Esempio n. 3
0
 def processHHPID(self):  ### Process HHPID interactions
     '''
     Process HHPID interactions into the 'HHPIDMap' table.
     If '<basefile>.HHPIDMap.tdt' passes rje.checkForFile() it is simply loaded. Otherwise the 'HHPID' and
     'GeneMap' tables are joined on 'Entrez', reduced to the mapping fields, compressed on HIV and Gene,
     saved, and the sorted keys of the 'AccHIV' index are written (one per line) to '<Basefile>.hivacc'.
     << returns the HHPIDMap table, or False if any error was raised (the error is logged).
     '''
     try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         mapfile = '%s.HHPIDMap.tdt' % self.basefile()
         if rje.checkForFile(mapfile):
             mdb = self.db().addTable(mapfile, ['HIV', 'Gene'], 'All', name='HHPIDMap')
             return mdb
         hdb = self.db('HHPID')
         gdb = self.db('GeneMap')
         # NOTE(review): the 'PPI' table is looked up but never used here; call kept in case it matters.
         self.db('PPI')
         mdb = self.db().joinTables(name='HHPIDMap',
                                    join=[(hdb, 'Entrez'), (gdb, 'Entrez')],
                                    newkey=['#'],
                                    empties=False,
                                    keeptable=True)
         keepfields = ['#', 'AccHIV', 'EntrezHIV', 'HIV', 'Entrez', 'Gene',
                       'Symbol', 'UniProt', 'EnsEMBL', 'EnsLoci']
         for field in mdb.fields()[0:]:  # Iterate over a copy: fields are dropped inside the loop
             if field not in keepfields:
                 mdb.dropField(field)
         mdb.compress(['HIV', 'Gene'], default='str')
         mdb.dropField('#')
         mdb.saveToFile()
         ### ~ [2] Save viral accession numbers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         acclist = rje.sortKeys(mdb.index('AccHIV'))
         # Context manager guarantees the handle is flushed and closed, even if the write fails
         with open('%s.hivacc' % self.getStr('Basefile'), 'w') as accfile:
             accfile.write('%s\n' % '\n'.join(acclist))
         return mdb
     except:
         self.errorLog('%s.processHHPID error' % self)
         return False
Esempio n. 4
0
 def hmmSearch(self,hmm,dbase=None,outfile=None,wait=True):    ### Performs HMMer Search using object attributes
     '''
     Performs HMMer Search using object attributes.
     >> hmm:str = Name of HMM file
     >> dbase:str = Name of DBase file [self.info['SearchDB']]
     >> outfile:str = Name of Output file. If empty/None/'none', a '%s.%s.hmmer' name is built from
        rje.baseFile() of hmm and dbase, and an existing result that rje.isYounger() selects over both
        input files is re-used unless self.opt['Force'] is set.
     >> wait:boolean  = whether to wait for HMMer. [True]
        If False, the command is launched in the background and the output is NOT gzipped, as the
        file may still be being written when this method returns.
     << returns outfile or None if fails. The returned name never carries the '.gz' suffix.
     '''
     try:### ~ [1] ~ Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         ## ~ [1a] ~ Input files ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         if not rje.checkForFile(hmm):
             self.printLog('#ERR','HMM file %s is missing!' % hmm)
             return None
         if not dbase: dbase = self.info['SearchDB']
         if not rje.checkForFile(dbase):
             self.printLog('#ERR','Database file "%s" is missing!' % dbase)
             return None
         ## ~ [1b] ~ Output file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         if not outfile or outfile.lower() in ['','none']:       # Make an outfile per search
             outfile = '%s.%s.hmmer' % (rje.baseFile(hmm,True),rje.baseFile(dbase,True))
             resfile = outfile
             gzfile = '%s.gz' % outfile
             # Fall back on a gzipped result if only that exists
             if not os.path.exists(outfile) and self.opt['GZip'] and os.path.exists(gzfile) and not self.opt['Force']:
                 resfile = gzfile
             if not self.opt['Force'] and rje.isYounger(resfile,hmm) == resfile and rje.isYounger(resfile,dbase) == resfile:
                 self.printLog('#HMM','HMM results file "%s" exists.' % resfile)
                 return outfile      # Already exists
             rje.backup(self,outfile,unlink=True)
         ### ~ [2] ~ HMM Search ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if self.opt['HMMPFam']: program = 'hmmpfam --cut_ga'
         else: program = 'hmmsearch'
         # The command itself redirects stdout into outfile
         _command = '%s %s %s %s > %s' % (program,' '.join(self.list['HMMOptions']),hmm,dbase,outfile)
         self.log.printLog('#HMM',_command)
         if not wait: os.system(self.info['HMMerPath'] + _command + ' &')
         elif not os.path.exists(outfile) or self.opt['Force']:
             # Reading the pipe to EOF blocks until the search finishes; both handles are closed explicitly
             with os.popen(self.info['HMMerPath'] + _command) as hmmer:
                 captured = hmmer.read()
             with open(outfile,'a') as out:
                 out.write(captured)
         self.printLog('#HMM','Outfile produced for %s: %s.' % (hmm,outfile))
         if self.opt['GZip'] and wait:   # Never gzip a file that a background search may still be writing
             rje.backup(self,'%s.gz' % outfile,unlink=True)
             os.system('gzip %s' % outfile)
             self.printLog('#GZIP','%s gzipped to save space' % outfile)
         return outfile
     except:
         self.log.errorLog('Fatal Error during hmmSearch(%s)' % hmm)
         return None
Esempio n. 5
0
 def loadPPI(self):  ### Load pairwise interaction data
     '''
     Load pairwise interaction data from self.info['PPIFile'] into self.dict['PPI'].
     The first two whitespace-separated fields of each line are stored as a symmetrical interaction:
     self.dict['PPI'][A] lists B and self.dict['PPI'][B] lists A, without duplicates.
     Lines with fewer than two fields are skipped.
     << returns False if rje.checkForFile() rejects the PPI file; otherwise returns None.
     Any exception is logged and then re-raised.
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if not rje.checkForFile(self.info['PPIFile']): return False
         ### ~ [2] Load data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         ppidict = self.dict['PPI']
         # Stream the file line by line and guarantee the handle is closed
         with open(self.info['PPIFile'],'r') as ppifile:
             for line in ppifile:
                 fields = rje.chomp(line).split()
                 if len(fields) < 2: continue    # Not a pair: explicit check rather than swallowing all errors
                 (pa,pb) = fields[:2]
                 for (hub,spoke) in [(pa,pb),(pb,pa)]:
                     if hub not in ppidict: ppidict[hub] = []
                     if spoke not in ppidict[hub]: ppidict[hub].append(spoke)
                 self.progLog('\r#PPI','Loading PPI data: %s proteins' % rje.integerString(len(ppidict)))
         self.printLog('\r#PPI','Loaded PPI data for %s proteins' % rje.integerString(len(ppidict)))
     except: self.errorLog(rje_zen.Zen().wisdom()); raise   # Delete this if method error not terrible
Esempio n. 6
0
 def loadXRef(self):     ### Load Identifier XRef Data
     '''
     Load identifier cross-reference data as the 'XRef' database table.
     Re-uses a saved '<Basefile>.xref.tdt' unless self.opt['Force'] is set; otherwise loads the
     self.info['XRef'] file, renames recognised long headers and saves the table.
     << returns the table object from addTable(), or False if rje.checkForFile() rejects the XRef file.
     Any exception is logged and then re-raised.
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         tdtfile = '%s.xref.tdt' % self.info['Basefile']
         if rje.exists(tdtfile) and not self.opt['Force']:
             # Saved table is keyed on '#', unlike the raw file, which is keyed on all fields
             return self.db().addTable(tdtfile,mainkeys=['#'],datakeys='All',name='XRef')
         if not rje.checkForFile(self.info['XRef']):
             return False
         # Long source header -> short field name
         shorthead = {'Ensembl Gene ID':'EnsG',
                      'Ensembl Protein ID':'EnsP',
                      'Associated Gene Name':'Gene',
                      'Associated Gene DB':'GeneDB',
                      'UniProt/SwissProt ID':'UniprotID',
                      'UniProt/SwissProt Accession':'UniProt',
                      'SGD Gene':'SGD'}
         ### ~ [2] Load data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         xref = self.db().addTable(self.info['XRef'],mainkeys='All',datakeys='All',name='XRef')
         for (longname,shortname) in shorthead.items():
             if longname not in xref.fields(): continue  # Header absent from this file
             xref.renameField(longname,shortname)
         xref.saveToFile(tdtfile)
         return xref
     except:
         self.errorLog(rje_zen.Zen().wisdom())
         raise   # Delete this if method error not terrible
Esempio n. 7
0
 def loadPillars(self):  ### Load YGOB Pillar data
     '''
     Load YGOB Pillar data from self.info['Pillars'] into self.list['Pillars'].
     Each line is split on whitespace; lines with fewer than 17 fields are skipped. The sixth field
     (ancestral gene) and all '---' placeholders are removed and any remaining entries are appended as one list.
     << returns False if rje.checkForFile() rejects the Pillars file; otherwise returns None.
     Any exception is logged and then re-raised.
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if not rje.checkForFile(self.info['Pillars']):
             return False
         ### ~ [2] Load data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         loaded = self.list['Pillars']
         for line in self.loadFromFile(filename=self.info['Pillars'],chomplines=True):
             fields = string.split(line)
             if len(fields) < 17:
                 continue
             del fields[5]     # Remove ancestral gene
             loci = [locus for locus in fields if locus != '---']    # Drop empty placeholders
             if loci:
                 loaded.append(loci)
             self.progLog('\r#YGOB','Loading Pillar data: %s loci' % rje.integerString(len(loaded)))
         self.printLog('\r#YGOB','Loaded Pillar data for %s loci' % rje.integerString(len(loaded)))
     except:
         self.errorLog(rje_zen.Zen().wisdom())
         raise   # Delete this if method error not terrible
Esempio n. 8
0
 def processHHPID(self): ### Process HHPID interactions
     '''
     Process HHPID interactions into the 'HHPIDMap' table.
     If '<basefile>.HHPIDMap.tdt' passes rje.checkForFile() it is simply loaded. Otherwise the 'HHPID' and
     'GeneMap' tables are joined on 'Entrez', reduced to the mapping fields, compressed on HIV and Gene,
     saved, and the sorted keys of the 'AccHIV' index are written (one per line) to '<Basefile>.hivacc'.
     << returns the HHPIDMap table, or False if any error was raised (the error is logged).
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         mapfile = '%s.HHPIDMap.tdt' % self.basefile()
         if rje.checkForFile(mapfile):
             mdb = self.db().addTable(mapfile,['HIV','Gene'],'All',name='HHPIDMap')
             return mdb
         hdb = self.db('HHPID')
         gdb = self.db('GeneMap')
         # NOTE(review): the 'PPI' table is looked up but never used here; call kept in case it matters.
         self.db('PPI')
         mdb = self.db().joinTables(name='HHPIDMap',join=[(hdb,'Entrez'),(gdb,'Entrez')],newkey=['#'],empties=False,keeptable=True)
         keepfields = ['#','AccHIV','EntrezHIV','HIV','Entrez','Gene','Symbol','UniProt','EnsEMBL','EnsLoci']
         for field in mdb.fields()[0:]:  # Iterate over a copy: fields are dropped inside the loop
             if field not in keepfields: mdb.dropField(field)
         mdb.compress(['HIV','Gene'],default='str'); mdb.dropField('#')
         mdb.saveToFile()
         ### ~ [2] Save viral accession numbers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         acclist = rje.sortKeys(mdb.index('AccHIV'))
         # Context manager guarantees the handle is flushed and closed, even if the write fails
         with open('%s.hivacc' % self.getStr('Basefile'),'w') as accfile:
             accfile.write('%s\n' % '\n'.join(acclist))
         return mdb
     except: self.errorLog('%s.processHHPID error' % self); return False
Esempio n. 9
0
 def loadPillars(self):  ### Load YGOB Pillar data
     '''
     Load YGOB Pillar data from self.info['Pillars'] into self.list['Pillars'].
     Lines are split on whitespace and those with fewer than 17 fields are ignored. The sixth field
     (ancestral gene) and every '---' placeholder are discarded; non-empty results are appended as lists.
     << returns False if rje.checkForFile() rejects the Pillars file; otherwise returns None.
     Any exception is logged and then re-raised.
     '''
     try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if not rje.checkForFile(self.info['Pillars']):
             return False
         ### ~ [2] Load data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         pillarlist = self.list['Pillars']
         filelines = self.loadFromFile(filename=self.info['Pillars'], chomplines=True)
         for line in filelines:
             columns = string.split(line)
             if len(columns) < 17:
                 continue
             columns.pop(5)  # Remove ancestral gene
             genes = [gene for gene in columns if gene != '---']  # Drop empty placeholders
             if genes:
                 pillarlist.append(genes)
             loadtxt = 'Loading Pillar data: %s loci' % rje.integerString(len(pillarlist))
             self.progLog('\r#YGOB', loadtxt)
         donetxt = 'Loaded Pillar data for %s loci' % rje.integerString(len(pillarlist))
         self.printLog('\r#YGOB', donetxt)
     except:
         self.errorLog(rje_zen.Zen().wisdom())
         raise  # Delete this if method error not terrible
Esempio n. 10
0
 def loadPPI(self):  ### Load pairwise interaction data
     '''
     Load pairwise interaction data from self.info['PPIFile'] into self.dict['PPI'].
     The first two whitespace-separated fields of each line are stored as a symmetrical interaction:
     self.dict['PPI'][A] lists B and self.dict['PPI'][B] lists A, without duplicates.
     Lines with fewer than two fields are skipped.
     << returns False if rje.checkForFile() rejects the PPI file; otherwise returns None.
     Any exception is logged and then re-raised.
     '''
     try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if not rje.checkForFile(self.info['PPIFile']): return False
         ### ~ [2] Load data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         ppidict = self.dict['PPI']
         # Stream the file line by line and guarantee the handle is closed
         with open(self.info['PPIFile'], 'r') as ppifile:
             for line in ppifile:
                 fields = rje.chomp(line).split()
                 if len(fields) < 2:
                     continue  # Not a pair: explicit check rather than swallowing all errors
                 (pa, pb) = fields[:2]
                 for (hub, spoke) in [(pa, pb), (pb, pa)]:
                     if hub not in ppidict:
                         ppidict[hub] = []
                     if spoke not in ppidict[hub]:
                         ppidict[hub].append(spoke)
                 self.progLog(
                     '\r#PPI', 'Loading PPI data: %s proteins' %
                     rje.integerString(len(ppidict)))
         self.printLog(
             '\r#PPI', 'Loaded PPI data for %s proteins' %
             rje.integerString(len(ppidict)))
     except:
         self.errorLog(rje_zen.Zen().wisdom())
         raise  # Delete this if method error not terrible
Esempio n. 11
0
 def hmmSearch(
         self,
         hmm,
         dbase=None,
         outfile=None,
         wait=True):  ### Performs HMMer Search using object attributes
     '''
     Performs HMMer Search using object attributes.
     >> hmm:str = Name of HMM file
     >> dbase:str = Name of DBase file [self.info['SearchDB']]
     >> outfile:str = Name of Output file. If empty/None/'none', a '%s.%s.hmmer' name is built from
        rje.baseFile() of hmm and dbase, and an existing result that rje.isYounger() selects over both
        input files is re-used unless self.opt['Force'] is set.
     >> wait:boolean  = whether to wait for HMMer. [True]
        If False, the command is launched in the background and the output is NOT gzipped, as the
        file may still be being written when this method returns.
     << returns outfile or None if fails. The returned name never carries the '.gz' suffix.
     '''
     try:  ### ~ [1] ~ Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         ## ~ [1a] ~ Input files ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         if not rje.checkForFile(hmm):
             self.printLog('#ERR', 'HMM file %s is missing!' % hmm)
             return None
         if not dbase: dbase = self.info['SearchDB']
         if not rje.checkForFile(dbase):
             self.printLog('#ERR', 'Database file "%s" is missing!' % dbase)
             return None
         ## ~ [1b] ~ Output file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         if not outfile or outfile.lower() in ['', 'none']:  # Make an outfile per search
             outfile = '%s.%s.hmmer' % (rje.baseFile(hmm, True),
                                        rje.baseFile(dbase, True))
             resfile = outfile
             gzfile = '%s.gz' % outfile
             # Fall back on a gzipped result if only that exists
             if (not os.path.exists(outfile) and self.opt['GZip']
                     and os.path.exists(gzfile) and not self.opt['Force']):
                 resfile = gzfile
             if (not self.opt['Force']
                     and rje.isYounger(resfile, hmm) == resfile
                     and rje.isYounger(resfile, dbase) == resfile):
                 self.printLog('#HMM',
                               'HMM results file "%s" exists.' % resfile)
                 return outfile  # Already exists
             rje.backup(self, outfile, unlink=True)
         ### ~ [2] ~ HMM Search ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if self.opt['HMMPFam']:
             program = 'hmmpfam --cut_ga'
         else:
             program = 'hmmsearch'
         # The command itself redirects stdout into outfile
         _command = '%s %s %s %s > %s' % (
             program, ' '.join(self.list['HMMOptions']), hmm, dbase, outfile)
         self.log.printLog('#HMM', _command)
         if not wait: os.system(self.info['HMMerPath'] + _command + ' &')
         elif not os.path.exists(outfile) or self.opt['Force']:
             # Reading the pipe to EOF blocks until the search finishes; both handles are closed explicitly
             with os.popen(self.info['HMMerPath'] + _command) as hmmer:
                 captured = hmmer.read()
             with open(outfile, 'a') as out:
                 out.write(captured)
         self.printLog('#HMM',
                       'Outfile produced for %s: %s.' % (hmm, outfile))
         if self.opt['GZip'] and wait:  # Never gzip a file that a background search may still be writing
             rje.backup(self, '%s.gz' % outfile, unlink=True)
             os.system('gzip %s' % outfile)
             self.printLog('#GZIP', '%s gzipped to save space' % outfile)
         return outfile
     except:
         self.log.errorLog('Fatal Error during hmmSearch(%s)' % hmm)
         return None
Esempio n. 12
0
    def pairwiseAQ(self,seqlist=None,query=None,focus=[0,0]):     ### Performs PAQ on seqlist, adding seq.info['PAQ']
        '''
        Performs PAQ on seqlist, adding seq.info['PAQ']
        >> seqlist:rje_seq.SeqList Object
        - NB. This object will itself have sequences removed from it, so beware!
        - A new info key will be added: PAQ = PAQ sequences with alignment Xs
        >> query:Sequence object that other sequences are linked to [seqlist.obj['QuerySeq']]
        - A random sequence is used instead if self.opt['NoQuery'] is set or no query is available.
        >> focus:list of range positions [X:Y] to look at. If Y=0 then [X:].
        - Only passed on to seqlist.mapX() when the query is processed.
        << No return value. Any exception is logged with the stage reached and is not re-raised.
        '''
        ### <PAQ0> ### Setup
        try:
            _stage = '<0> Setup'
            haqlist = seqlist   # SeqList Object to store individually Xd sequences
            if not query:
                query = haqlist.obj['QuerySeq']
            if self.opt['NoQuery'] or not query:
                query = haqlist.seq[random.randint(0,haqlist.seqNum()-1)]
                self.log.printLog('#QRY','Temp (random) query %s assigned for PAQ' % query.shortName())
            #!# paqx = [False] * seqlist.seq[0].seqLen()    # List of whether a column of the alignment is bad (has an X) [True] or not [False]
            #!# - make this a method?!

            pwaq = {}    # Dictionary of lists of pairwise alignements
            block_align = {}    # Dictionary of whether residue in block of sequence that is well-aligned or not
            for seq in haqlist.seq:
                block_align[seq] = [False] * seq.seqLen()
                # Original sequence is parked in 'PAQ' and swapped back in at <PAQ5>
                seq.info['PAQ'] = seq.info['Sequence'][0:]
                if 'SAQX' in seq.info and len(seq.info['SAQX']) == seq.seqLen():   #!# Should no longer be issues due to length changes following realignment
                    seq.info['Sequence'] = seq.info['SAQX'][0:]
                elif 'SAQX' in seq.info:
                    self.log.errorLog('Cannot use SAQX for %s in PAQ as wrong length.' % seq.shortName(),printerror=False)
                for otherseq in haqlist.seq:
                    pwaq[(seq,otherseq)] = [False] * seq.seqLen()

        ### <PAQ1> ### Directional Pairwise Comparisons of sequences
            _stage = '<1> Pairwise Comparisons'
            infotxt = 'PAQ%d: Pairwise Comparisons ...' % self.stat['PAQCyc']
            for seq in haqlist.seq:
                for otherseq in haqlist.seq:
                    myinfo = '%s %.1f%% %.1f%%   ' % (infotxt,(100.0 * haqlist.seq.index(seq) / haqlist.seqNum()),(100.0 * haqlist.seq.index(otherseq) / haqlist.seqNum()))
                    self.log.printLog('\r#PAQ',myinfo,log=False,newline=False)
                    for r in range(seq.seqLen()):
                        ar = seq.info['Sequence'][r]
                        ## <i> ## Look for PW aligned block
                        _stage = '<1-i> Pairwise Comparisons'
                        if ar not in ['-','X']: # Start of test block
                            blen = 0    # Block length (PAQBlock) = AAs
                            win = 0     # Window length = all sequence
                            matchx = 0  # Score for residues in window 
                            while blen < self.stat['PAQBlock'] and (r+win) < seq.seqLen():     # This time we allow overshoots in both directions
                                ar = seq.info['Sequence'][r+win]
                                at = otherseq.info['Sequence'][r+win]
                                if 'X' in [ar,at]:     # Hit Bad Region: Abort
                                    break
                                else:   # Better region
                                    if ar != '-':   
                                        blen += 1   # Increase Block
                                        matchx += self._saqCon(ar,at)
                                win += 1
                        ## <ii> ## Update pwaq if block good
                            _stage = '<1-ii> Pairwise Comparisons'
                            if matchx >= self.stat['PAQMatch']:
                                for w in range(win):
                                    if seq.info['Sequence'][r+w] in ['-','X']:
                                        pwaq[(seq,otherseq)][r+w] = False
                                    else:
                                        pwaq[(seq,otherseq)][r+w] = True           
            # Literal percent signs must be doubled: the unescaped form was parsed as a conversion specifier
            self.log.printLog('\r#PAQ','%s 100.0%% 100.0%%.   ' % infotxt,log=False)
                
        ### <PAQ2> ### Link back to Query
            _stage = '<2> Linking to Query'
            ### <PAQ2a> ### Network of Pairwise Quality alignments
            _stage = '<2a> Linking to Query'
            #self.verbose(1,3,'PAQ%d: Linking Residues to Query (%s)' % (self.stat['PAQCyc'],query.shortName()),0)
            infotxt = 'PAQ%d: Linking Residues to Query (%s) ...' % (self.stat['PAQCyc'],query.shortName())
            for r in range(query.seqLen()):
                _stage = '<2a> Linking to Query'
                self.log.printLog('\r#PAQ','%s %.1f%%' % (infotxt,(100.0 * r / query.seqLen())),log=False,newline=False)
                qok = {}    # Dictionary of whether residue in seq OK, i.e. linked to query
                for seq in haqlist.seq:
                    qok[seq] = False
                qok[query] = True
                sok = [0,1] # List of OK sequence for residue
                # Repeat until the number of linked sequences stops changing between passes
                while sok[-2] != sok[-1]:
                    ## <i> ## Match pairs, starting with query
                    _stage = '<2a-i> Linking to Query'
                    for seq in haqlist.seq:
                        if qok[seq]:
                            for otherseq in haqlist.seq:
                                if pwaq[(seq,otherseq)][r] or pwaq[(otherseq,seq)][r]:
                                    qok[otherseq] = True
                    ## <ii> ## Update sok
                    _stage = '<2a-ii> Linking to Query'
                    sok.append(0)
                    for seq in haqlist.seq:
                        if qok[seq]:
                            sok[-1] += 1
                            block_align[seq][r] = True
                _stage = '<2a-iii> Linking to Query'
                if sok[-1] == 1:    # Only query OK!
                    block_align[query][r] = False
            self.log.printLog('\r#PAQ','%s 100.0%%' % infotxt,log=False)
            
            ### <PAQ2b> ### Allow for divergence (Conserved Anchors)
            _stage = '<2b> Anchors'
            if self.opt['Anchors']:
                infotxt = 'PAQ%d: Accounting for divergence within aligned regions ...' % self.stat['PAQCyc']
                ## <i> ## Setup gapped list
                gapped = [False] * query.seqLen()   # Whether column of alignment is gapped
                for seq in haqlist.seq:
                    self.log.printLog('\r#PAQ','%s %.1f%%  ' % (infotxt,(50.0 * haqlist.seq.index(seq) / haqlist.seqNum())),log=False,newline=False)
                    # Trim terminal gaps so only internal gaps mark a column as gapped.
                    # Bounds checks stop an all-gap (or empty) sequence running off the end.
                    (start,end) = (0,seq.seqLen())
                    while start < end and seq.info['Sequence'][start] == '-':
                        start += 1
                    while end > start and seq.info['Sequence'][end-1] == '-':
                        end -=1
                    for r in range(start,end):
                        if seq.info['Sequence'][r] == '-':
                            gapped[r] = True
                ## <ii> ## Correction
                for seq in haqlist.seq:
                    self.log.printLog('\r#PAQ','%s %.1f%%  ' % (infotxt,(50 + (50.0 * haqlist.seq.index(seq) / haqlist.seqNum()))),log=False,newline=False)
                    for r in range(seq.seqLen()):
                        if block_align[seq][r] or gapped[r]:    # No need for correction
                            continue
                        # Move in both directions: if good residues (or sequence end) reached before gaps then reinstate
                        # NOTE(review): fok and bok are never set to True below, so the "Reinstate" branch cannot
                        # currently run. Left unchanged as enabling it would alter results (and needs the window
                        # clamped to the sequence) - confirm the intended behaviour before changing.
                        winf = 0
                        fwd = True
                        fok = False
                        winb = 0
                        bwd = True
                        bok = False
                        while fwd or bwd:
                            # End of seqs
                            if (r + winf) >= seq.seqLen():
                                fwd = False
                            if (r - winb) < 0:
                                bwd = False
                            # Gaps/OK
                            if fwd:
                                if gapped[r+winf]:
                                    fok = False
                                    fwd = False
                                elif block_align[seq][r+winf]:
                                    fwd = False
                                else:
                                    winf += 1
                            if bwd:
                                if gapped[r-winb]:
                                    bok = False
                                    bwd = False
                                elif block_align[seq][r-winb]:
                                    bwd = False
                                else:
                                    winb += 1
                        if fok and bok: # Reinstate
                            for w in range(r-winb,r+winf+1):
                                block_align[seq][w] = True
                self.log.printLog('\r#PAQ','%s 100.0%%  ' % infotxt,log=False)

        ### <PAQ3> ### X out badly-aligned blocks
            _stage = '<3> Making bad sequence blocks'
            for seq in haqlist.seq:
                newseq = ''
                for r in range(seq.seqLen()):
                    if block_align[seq][r] or seq.info['Sequence'][r] == '-':
                        newseq += seq.info['Sequence'][r]
                    else: # Bad residue
                        newseq += 'X'
                seq.info['Sequence'] = newseq[0:]
            #!# Add saving of data in 'datafull' option

        ### <PAQ4> ### Remove sequences and/or badly-aligned regions
            _stage = '<4> Removing sequences/regions'
            self.verbose(0,4,'PAQ%d: Removing bad sequences and/or dodgy regions...' % self.stat['PAQCyc'],0)
            ## <PAQ4a> ## Process Query first - only interested in good regions within query
            if self.opt['NoQuery']:  # No preprocessing of Query
                self.verbose(0,4,'no Master Query processing...',0)
            else:
                haqlist.mapX(query, qtrim=True, focus=focus) # Replaces other sequence ends and query X columns with Xs
                self.verbose(0,4,'Query (%s) processed...' % query.shortName(),0)
            self.verbose(0,3,'',1)
            if self.opt['ManPAQ']:
                haqlist.saveFasta(seqfile='%s.manpaq.fas' % haqlist.info['Basefile'])

            ## <PAQ4b> ## Cycle through other sequences (worst first) until no more good residues are lost
            goodres = [0, self._getGood(haqlist.seq)]   # List of number of 'good' residues
            goodseq = [0, haqlist.seqNum()]
            while goodres[-1] != goodres[-2] or goodseq[-1] != goodseq[-2]:
                colgood = [0] * haqlist.seq[0].seqLen()    # Good residues per column
                for r in range(haqlist.seq[0].seqLen()):
                    for seq in haqlist.seq:
                        if seq.info['Sequence'][r] != '-' and seq.info['Sequence'][r] != 'X':
                            colgood[r] += 1
                ## <i> ## Compare relative loss of masking and losing each sequence
                keepx = {}  # Dictionary of seq:number of lost residues if seq kept
                losex = {}  # Dictionary of seq:number of lost residues if seq lost
                badkx = -1  # Biggest loss if kept
                badlx = -1  # Biggest loss if lost
                bads = None # Worst sequence
                for seq in haqlist.seq:
                    if seq == query and self.opt['NoQuery'] == False:
                        continue    # Next sequence
                    # Calculate keepx and losex
                    keepx[seq] = 0
                    for r in range(seq.seqLen()):
                        if seq.info['Sequence'][r] == 'X':
                            keepx[seq] += colgood[r]
                        #?# In Perl HAQESAC there was an option to ignore Orphans in this calculation. Reinstate?
                    losex[seq] = self._getGood([seq])
                    # Update bads if worse
                    if keepx[seq] > badkx:
                        badkx = keepx[seq]
                        badlx = losex[seq]
                        bads = seq
                    elif keepx[seq] == badkx and losex[seq] < badlx:
                        badlx = losex[seq]
                        bads = seq
                ## <ii> ## Remove bad sequences and/or regions
                if badkx > 0:
                    if self.opt['ManPAQ']:
                        default = 'N'
                        if badkx * self.stat['PAQKeepLen'] > badlx * self.stat['PAQKeepSeq']:   # Lose sequence!
                            default = 'Y'
                        if rje.yesNo('%s worst: -%s aa if kept vs -%s aa if lost. Remove?' % (bads.shortName(),rje.integerString(badkx),rje.integerString(badlx)),default):
                            seqlist.removeSeq(text='PAQ%d: -%s aa if kept vs -%s aa if lost. (Manual decision.)' % (self.stat['PAQCyc'],rje.integerString(badkx),rje.integerString(badlx)),seq=bads)
                        else:   # X out
                            haqlist.mapX(bads)
                    else:
                        self.verbose(1,3,'%s worst: -%s aa if kept vs -%s aa if lost.' % (bads.shortName(),rje.integerString(badkx),rje.integerString(badlx)),1)
                        #!# Add option for upweighting certain sequence type? (e.g. vs fragment or hypothetical?)
                        if badkx * self.stat['PAQKeepLen'] > badlx * self.stat['PAQKeepSeq']:   # Lose sequence!
                            seqlist.removeSeq(text='PAQ%d: -%s aa if kept vs -%s aa if lost.' % (self.stat['PAQCyc'],rje.integerString(badkx),rje.integerString(badlx)),seq=bads)
                        else:   # X out
                            haqlist.mapX(bads)
                ### <iii> ### Recalculate goodres
                goodres.append(self._getGood(haqlist.seq))
                goodseq.append(haqlist.seqNum())
                self.verbose(1,3,'%d -> %d "good" aa' % (goodres[-2],goodres[-1]),1)
                        
        ### <PAQ5> ### Reinstate UnX'd sequence:
            _stage = '<5> Replacing sequences'
            for seq in haqlist.seq:
                # Swap: 'Sequence' gets the original back and 'PAQ' keeps the X-masked version
                [seq.info['PAQ'],seq.info['Sequence']] = [seq.info['Sequence'],seq.info['PAQ']]
            if self.opt['ManPAQ'] and rje.checkForFile('%s.manpaq.fas' % haqlist.info['Basefile']):
                os.unlink('%s.manpaq.fas' % haqlist.info['Basefile'])

        except:
            self.log.errorLog('rje_haq.py ~ Problem with pairwiseAQ %s.' % _stage, True)
Esempio n. 13
0
    def singleSeqAQ(self, seqlist, focus=None):  ### Performs SAQ on seqlist, adding seq.info['SAQ']
        '''
        Performs SAQ on seqlist, adding seq.info['SAQ'].
        >> seqlist:rje_seq.SeqList Object
        - NB. This object will itself have sequences removed from it, so beware!
        - A new info key will be added: SAQX = SAQ sequences with individual Xs
        - A new info key will be added: SAQ = SAQ sequences with aligment Xs
        >> focus:list of range positions [X:Y] to look at. If Y=0 then [X:]. Defaults to [0,-1] when not given.
        - This is only passed on to seqlist.mapX() when the query is processed (interpretation lives there).
        << No return value. On exit seq.info['Sequence'] holds the original (unmasked) sequence again and
           seq.info['SAQ'] holds the masked version.
        Settings read: self.opt['NoQuery'], self.opt['ManSAQ'] and self.stat['SAQCyc'], ['SAQCon'], ['SAQBlock'],
        ['SAQMatch'], ['SAQKeepLen'], ['SAQKeepSeq'].
        Any exception is reported through self.log.errorLog(), tagged with the stage that was being executed.
        '''
        if focus is None:   # Fresh list per call: a list literal as the default would be shared between calls
            focus = [0, -1]
        ### <SAQ1> ### Setup
        try:
            _stage = '<1> Setup'
            haqlist = seqlist  # Same SeqList object: used to store individually Xd sequences
            query = haqlist.obj['QuerySeq']
            if self.opt['NoQuery']:
                query = None
            badres = [-1, 0]  # History of bad residue counts (seeded so that the first cycle always runs)
            block_align = {}  # seq:list of whether residue is in a well-aligned block of that sequence
            res_align = {}  # seq:list of whether residue is well-aligned
            res_gap = {}  # seq:list of whether residue is a gap
            gap_align = {}  # seq:list of whether residue is a gap in a well-aligned block
            for seq in haqlist.seq:
                # Note! Sequence is modified and SAQ not, then they are swapped at end!
                seq.info['SAQ'] = seq.info['Sequence'][0:]
                seqlen = seq.seqLen()
                block_align[seq] = [False] * seqlen
                res_align[seq] = [False] * seqlen
                res_gap[seq] = [False] * seqlen
                gap_align[seq] = [False] * seqlen

            ### <SAQ2> ### Repeated cycles of defining well- and badly-aligned blocks
            _stage = '<2> BlockID'
            while badres[-1] != badres[-2]:  # Change in number of bad residues
                total_res = 0
                badres.append(0)  # badres[-1] is the current number of bad residues
                firstcycle = len(badres) == 3  # First cycle looks at everything; later cycles only at good blocks
                infotxt = 'SAQ%d-%d: Calculating "bad" residues ...' % (self.stat['SAQCyc'], len(badres) - 2)
                for s, seq in enumerate(haqlist.seq):
                    myinfo = '%s %.1f%%' % (infotxt, (100.0 * s / haqlist.seqNum()))
                    self.log.printLog('\r#SAQ', myinfo, log=False, newline=False)
                    sequence = seq.info['Sequence']  # Not modified anywhere in <SAQ2>
                    seqlen = seq.seqLen()

                    ## <SAQ2a> ## For each sequence, mark residues as aligned or gapped
                    _stage = '<2a> Mark Residues'
                    for r in range(seqlen):
                        gap_align[seq][r] = False
                        res_align[seq][r] = False
                        # After first cycle, look only at well-aligned blocks (for sequence not whole alignment)
                        if not (block_align[seq][r] or firstcycle):
                            continue
                        a = sequence[r]
                        res_gap[seq][r] = False
                        if a == '-':
                            res_gap[seq][r] = True
                            gap_align[seq][r] = True
                            continue
                        # 'X' handled by self._saqCon
                        conx = 0  # Matches with good regions of the *other* sequences (self is skipped)
                        for otherseq in haqlist.seq:
                            if otherseq == seq:  # > so self not counted!
                                continue
                            if len(otherseq.info['Sequence']) != len(sequence):
                                self.log.errorLog('Sequence lengths do not match - should be aligned!',
                                                  printerror=False)
                                raise ValueError
                            if block_align[otherseq][r] or firstcycle:
                                conx += self._saqCon(a, otherseq.info['Sequence'][r])
                        if conx >= self.stat['SAQCon']:
                            res_align[seq][r] = True

                    ## <SAQ2b> ## Marked regions of well-aligned residues for each sequence
                    ## <i> ## Clear first
                    _stage = '<2b-i> Mark Regions'
                    block_align[seq] = [False] * seqlen
                    ## <ii> ## Recalculate
                    _stage = '<2b-ii> Blocks'
                    for r in range(seqlen):
                        if not res_align[seq][r]:  # Only a good residue can start a potential block
                            continue
                        blen = 0  # Block length (SAQBlock) = AAs
                        win = 0  # Window length = all sequence
                        matchx = 1  # Good residues in window (first residue must be good!) (SAQMatch)
                        while blen < self.stat['SAQBlock'] and matchx < self.stat['SAQMatch']:
                            win += 1
                            if (r + win) >= seqlen or sequence[r + win] == 'X':  # Hit Bad Region: Abort
                                break
                            if gap_align[seq][r + win]:  # Decent gap: widens window without using up block
                                continue
                            blen += 1  # Increase Block
                            if res_align[seq][r + win]:  # Good residue
                                matchx += 1
                        # A break above leaves matchx below SAQMatch, so r+w cannot overrun the sequence here
                        if matchx >= self.stat['SAQMatch']:
                            for w in range(win + 1):
                                block_align[seq][r + w] = True
                    ## <iii> ## Update bad residue count
                    _stage = '<2b-iii> Mark Regions'
                    for r in range(seqlen):
                        if res_gap[seq][r]:
                            continue
                        total_res += 1
                        if not block_align[seq][r]:  # Bad residue
                            badres[-1] += 1
                myinfo = '%s 100.0%%' % infotxt
                myinfo += ' => %s bad of %s total residues' % (rje.integerString(badres[-1]),
                                                               rje.integerString(total_res))
                self.log.printLog('\r#SAQ', myinfo)
                if badres[-1] == total_res:
                    self.log.errorLog('All residues marked as bad in SAQ!', printerror=False, quitchoice=True)
                # Now have all residues in all sequences marked as good (block_align=True) or bad (block_align=False)

            ### <SAQ3> ### X out badly-aligned blocks
            _stage = '<3> X-Out'
            self.log.printLog('#SAQ',
                              'SAQ%d-%d: Masking "bad" residues ...' % (self.stat['SAQCyc'], len(badres) - 2),
                              log=False,
                              newline=False)
            for seq in haqlist.seq:
                sequence = seq.info['Sequence']
                # Keep gaps and residues in good blocks; everything else is a bad residue and becomes X
                newseq = ''.join([sequence[r] if (block_align[seq][r] or sequence[r] == '-') else 'X'
                                  for r in range(seq.seqLen())])
                seq.info['Sequence'] = newseq
                seq.info['SAQX'] = newseq[0:]  # Stores Xd sequences for individuals for use in PAQ
            #!# Add saving of data in 'datafull' option

            ### <SAQ4> ### Remove sequences and/or badly-aligned regions
            _stage = '<4> Removal'
            self.log.printLog('\r#SAQ',
                              'SAQ%d-%d: Removing bad sequences and/or dodgy regions...' %
                              (self.stat['SAQCyc'], len(badres) - 2),
                              log=False,
                              newline=False)
            ## <SAQ4a> ## Process Query first - only interested in good regions within query
            _stage = '<4a> Query Removal'
            if self.opt['NoQuery'] or query is None:  # No preprocessing of Query
                self.verbose(0, 4, 'no Master Query processing...', 0)
            else:
                # Replaces other sequence ends and query X columns with Xs
                haqlist.mapX(query, qtrim=True, focus=focus)
                self.verbose(0, 4, 'Query (%s) processed...' % query.shortName(), 0)
            self.verbose(0, 3, '', 1)
            if self.opt['ManSAQ']:
                haqlist.saveFasta(seqfile='%s.mansaq.fas' % haqlist.info['Basefile'])

            ## <SAQ4b> ## Cycle through other sequences (worst first) until no more good residues or sequences are lost
            _stage = '<4b> Seq Removal'
            goodres = [0, self._getGood(haqlist.seq)]  # History of number of 'good' residues
            goodseq = [0, haqlist.seqNum()]  # History of number of sequences
            while goodres[-1] != goodres[-2] or goodseq[-1] != goodseq[-2]:
                alnlen = haqlist.seq[0].seqLen()
                colgood = [0] * alnlen  # Good residues per column
                for seq in haqlist.seq:
                    sequence = seq.info['Sequence']
                    for r in range(alnlen):
                        if sequence[r] not in ('-', 'X'):
                            colgood[r] += 1
                ## <i> ## Compare relative loss of masking and losing each sequence
                badkx = -1  # Biggest loss if kept
                badlx = -1  # Loss if lost, for the current worst sequence
                bads = None  # Worst sequence
                for seq in haqlist.seq:
                    if seq == query and not self.opt['NoQuery']:
                        continue  # Query is never a candidate for removal
                    sequence = seq.info['Sequence']
                    # Loss if kept: good residues in other sequences sharing a column with this sequence's Xs
                    keepx = sum(colgood[r] for r in range(seq.seqLen()) if sequence[r] == 'X')
                    # Loss if lost: as reported by self._getGood() for this sequence alone
                    losex = self._getGood([seq])
                    # Worse if it costs more to keep or, on a tie, costs less to lose
                    if keepx > badkx or (keepx == badkx and losex < badlx):
                        badkx, badlx, bads = keepx, losex, seq
                ## <ii> ## Remove bad sequences and/or regions
                if badkx > 0:
                    loseseq = badkx * self.stat['SAQKeepLen'] > badlx * self.stat['SAQKeepSeq']  # Lose sequence!
                    losstxt = '-%s aa if kept vs -%s aa if lost.' % (rje.integerString(badkx),
                                                                     rje.integerString(badlx))
                    if self.opt['ManSAQ']:
                        default = 'N'
                        if loseseq:
                            default = 'Y'
                        if rje.yesNo('%s worst: %s Remove?' % (bads.shortName(), losstxt), default):
                            haqlist.removeSeq(text='SAQ%d: %s (Manual decision.)' % (self.stat['SAQCyc'], losstxt),
                                              seq=bads)
                        else:  # X out
                            haqlist.mapX(bads)
                    else:
                        self.verbose(1, 3, '%s worst: %s' % (bads.shortName(), losstxt), 1)
                        #!# Add option for upweighting certain sequence type? (e.g. vs fragment or hypothetical?)
                        if loseseq:
                            haqlist.removeSeq(text='SAQ%d: %s' % (self.stat['SAQCyc'], losstxt), seq=bads)
                        else:  # X out
                            haqlist.mapX(bads)
                ### <iii> ### Recalculate goodres
                goodres.append(self._getGood(haqlist.seq))
                goodseq.append(haqlist.seqNum())

            ### <SAQ5> ### Reinstate UnX'd sequence:
            _stage = '<5> Replacing sequences'
            for seq in haqlist.seq:
                seq.info['SAQ'], seq.info['Sequence'] = seq.info['Sequence'], seq.info['SAQ']
            if self.opt['ManSAQ']:
                manfile = '%s.mansaq.fas' % haqlist.info['Basefile']
                if rje.checkForFile(manfile):
                    os.unlink(manfile)

        # NOTE(review): bare except kept on purpose so that every failure is routed through errorLog() with the
        # quit prompt, as before - confirm how errorLog() treats KeyboardInterrupt before narrowing it.
        except:
            self.log.errorLog('Problem with singleSeqAQ() %s.' % _stage,
                              quitchoice=True)
Esempio n. 14
0
def loadOrthAln(callobj,seq,gopher=True):    ### Identifies file, loads and checks alignment.
    '''
    Identifies file, loads and checks alignment. If the identified file is not actually aligned, then RJE_SEQ will try to
    align the proteins using MUSCLE or ClustalW.
    The alignment file is looked for in callobj.info['AlnDir'] under the sequence's AccNum, ID and short name (in that
    order) with extension callobj.info['AlnExt']. If none exists (or Gopher and FullForce are both set), GOPHER may be
    run from callobj.info['GopherDir'] and the AccNum-named file is then expected.
    >> callobj:Object containing settings for stats generation (MotifList, generally).
    >> seq:Sequence being analysed.
    >> gopher:bool [True] = whether to try to generate alignment with GOPHER if callobj.opt['Gopher']
    << aln = SeqList object containing alignment with queryseq, or None if no usable alignment could be loaded
       (no file, GOPHER failure, query missing, fewer than 2 sequences, not aligned, or any unexpected error).
    '''
    v = None    # Caller's verbosity; stays None if it could not even be read, so the handler knows not to restore it
    try:
        ### Setup Attributes ###
        v = callobj.stat['Verbose']
        alndir = rje.makePath(callobj.info['AlnDir'])
        alnext = callobj.info['AlnExt']
        if alnext[0] != '.': alnext = '.%s' % alnext

        ### Identify File ###
        alnstart = [seq.info['AccNum'],seq.info['ID'],seq.shortName()]   # Candidate file names, in priority order
        if v > 2: callobj.log.printLog('#PRESTO','%s' % callobj.opt)  #!# Old debugging? #!#
        alnfile = None
        if callobj.opt['Gopher'] and callobj.opt['FullForce']:
            if v > 0: callobj.log.printLog('#ALN','FullForce=T. Will call Gopher for %s regardless of existing files' % seq.shortName())
        else:
            for start in alnstart:
                if not start: continue      # Empty identifier: try the next one rather than giving up early
                candidate = '%s%s%s' % (alndir,start,alnext)
                if rje.checkForFile(candidate):     # File found
                    alnfile = candidate
                    break

        ### Fall back on GOPHER ###
        if alnfile is None:
            #!# Sort out logging and see if Gopher can be used directly rather than just run() #!#
            if gopher and callobj.opt['Gopher']:  #!# Add working version for PRESTO and SlimPickings #!#
                callobj.deBug('Run GOPHER in %s' % callobj.info['GopherDir'])
                mydir = os.getcwd()
                os.chdir(callobj.info['GopherDir'])
                try:    #!# Add log.silent() method? #!#
                    callobj.log.printLog('\n#GOPHER','Running GOPHER on %s' % seq.shortName())
                    gcmd = ['orthtree'] + callobj.cmd_list + ['gnspacc=T','i=-1']
                    solo_gopher = gopher_V2.GopherFork(log=callobj.log,cmd_list=gcmd)
                    solo_gopher.info['Name'] = seq.shortName()
                    solo_gopher.obj['Sequence'] = seq
                    solo_gopher.obj['BLAST'] = gopher_V2.Gopher(callobj.log,gcmd).setupBlast()  #!# Contemplate setting up Gopher in callobj #!#
                    solo_gopher.obj['BLAST'].log = callobj.log
                    solo_gopher.run('orthalign')
                except:     # NOTE(review): bare except kept as before - confirm errorLog() handling of interrupts
                    os.chdir(mydir)     # Restore working directory *before* logging, as the original did
                    callobj.log.errorLog('Problem with Gopher run!')
                    return None
                os.chdir(mydir)
            if callobj.opt['Gopher']:   # The file that GOPHER should have made is named after AccNum
                candidate = '%s%s%s' % (alndir,seq.info['AccNum'],alnext)
                if os.path.exists(candidate): alnfile = candidate
            if not alnfile:
                callobj.log.printLog('#ALN','No alignment file found for %s in %s.' % (seq.shortName(),alndir),screen=False)
                return None

        ### Load Alignment ###
        callobj.log.stat['Verbose'] = v - 1     # Quieter while loading; restored below (or in handler on error)
        alncmd = ['seqin=None','query=%s' % seq.shortName(),'accnr=F','seqnr=F','autofilter=F','align=T','gnspacc=F']
        aln = rje_seq.SeqList(log=callobj.log,cmd_list=callobj.cmd_list+alncmd)
        aln.loadSeqs(seqfile=alnfile,seqtype='Protein',aln=True,nodup=None)
        callobj.log.stat['Verbose'] = v
        ## Check Query ##
        if not aln.obj['QuerySeq'] and not aln.querySeq(query=seq.info['AccNum']):
            callobj.log.printLog('#ALN','Problem finding %s in %s.' % (seq.shortName(),alnfile),screen=False)
            return None

        ### Check Alignment ###
        if aln.seqNum() < 2:
            callobj.log.printLog('#ALN','Not enough sequences for %s in %s.' % (seq.shortName(),alnfile),screen=False)
            return None
        if aln._checkAln(aln=True,realign=True):
            return aln
        callobj.log.printLog('#ERR','%s not aligned!!!' % (alnfile))
        return None
    except:     # NOTE(review): bare except kept as before - confirm errorLog() handling of interrupts
        callobj.log.errorLog('Something bad has happened in rje_motif_stats.loadOrthAln()')
        if v is not None: callobj.log.stat['Verbose'] = v     # Only restore a verbosity that was actually read
        return None
Esempio n. 15
0
    def pairwiseAQ(
            self,
            seqlist=None,
            query=None,
            focus=[0, 0]):  ### Performs PAQ on seqlist, adding seq.info['PAQ']
        '''
        Performs PAQ on seqlist, adding seq.info['PAQ']
        >> seqlist:rje_seq.SeqList Object
        - NB. This object will itself have sequences removed from it, so beware!
        - A new info key will be added: PAQ = PAQ sequences with alignment Xs
        >> focus:list of range positions [X:Y] to look at. If Y=0 then [X:]. 
        '''
        ### <PAQ0> ### Setup
        try:
            _stage = '<0> Setup'
            haqlist = seqlist  # SeqList Object to store individually Xd sequences
            if not query:
                query = haqlist.obj['QuerySeq']
            if self.opt['NoQuery'] or not query:
                query = haqlist.seq[random.randint(0, haqlist.seqNum() - 1)]
                self.log.printLog(
                    '#QRY', 'Temp (random) query %s assigned for PAQ' %
                    query.shortName())
            #!# paqx = [False] * seqlist.seq[0].seqLen()    # List of whether a column of the alignment is bad (has an X) [True] or not [False]
            #!# - make this a method?!

            pwaq = {}  # Dictionary of lists of pairwise alignements
            block_align = {
            }  # Dictionary of whether residue in block of sequence that is well-aligned or not
            for seq in haqlist.seq:
                block_align[seq] = [False] * seq.seqLen()
                seq.info['PAQ'] = seq.info['Sequence'][0:]
                if seq.info.has_key('SAQX') and len(
                        seq.info['SAQX']
                ) == seq.seqLen(
                ):  #!# Should no longer be issues due to length changes following realignment
                    seq.info['Sequence'] = seq.info['SAQX'][0:]
                elif seq.info.has_key('SAQX'):
                    self.log.errorLog(
                        'Cannot use SAQX for %s in PAQ as wrong length.' %
                        seq.shortName(),
                        printerror=False)
                for otherseq in haqlist.seq:
                    pwaq[(seq, otherseq)] = [False] * seq.seqLen()

        ### <PAQ1> ### Directional Pairwise Comparisons of sequences
            _stage = '<1> Pairwise Comparisons'
            infotxt = 'PAQ%d: Pairwise Comparisons ...' % self.stat['PAQCyc']
            #print self.stat
            for seq in haqlist.seq:
                for otherseq in haqlist.seq:
                    myinfo = '%s %.1f%% %.1f%%   ' % (
                        infotxt,
                        (100.0 * haqlist.seq.index(seq) / haqlist.seqNum()),
                        (100.0 * haqlist.seq.index(otherseq) /
                         haqlist.seqNum()))
                    self.log.printLog('\r#PAQ',
                                      myinfo,
                                      log=False,
                                      newline=False)
                    for r in range(seq.seqLen()):
                        ar = seq.info['Sequence'][r]
                        ## <i> ## Look for PW aligned block
                        _stage = '<1-i> Pairwise Comparisons'
                        if ar not in ['-', 'X']:  # Start of test block
                            blen = 0  # Block length (PAQBlock) = AAs
                            win = 0  # Window length = all sequence
                            matchx = 0  # Score for residues in window
                            while blen < self.stat['PAQBlock'] and (
                                    r + win
                            ) < seq.seqLen(
                            ):  # This time we allow overshoots in both directions
                                ar = seq.info['Sequence'][r + win]
                                at = otherseq.info['Sequence'][r + win]
                                if 'X' in [ar, at]:  # Hit Bad Region: Abort
                                    break
                                else:  # Better region
                                    if ar != '-':
                                        blen += 1  # Increase Block
                                        matchx += self._saqCon(ar, at)
                                win += 1
                        ## <ii> ## Update pwaq if block good
                            _stage = '<1-ii> Pairwise Comparisons'
                            if matchx >= self.stat['PAQMatch']:
                                for w in range(win):
                                    if seq.info['Sequence'][r +
                                                            w] in ['-', 'X']:
                                        pwaq[(seq, otherseq)][r + w] = False
                                    else:
                                        pwaq[(seq, otherseq)][r + w] = True
            self.log.printLog('\r#PAQ',
                              '%s 100.0% 100.0%.   ' % infotxt,
                              log=False)

            ### <PAQ2> ### Link back to Query
            _stage = '<2> Linking to Query'
            ### <PAQ2a> ### Network of Pairwise Quality alignments
            _stage = '<2a> Linking to Query'
            #self.verbose(1,3,'PAQ%d: Linking Residues to Query (%s)' % (self.stat['PAQCyc'],query.shortName()),0)
            infotxt = 'PAQ%d: Linking Residues to Query (%s) ...' % (
                self.stat['PAQCyc'], query.shortName())
            for r in range(query.seqLen()):
                _stage = '<2a> Linking to Query'
                self.log.printLog('\r#PAQ',
                                  '%s %.1f%%' % (infotxt,
                                                 (100.0 * r / query.seqLen())),
                                  log=False,
                                  newline=False)
                qok = {
                }  # Dictionary of whether residue in seq OK, i.e. linked to query
                for seq in haqlist.seq:
                    qok[seq] = False
                qok[query] = True
                sok = [0, 1]  # List of OK sequence for residue
                while sok[-2] != sok[-1]:
                    ## <i> ## Match pairs, starting with query
                    _stage = '<2a-i> Linking to Query'
                    for seq in haqlist.seq:
                        if qok[seq]:
                            for otherseq in haqlist.seq:
                                if pwaq[(seq, otherseq)][r] or pwaq[(otherseq,
                                                                     seq)][r]:
                                    qok[otherseq] = True
                    ## <ii> ## Update sok
                    _stage = '<2a-ii> Linking to Query'
                    sok.append(0)
                    for seq in haqlist.seq:
                        if qok[seq]:
                            sok[-1] += 1
                            block_align[seq][r] = True
                _stage = '<2a-iii> Linking to Query'
                if sok[-1] == 1:  # Only query OK!
                    block_align[query][r] = False
            self.log.printLog('\r#PAQ', '%s 100.0%%' % infotxt, log=False)

            ### <PAQ2b> ### Allow for divergence (Conserved Anchors)
            _stage = '<2b> Anchors'
            if self.opt['Anchors']:
                infotxt = 'PAQ%d: Accounting for divergence within aligned regions ...' % self.stat[
                    'PAQCyc']
                ## <i> ## Setup gapped list
                gapped = [
                    False
                ] * query.seqLen()  # Whether column of alignment is gapped
                for seq in haqlist.seq:
                    self.log.printLog(
                        '\r#PAQ',
                        '%s %.1f%%  ' %
                        (infotxt,
                         (50.0 * haqlist.seq.index(seq) / haqlist.seqNum())),
                        log=False,
                        newline=False)
                    (start, end) = (0, seq.seqLen())
                    while seq.info['Sequence'][start] == '-':
                        start += 1
                    while seq.info['Sequence'][end - 1] == '-':
                        end -= 1
                    for r in range(start, end):
                        if seq.info['Sequence'][r] == '-':
                            gapped[r] = True
                ## <ii> ## Correction
                for seq in haqlist.seq:
                    self.log.printLog(
                        '\r#PAQ',
                        '%s %.1f%%  ' %
                        (infotxt,
                         (50 +
                          (50.0 * haqlist.seq.index(seq) / haqlist.seqNum()))),
                        log=False,
                        newline=False)
                    for r in range(seq.seqLen()):
                        if block_align[seq][r] or gapped[
                                r]:  # No need for correction
                            continue
                        # Move in both directions: if good residues (or sequence end) reached before gaps then reinstate
                        winf = 0
                        fwd = True
                        fok = False
                        winb = 0
                        bwd = True
                        bok = False
                        while fwd or bwd:
                            # End of seqs
                            if (r + winf) >= seq.seqLen():
                                fwd = False
                            if (r - winb) < 0:
                                bwd = False
                            # Gaps/OK
                            if fwd:
                                if gapped[r + winf]:
                                    fok = False
                                    fwd = False
                                elif block_align[seq][r + winf]:
                                    fwd = False
                                else:
                                    winf += 1
                            if bwd:
                                if gapped[r - winb]:
                                    bok = False
                                    bwd = False
                                elif block_align[seq][r - winb]:
                                    bwd = False
                                else:
                                    winb += 1
                        if fok and bok:  # Reinstate
                            for w in range(r - winb, r + winf + 1):
                                block_align[seq][w] = True
                self.log.printLog('\r#PAQ',
                                  '%s 100.0%%  ' % infotxt,
                                  log=False)

        ### <PAQ3> ### X out badly-aligned blocks
            _stage = '<3> Making bad sequence blocks'
            for seq in haqlist.seq:
                newseq = ''
                for r in range(seq.seqLen()):
                    if block_align[seq][r] or seq.info['Sequence'][r] == '-':
                        newseq += seq.info['Sequence'][r]
                    else:  # Bad residue
                        newseq += 'X'
                seq.info['Sequence'] = newseq[0:]
            #!# Add saving of data in 'datafull' option

        ### <PAQ4> ### Remove sequences and/or badly-aligned regions
            _stage = '<4> Removing sequences/regions'
            self.verbose(
                0, 4, 'PAQ%d: Removing bad sequences and/or dodgy regions...' %
                self.stat['PAQCyc'], 0)
            ## <PAQ4a> ## Process Query first - only interested in good regions within query
            if self.opt['NoQuery']:  # No preprocessing of Query
                self.verbose(0, 4, 'no Master Query processing...', 0)
            else:
                haqlist.mapX(
                    query, qtrim=True, focus=focus
                )  # Replaces other sequence ends and query X columns with Xs
                self.verbose(0, 4,
                             'Query (%s) processed...' % query.shortName(), 0)
            self.verbose(0, 3, '', 1)
            if self.opt['ManPAQ']:
                haqlist.saveFasta(seqfile='%s.manpaq.fas' %
                                  haqlist.info['Basefile'])

            ## <PAQ4b> ## Cycle through other sequences (worst first) until no more good residues are lost
            goodres = [0, self._getGood(haqlist.seq)
                       ]  # List of number of 'good' residues
            goodseq = [0, haqlist.seqNum()]
            while goodres[-1] != goodres[-2] or goodseq[-1] != goodseq[-2]:
                colgood = [
                    0
                ] * haqlist.seq[0].seqLen()  # Good residues per column
                for r in range(haqlist.seq[0].seqLen()):
                    for seq in haqlist.seq:
                        if seq.info['Sequence'][r] != '-' and seq.info[
                                'Sequence'][r] != 'X':
                            colgood[r] += 1
                ## <i> ## Compare relative loss of masking and losing each sequence
                keepx = {
                }  # Dictionary of seq:number of lost residues if seq kept
                losex = {
                }  # Dictionary of seq:number of lost residues if seq lost
                badkx = -1  # Biggest loss if kept
                badlx = -1  # Biggest loss if lost
                bads = None  # Worst sequence
                for seq in haqlist.seq:
                    if seq == query and self.opt['NoQuery'] == False:
                        continue  # Next sequence
                    # Calculate keepx and losex
                    keepx[seq] = 0
                    for r in range(seq.seqLen()):
                        if seq.info['Sequence'][r] == 'X':
                            keepx[seq] += colgood[r]
                        #?# In Perl HAQESAC there was an option to ignore Orphans in this calculation. Reinstate?
                    losex[seq] = self._getGood([seq])
                    # Update bads if worse
                    if keepx[seq] > badkx:
                        badkx = keepx[seq]
                        badlx = losex[seq]
                        bads = seq
                    elif keepx[seq] == badkx and losex[seq] < badlx:
                        badlx = losex[seq]
                        bads = seq
                ## <ii> ## Remove bad sequences and/or regions
                if badkx > 0:
                    if self.opt['ManPAQ']:
                        default = 'N'
                        if badkx * self.stat['PAQKeepLen'] > badlx * self.stat[
                                'PAQKeepSeq']:  # Lose sequence!
                            default = 'Y'
                        if rje.yesNo(
                                '%s worst: -%s aa if kept vs -%s aa if lost. Remove?'
                                % (bads.shortName(), rje.integerString(badkx),
                                   rje.integerString(badlx)), default):
                            seqlist.removeSeq(
                                text=
                                'PAQ%d: -%s aa if kept vs -%s aa if lost. (Manual decision.)'
                                %
                                (self.stat['PAQCyc'], rje.integerString(badkx),
                                 rje.integerString(badlx)),
                                seq=bads)
                        else:  # X out
                            haqlist.mapX(bads)
                    else:
                        self.verbose(
                            1, 3,
                            '%s worst: -%s aa if kept vs -%s aa if lost.' %
                            (bads.shortName(), rje.integerString(badkx),
                             rje.integerString(badlx)), 1)
                        #!# Add option for upweighting certain sequence type? (e.g. vs fragment or hypothetical?)
                        if badkx * self.stat['PAQKeepLen'] > badlx * self.stat[
                                'PAQKeepSeq']:  # Lose sequence!
                            seqlist.removeSeq(
                                text='PAQ%d: -%s aa if kept vs -%s aa if lost.'
                                %
                                (self.stat['PAQCyc'], rje.integerString(badkx),
                                 rje.integerString(badlx)),
                                seq=bads)
                        else:  # X out
                            haqlist.mapX(bads)
                ### <iii> ### Recalculate goodres
                goodres.append(self._getGood(haqlist.seq))
                goodseq.append(haqlist.seqNum())
                self.verbose(1, 3,
                             '%d -> %d "good" aa' % (goodres[-2], goodres[-1]),
                             1)

        ### <PAQ5> ### Reinstate UnX'd sequence:
            _stage = '<5> Replacing sequences'
            for seq in haqlist.seq:
                [seq.info['PAQ'], seq.info['Sequence']
                 ] = [seq.info['Sequence'], seq.info['PAQ']]
            if self.opt['ManPAQ'] and rje.checkForFile(
                    '%s.manpaq.fas' % haqlist.info['Basefile']):
                os.unlink('%s.manpaq.fas' % haqlist.info['Basefile'])

        except:
            self.log.errorLog(
                'rje_haq.py ~ Problem with pairwiseAQ %s.' % _stage, True)
Esempio n. 16
0
    def buildPam(self):     ### Builds PAM Matrix in memory
        '''
        Builds PAM matrix in memory.
        - If self.info['AltPam'] is anything other than ''/'none', self.altPAM() is called first.
        - The PAM file named by self.info['Name'] is looked for as given, then under self.info['Path'] and
          finally under Path + '../data/'. self.info['Name'] is replaced by the first candidate that exists.
        - The first line of the file is split on whitespace to give self.alphabet. If it contains 'X' (any
          case) self.opt['X-Value'] is switched off; if it contains '-' self.opt['GapValue'] is switched off.
        - Each following line must have len(alphabet)+1 whitespace-separated fields: the first field is
          ignored and the rest are read as floats, one row per alphabet symbol in alphabet order.
        - While X-Value/GapValue remain on, every pairing with 'X'/'-' is given a value of 1.
        - A PAM0 matrix (1 on the diagonal, 0 elsewhere) and the PAM1 matrix read from file are appended to
          self.matrix as PAM objects, then self.pamUp() is called.
        << Raises ValueError if no PAM file is found or a matrix row has the wrong number of fields.
           Any exception is logged through self.log.errorLog() and then re-raised.
        '''
        try:
            ### Check for Alternative PAM Matrix ###
            if self.info['AltPam'].lower() not in ['','none']:
                self.altPAM()

            self.verbose(0,3,"Reading PAM1 matrix from %s" % self.info['Name'],2)
            ### <a> ### Open file & Read Lines
            pamfiles = [self.info['Name'],rje.makePath(self.info['Path']) + self.info['Name'],rje.makePath(self.info['Path']) + rje.makePath('../data/') + self.info['Name']]
            self.info['Name'] = None    # Only reinstated if one of the candidate paths exists
            for pfile in pamfiles:
                if rje.checkForFile(pfile):
                    with open(pfile, 'r') as pamhandle:     # Context manager closes the handle even if reading fails
                        file_lines = pamhandle.readlines()
                    self.info['Name'] = pfile
                    break
            if not self.info['Name']:
                for pfile in pamfiles: self.printLog('#ERR','File "%s" not found' % pfile)
                self.printLog('#ERR','No PAM file found!')
                raise ValueError('No PAM file found!')

            ### <b> ### Read in alphabet
            self.verbose(0,3,file_lines[0],1)
            # An alphabet that already lists X or - supplies its own values, so the defaults are switched off
            if file_lines[0].upper().find('X') >= 0:
                self.opt['X-Value'] = False
            if file_lines[0].find('-') >= 0:
                self.opt['GapValue'] = False
            self.alphabet = file_lines[0].split()

            ### <c> ### Make PAM0
            ## <i> ## Clear dics
            zeropamp = {}
            for r in self.alphabet:
                for c in self.alphabet:
                    zeropamp[r + c] = 0
                zeropamp[r + r] = 1
                if self.opt['X-Value']:
                    zeropamp['X' + r] = 1
                    zeropamp[r + 'X'] = 1
                if self.opt['GapValue']:
                    zeropamp['-' + r] = 1
                    zeropamp[r + '-'] = 1
            if self.opt['X-Value']:
                zeropamp['XX'] = 1
            if self.opt['GapValue']:
                zeropamp['--'] = 1
            if self.opt['X-Value'] and self.opt['GapValue']:
                zeropamp['-X'] = 1
                zeropamp['X-'] = 1
            ## <ii> ## New Matrix
            newmatrix = PAM(pam=0,rawpamp=zeropamp,alpha=self.alphabet)
            self.matrix.append(newmatrix)

            ## <d> ## Read in PAM1
            rawpamp = {}
            for line, r in enumerate(self.alphabet, 1):     # Matrix rows start on the line after the alphabet
                pamline = file_lines[line].split()
                if len(pamline) != (len(self.alphabet)+1):
                    self.log.errorLog("%s has wrong format! Does not match %s" % (pamline, self.alphabet),printerror=False,quitchoice=True)
                    # A bare 'raise' here had no active exception to re-raise: raise an explicit error instead
                    raise ValueError('PAM matrix row %d of "%s" has wrong format' % (line, self.info['Name']))
                for c, col in enumerate(self.alphabet):
                    rawpamp[r + col] = float(pamline[c+1])  # pamline[0] is skipped
                if self.opt['X-Value']:
                    rawpamp['X' + r] = 1
                    rawpamp[r + 'X'] = 1
                if self.opt['GapValue']:
                    rawpamp['-' + r] = 1
                    rawpamp[r + '-'] = 1
            if self.opt['X-Value']:
                rawpamp['XX'] = 1
            if self.opt['GapValue']:
                rawpamp['--'] = 1
            if self.opt['X-Value'] and self.opt['GapValue']:
                rawpamp['-X'] = 1
                rawpamp['X-'] = 1
            newmatrix = PAM(pam=1,rawpamp=rawpamp,alpha=self.alphabet)
            self.matrix.append(newmatrix)

            ## <e> ## Raise to pammax
            self.log.printLog('\r#PAM','Building PAM Matrices <= %d: ' % self.stat['PamMax'],log=False,newline=False)
            self.pamUp()
            self.log.printLog('\r#PAM','Building PAM Matrices <= %d: Complete.' % self.stat['PamMax'])
        except:     # Deliberately broad: everything is logged and then passed on to the caller
            self.log.errorLog('Fatal Error in PamCtrl.buildPam().')
            raise
Esempio n. 17
0
    def singleSeqAQ(self,seqlist,focus=[0,-1]):     ### Performs SAQ on seqlist, adding seq.info['SAQ']
        '''
        Performs SAQ on seqlist, adding seq.info['SAQ'].
        >> seqlist:rje_seq.SeqList Object
        - NB. This object will itself have sequences removed from it, so beware!
        - A new info key will be added: SAQX = SAQ sequences with individual Xs
        - A new info key will be added: SAQ = SAQ sequences with alignment Xs
        >> focus:list of range positions [X:Y] to look at. If Y=0 then [X:].
        - focus is only handed on to seqlist.mapX() when the query is processed.
        - NOTE(review): the default is a shared list object - confirm mapX() never modifies it in place.

        Outline:
        <1> Copy each Sequence to info['SAQ'] and set up per-sequence, per-column flag lists.
        <2> Repeat until the number of "bad" residues stops changing:
            - a non-gap residue is "aligned" if the summed self._saqCon() scores against the other sequences
              reach stat['SAQCon'] (after the first cycle only positions inside good blocks are compared);
            - a window starting at an aligned residue becomes a good block if stat['SAQMatch'] aligned
              residues are met before stat['SAQBlock'] non-gap positions, an 'X' or the sequence end.
        <3> Residues outside good blocks are replaced with 'X' (gaps kept); result stored in Sequence & SAQX.
        <4> Query is processed with mapX() unless opt['NoQuery']; then the worst remaining sequence is
            repeatedly either removed or X-mapped until the good residue and sequence counts are stable.
        <5> info['Sequence'] and info['SAQ'] are swapped back, so Sequence holds the pre-SAQ sequence again.
        Any exception is caught and reported through self.log.errorLog(); nothing is returned.
        '''
        ### <SAQ1> ### Setup
        try:
            _stage = '<1> Setup'
            haqlist = seqlist   # SeqList Object to store individually Xd sequences (same object as seqlist)
            query = haqlist.obj['QuerySeq']
            if self.opt['NoQuery']:
                query = None
            badres = [-1,0]     # List of how many bad residues in total dataset
            block_align = {}    # Dictionary of whether residue in block of sequence that is well-aligned or not
            res_align = {}      # Dictionary of whether residue of sequence is well-aligned or not
            res_gap = {}        # Dictionary of whether residue of sequence is a gap or not
            gap_align = {}      # Dictionary of whether residue of sequence is a gap in a well-aligned block or not
            for seq in haqlist.seq:
                seq.info['SAQ'] = seq.info['Sequence'][0:]      # Note! Sequence is modified and SAQ not, then they are swapped at end!
                block_align[seq] = [False] * seq.seqLen()
                res_align[seq] = [False] * seq.seqLen()
                res_gap[seq] = [False] * seq.seqLen()
                gap_align[seq] = [False] * seq.seqLen()

        ### <SAQ2> ### Repeated cycles of defining well- and badly-aligned blocks
            _stage = '<2> BlockID'
            while badres[-1] != badres[-2]:     # Change in number of bad residues
                total_res = 0
                badres.append(0)    # badres[-1] is the current number of bad residues
                firstcycle = len(badres) == 3   # badres does not change length again within this cycle
                infotxt = 'SAQ%d-%d: Calculating "bad" residues ...' % (self.stat['SAQCyc'],len(badres)-2)
                for sx, seq in enumerate(haqlist.seq):  # sx only feeds the progress percentage
                    myinfo = '%s %.1f%%' % (infotxt,(100.0 * sx / haqlist.seqNum()))
                    self.log.printLog('\r#SAQ',myinfo,log=False,newline=False)

                    ## <SAQ2a> ## For each sequence, mark residues as aligned or gapped
                    _stage = '<2a> Mark Residues'
                    for r in range(seq.seqLen()):
                        gap_align[seq][r] = False
                        res_align[seq][r] = False
                        if block_align[seq][r] or firstcycle:   # After first cycle, look only at well-aligned blocks (well-aligned for sequence not whole alignment)
                            a = seq.info['Sequence'][r]
                            res_gap[seq][r] = False
                            if a == '-':
                                res_gap[seq][r] = True
                                gap_align[seq][r] = True
                            else:   # 'X' handled by self._saqCon
                                conx = 0  # Summed _saqCon score against good regions of the *other* sequences
                                for otherseq in haqlist.seq:
                                    if otherseq == seq:     # Self is not counted
                                        continue
                                    if len(otherseq.info['Sequence']) != len(seq.info['Sequence']):
                                        self.log.errorLog('Sequence lengths do not match - should be aligned!',printerror=False)
                                        raise ValueError
                                    if block_align[otherseq][r] or firstcycle:
                                        conx += self._saqCon(a, otherseq.info['Sequence'][r])
                                if conx >= self.stat['SAQCon']:
                                    res_align[seq][r] = True

                    ## <SAQ2b> ## Marked regions of well-aligned residues for each sequence
                    _stage = '<2b> Mark Regions'
                    ## <i> ## Clear first
                    _stage = '<2b-i> Mark Regions'
                    for r in range(seq.seqLen()):
                        block_align[seq][r] = False
                    ## <ii> ## Recalculate
                    _stage = '<2b-ii> Mark Regions'
                    for r in range(seq.seqLen()):
                        _stage = '<2b-ii> Blocks'
                        if res_align[seq][r]:   # Start of potential block
                            blen = 0    # Block length (SAQBlock) = AAs
                            win = 0     # Window length = all sequence
                            matchx = 1  # Good residues in window (first residue must be good!) (SAQMatch)
                            while blen < self.stat['SAQBlock'] and matchx < self.stat['SAQMatch']:
                                win += 1
                                if (r + win) >= seq.seqLen() or seq.info['Sequence'][r+win] == 'X':     # Hit Bad Region: Abort
                                    break
                                if gap_align[seq][r+win]:   # Decent gap: widens window but not block length
                                    continue
                                blen += 1   # Increase Block
                                if res_align[seq][r+win]:   # Good residue
                                    matchx += 1
                            # The loop can only break out with matchx < SAQMatch, so r+win is in range here
                            if matchx >= self.stat['SAQMatch']:
                                for w in range(win+1):
                                    block_align[seq][r+w] = True
                    ## <iii> ## Update bad residue count
                    for r in range(seq.seqLen()):
                        _stage = '<2b-iii> Mark Regions'
                        if not res_gap[seq][r]:
                            total_res += 1
                            if not block_align[seq][r]:     # Bad residue
                                badres[-1] += 1
                myinfo = '%s 100.0%%' % infotxt
                myinfo += ' => %s bad of %s total residues' % (rje.integerString(badres[-1]),rje.integerString(total_res))
                self.log.printLog('\r#SAQ',myinfo)
                if badres[-1] == total_res:
                    self.log.errorLog('All residues marked as bad in SAQ!',printerror=False,quitchoice=True)
                # Now have all residues in all sequences marked as good (block_align=True) or bad (block_align=False)

        ### <SAQ3> ### X out badly-aligned blocks
            _stage = '<3> X-Out'
            self.log.printLog('#SAQ','SAQ%d-%d: Masking "bad" residues ...' % (self.stat['SAQCyc'],len(badres)-2),log=False,newline=False)
            for seq in haqlist.seq:
                masked = []
                for r in range(seq.seqLen()):
                    aa = seq.info['Sequence'][r]
                    if block_align[seq][r] or aa == '-':    # Good residues and all gaps are kept
                        masked.append(aa)
                    else:   # Bad residue
                        masked.append('X')
                newseq = ''.join(masked)
                seq.info['Sequence'] = newseq
                seq.info['SAQX'] = newseq       # Stores Xd sequences for individuals for use in PAQ
            #!# Add saving of data in 'datafull' option

        ### <SAQ4> ### Remove sequences and/or badly-aligned regions
            _stage = '<4> Removal'
            self.log.printLog('\r#SAQ','SAQ%d-%d: Removing bad sequences and/or dodgy regions...' % (self.stat['SAQCyc'],len(badres)-2),log=False,newline=False)
            ## <SAQ4a> ## Process Query first - only interested in good regions within query
            _stage = '<4a> Query Removal'
            if self.opt['NoQuery'] or query is None:  # No preprocessing of Query
                self.verbose(0,4,'no Master Query processing...',0)
            else:
                haqlist.mapX(query, qtrim=True, focus=focus) # Replaces other sequence ends and query X columns with Xs
                self.verbose(0,4,'Query (%s) processed...' % query.shortName(),0)
            self.verbose(0,3,'',1)
            if self.opt['ManSAQ']:
                haqlist.saveFasta(seqfile='%s.mansaq.fas' % haqlist.info['Basefile'])

            ## <SAQ4b> ## Cycle through other sequences (worst first) until no more good residues or sequences are lost
            _stage = '<4b> Seq Removal'
            goodres = [0, self._getGood(haqlist.seq)]   # List of number of 'good' residues
            goodseq = [0, haqlist.seqNum()]
            while goodres[-1] != goodres[-2] or goodseq[-1] != goodseq[-2]:
                colgood = [0] * haqlist.seq[0].seqLen()    # Good residues per column
                for r in range(haqlist.seq[0].seqLen()):
                    for seq in haqlist.seq:
                        if seq.info['Sequence'][r] != '-' and seq.info['Sequence'][r] != 'X':
                            colgood[r] += 1
                ## <i> ## Compare relative loss of masking and losing each sequence
                keepx = {}  # Dictionary of seq:number of lost residues if seq kept
                losex = {}  # Dictionary of seq:number of lost residues if seq lost
                badkx = -1  # Biggest loss if kept
                badlx = -1  # Biggest loss if lost
                bads = None # Worst sequence
                for seq in haqlist.seq:
                    if seq == query and self.opt['NoQuery'] == False:
                        continue    # Query itself is never a candidate for removal
                    # Calculate keepx and losex
                    keepx[seq] = 0
                    for r in range(seq.seqLen()):
                        if seq.info['Sequence'][r] == 'X':
                            keepx[seq] += colgood[r]
                    losex[seq] = self._getGood([seq])
                    # Update bads if worse (ties on keepx are broken by the smaller losex)
                    if keepx[seq] > badkx:
                        badkx = keepx[seq]
                        badlx = losex[seq]
                        bads = seq
                    elif keepx[seq] == badkx and losex[seq] < badlx:
                        badlx = losex[seq]
                        bads = seq
                ## <ii> ## Remove bad sequences and/or regions
                if badkx > 0:
                    losebad = badkx * self.stat['SAQKeepLen'] > badlx * self.stat['SAQKeepSeq']   # Lose sequence!
                    if self.opt['ManSAQ']:
                        default = 'N'
                        if losebad:
                            default = 'Y'
                        if rje.yesNo('%s worst: -%s aa if kept vs -%s aa if lost. Remove?' % (bads.shortName(),rje.integerString(badkx),rje.integerString(badlx)),default):
                            haqlist.removeSeq(text='SAQ%d: -%s aa if kept vs -%s aa if lost. (Manual decision.)' % (self.stat['SAQCyc'],rje.integerString(badkx),rje.integerString(badlx)),seq=bads)
                        else:   # X out
                            haqlist.mapX(bads)
                    else:
                        self.verbose(1,3,'%s worst: -%s aa if kept vs -%s aa if lost.' % (bads.shortName(),rje.integerString(badkx),rje.integerString(badlx)),1)
                        #!# Add option for upweighting certain sequence type? (e.g. vs fragment or hypothetical?)
                        if losebad:
                            haqlist.removeSeq(text='SAQ%d: -%s aa if kept vs -%s aa if lost.' % (self.stat['SAQCyc'],rje.integerString(badkx),rje.integerString(badlx)),seq=bads)
                        else:   # X out
                            haqlist.mapX(bads)
                ### <iii> ### Recalculate goodres
                goodres.append(self._getGood(haqlist.seq))
                goodseq.append(haqlist.seqNum())

            ### <SAQ5> ### Reinstate UnX'd sequence:
            _stage = '<5> Replacing sequences'  # Was mislabelled '<4b> Seq Removal', hiding which step failed
            for seq in haqlist.seq:
                seq.info['SAQ'], seq.info['Sequence'] = seq.info['Sequence'], seq.info['SAQ']
            if self.opt['ManSAQ'] and rje.checkForFile('%s.mansaq.fas' % haqlist.info['Basefile']):
                os.unlink('%s.mansaq.fas' % haqlist.info['Basefile'])

        except:     # Deliberately broad: any failure is reported with the stage reached, not re-raised here
            self.log.errorLog('Problem with singleSeqAQ() %s.' % _stage, quitchoice=True)
Esempio n. 18
0
    def buildPam(self):  ### Builds PAM Matrix in memory
        '''
        Builds PAM matrix in memory.
        - self.altPAM() is called first when self.info['AltPam'] is not ''/'none'.
        - Candidate file locations are tried in order: self.info['Name'] as given, Path + Name, then
          Path + '../data/' + Name. self.info['Name'] ends up as the first one that exists.
        - Line 1 of the file, split on whitespace, becomes self.alphabet. An 'X' (any case) in that line
          turns self.opt['X-Value'] off and a '-' turns self.opt['GapValue'] off.
        - Lines 2 onwards hold one matrix row per alphabet symbol: a leading field that is ignored,
          followed by len(alphabet) float values.
        - While X-Value/GapValue are still on, all pairings involving 'X'/'-' are set to 1.
        - PAM0 (1 on the diagonal, 0 elsewhere) and PAM1 are appended to self.matrix as PAM objects and
          self.pamUp() is then called.
        << Raises ValueError if no PAM file exists or a row has the wrong number of fields.
           Every exception is logged with self.log.errorLog() before being re-raised.
        '''
        try:
            ### Check for Alternative PAM Matrix ###
            if self.info['AltPam'].lower() not in ['', 'none']:
                self.altPAM()

            self.verbose(0, 3,
                         "Reading PAM1 matrix from %s" % self.info['Name'], 2)
            ### <a> ### Open file & Read Lines
            pamfiles = [
                self.info['Name'],
                rje.makePath(self.info['Path']) + self.info['Name'],
                rje.makePath(self.info['Path']) + rje.makePath('../data/') +
                self.info['Name']
            ]
            self.info['Name'] = None  # Restored below only if a candidate is found
            for pfile in pamfiles:
                if rje.checkForFile(pfile):
                    # 'with' guarantees the handle is closed once the lines are read
                    with open(pfile, 'r') as pamhandle:
                        file_lines = pamhandle.readlines()
                    self.info['Name'] = pfile
                    break
            if not self.info['Name']:
                for pfile in pamfiles:
                    self.printLog('#ERR', 'File "%s" not found' % pfile)
                self.printLog('#ERR', 'No PAM file found!')
                raise ValueError('No PAM file found!')

            ### <b> ### Read in alphabet
            self.verbose(0, 3, file_lines[0], 1)
            # X or - present in the alphabet means the file provides those values itself
            if file_lines[0].upper().find('X') >= 0:
                self.opt['X-Value'] = False
            if file_lines[0].find('-') >= 0:
                self.opt['GapValue'] = False
            self.alphabet = file_lines[0].split()

            ### <c> ### Make PAM0
            ## <i> ## Clear dics
            zeropamp = {}
            for r in self.alphabet:
                for c in self.alphabet:
                    zeropamp[r + c] = 0
                zeropamp[r + r] = 1
                if self.opt['X-Value']:
                    zeropamp['X' + r] = 1
                    zeropamp[r + 'X'] = 1
                if self.opt['GapValue']:
                    zeropamp['-' + r] = 1
                    zeropamp[r + '-'] = 1
            if self.opt['X-Value']:
                zeropamp['XX'] = 1
            if self.opt['GapValue']:
                zeropamp['--'] = 1
            if self.opt['X-Value'] and self.opt['GapValue']:
                zeropamp['-X'] = 1
                zeropamp['X-'] = 1
            ## <ii> ## New Matrix
            newmatrix = PAM(pam=0, rawpamp=zeropamp, alpha=self.alphabet)
            self.matrix.append(newmatrix)

            ## <d> ## Read in PAM1
            rawpamp = {}
            # Row for the first alphabet symbol is file line index 1 (index 0 is the alphabet)
            for line, r in enumerate(self.alphabet, 1):
                pamline = file_lines[line].split()
                if len(pamline) != (len(self.alphabet) + 1):
                    self.log.errorLog(
                        "%s has wrong format! Does not match %s" %
                        (pamline, self.alphabet),
                        printerror=False,
                        quitchoice=True)
                    # Explicit error: a bare 'raise' has nothing to re-raise at this point
                    raise ValueError(
                        'PAM matrix row %d of "%s" has wrong format' %
                        (line, self.info['Name']))
                for c, col in enumerate(self.alphabet):
                    rawpamp[r + col] = float(pamline[c + 1])  # Field 0 skipped
                if self.opt['X-Value']:
                    rawpamp['X' + r] = 1
                    rawpamp[r + 'X'] = 1
                if self.opt['GapValue']:
                    rawpamp['-' + r] = 1
                    rawpamp[r + '-'] = 1
            if self.opt['X-Value']:
                rawpamp['XX'] = 1
            if self.opt['GapValue']:
                rawpamp['--'] = 1
            if self.opt['X-Value'] and self.opt['GapValue']:
                rawpamp['-X'] = 1
                rawpamp['X-'] = 1
            newmatrix = PAM(pam=1, rawpamp=rawpamp, alpha=self.alphabet)
            self.matrix.append(newmatrix)

            ## <e> ## Raise to pammax
            self.log.printLog('\r#PAM',
                              'Building PAM Matrices <= %d: ' %
                              self.stat['PamMax'],
                              log=False,
                              newline=False)
            self.pamUp()
            self.log.printLog(
                '\r#PAM',
                'Building PAM Matrices <= %d: Complete.' % self.stat['PamMax'])
        except:  # Deliberately broad: log whatever went wrong, then pass it on
            self.log.errorLog('Fatal Error in PamCtrl.buildPam().')
            raise
Esempio n. 19
0
def badasp(out,mainlog,cmd_list,tree=None): ### Main BADASP Method
    '''
    Main BADASP Method. Automated run if interactive < 1
    <1> Load Sequences and Tree
    <2> Define Subfamilies
    <3> GASP Ancestral Sequence Prediction
    <4> Perform Functional Specificity and Sequence Conservation Calculations
    <5> Output Results
    <6> Optional (interactive only) filtered Partial Results Output

    Parameters:
    - out: output object; this method reads out.stat['Interactive'] and calls out.verbose().
    - mainlog: log object; this method calls mainlog.verbose() and mainlog.errorLog().
    - cmd_list: list of 'option=value' command strings. NB. This list is modified in place:
      'group=', 'nsfin=' and 'funcspec=' commands may be appended to it.
    - tree: optional tree object. If None, an rje_tree.Tree is built from cmd_list.

    Returns None. Each stage runs inside its own try/except: an error is logged through
    mainlog.errorLog() and the next stage is still attempted. sys.exit() is called if the user
    chooses to quit (or if a non-interactive run cannot load sequences and a rooted tree).
    In stage <6> the user may remove entries from tree.subfam.
    '''
    try:    ### <0> ### Setup
        _seqfile = None
        _treefile = None
        append_file = None
        basefile = None
        for cmd in cmd_list:
            if cmd.startswith('seqin='):
                _seqfile = cmd[len('seqin='):]
                # Strip a three-letter extension. The length check stops short names raising IndexError.
                if len(_seqfile) >= 4 and _seqfile[-4] == '.':
                    _seqfile = _seqfile[:-4]
            if cmd.startswith('useanc='):
                _seqfile = cmd[len('useanc='):]
                if _seqfile[-8:] == '.anc.fas':
                    _seqfile = _seqfile[:-8]
            if cmd.startswith('nsfin='):
                _treefile = cmd[len('nsfin='):]
            if cmd.startswith('append='):
                append_file = cmd[len('append='):]
            if cmd.startswith('basefile='):
                basefile = cmd[len('basefile='):]
        if _seqfile and os.path.exists('%s.grp' % _seqfile):
            cmd_list.append('group=%s.grp' % _seqfile)
        if _seqfile and _treefile is None:
            # No tree file given: look for one named after the sequence file (*.nwk preferred to *.nsf)
            if rje.checkForFile('%s.nwk' % _seqfile): _treefile = '%s.nwk' % _seqfile
            else: _treefile = '%s.nsf' % _seqfile
            out.verbose(0,2,'Looking for treefile %s.' % _treefile,1)
            if rje.checkForFile(_treefile):
                cmd_list.append('nsfin=%s' % _treefile)

        if tree is None:
            mainlog.verbose(0,1,'Tree: %s' % cmd_list,2)
            tree = rje_tree.Tree(log=mainlog,cmd_list=cmd_list)
        if tree.stat['MinFamNum'] < 2:
            tree.stat['MinFamNum'] = 2

        ### <1> ### Load Sequences and Tree
        while out.stat['Interactive'] > 0 or tree.obj['SeqList'] is None:
            tree = rje_tree.treeMenu(out,mainlog,['root=yes']+cmd_list,tree)
            if tree.obj['SeqList'] and tree.opt['Rooted']:
                break
            print('\n ** Must have loaded sequences and a rooted tree. ** \n')
            if out.stat['Interactive'] < 0 or rje.yesNo('Quit BADASP?',default='N'):
                sys.exit()

        # Output base name: sequence file name minus '.fas' and '.anc', unless basefile= overrides it
        basename = tree.obj['SeqList'].info['Name']
        if basename[-4:] == '.fas':
            basename = basename[:-4]
        if basename[-4:] == '.anc':
            basename = basename[:-4]
        if basefile:
            basename = basefile

    except SystemExit:
        raise
    except:
        mainlog.errorLog('Major Error in badasp loading sequences and tree',True)

    try:    ### <2> ### Define Subfamilies
        while out.stat['Interactive'] > 0 or tree.groupNum() < 2:
            tree.treeGroup(callmenu=True)
            if tree.groupNum() >= 2:
                break
            mainlog.errorLog('Must have at least two subfamilies for specificity analyses.',printerror=False)
            if out.stat['Interactive'] < 0 or rje.yesNo('Continue without specificity analyses?'):
                cmd_list.append('funcspec=')    # Later 'funcspec=' commands override earlier ones in <4a>
                break
            elif rje.yesNo('Abort BADASP?'):
                sys.exit()
    except SystemExit:
        raise
    except:
        mainlog.errorLog('Major Error in BADASP subfamilies',True)

    try:    ### <3> ### GASP Ancestral Sequence Prediction
        if tree.node[-1].obj['Sequence'] is None:   # No ancseq loaded
            while out.stat['Interactive'] > 0 and rje.yesNo('Use %s for output filenames?' % basename) == False:
                basename = rje.choice('FILEname (FILE.anc.fas, FILE.anc.nsf, FILE.txt)?: ', default=basename)
            mygasp = rje_ancseq.Gasp(tree=tree,ancfile=basename,cmd_list=cmd_list,log=mainlog)
            out.verbose(0,2,'%s' % mygasp.details(),1)
            if out.stat['Interactive'] > 0:
                if rje.yesNo('Use these parameters?') == False:
                    mygasp.edit()
            mygasp.gasp()
    except:
        mainlog.errorLog('Major Error in BADASP GASP',True)

    try:    ### <4> ### Perform Functional Specificity and Sequence Conservation Calculations
        _stage = '<4> Specificity/Conservation Analyses'
        aaprop = rje_aaprop.AAPropMatrix(log=mainlog,cmd_list=cmd_list)
        query = tree.obj['SeqList'].obj['QuerySeq']
        ## <a> ## Chosen Methods
        _stage = '<4a> Specificity/Conservation Analyses - Chosen Methods'
        # Work on copies: methods may be removed below and the module-level lists must stay intact
        funcspec = list(rje_specificity.methodlist)   # ['BAD','BADN','BADX']
        seqcon = list(rje_conseq.methodlist) # ['info']
        for cmd in cmd_list:
            if cmd.startswith('funcspec='):
                funcspec = cmd[len('funcspec='):].split(',')
            if cmd.startswith('seqcon='):
                seqcon = cmd[len('seqcon='):].split(',')
        if 'all' in funcspec:
            funcspec = list(rje_specificity.methodlist)
        if 'all' in seqcon:
            seqcon = list(rje_conseq.methodlist)
        # Query-dependent methods: without a query, the user must drop the method or pick a query
        for method in ['BADX','BADN','QPCon_Mean','QPCon_Abs','QPCon_Mean_All']:
            while method in funcspec and query is None:
                if rje.yesNo('Method %s needs query but none given. Drop %s from specificity methods?' % (method,method)):
                    funcspec.remove(method)
                    break
                for seq in tree.obj['SeqList'].seq:
                    if rje.yesNo('Method %s needs query but none given. Use sequence 1 (%s)?' % (method,seq.shortName()),default='N'):
                        query = seq
                        # NOTE(review): query was read from obj['QuerySeq'] above but is stored under obj['Query'] - confirm key
                        tree.obj['SeqList'].obj['Query'] = seq
                        break
            while method in seqcon and query is None:
                if rje.yesNo('Method %s needs query but none given. Drop %s from conservation methods?' % (method,method)):
                    seqcon.remove(method)
                    break
                for seq in tree.obj['SeqList'].seq:
                    if rje.yesNo('Method %s needs query but none given. Use sequence 1 (%s)?' % (method,seq.shortName()),default='N'):
                        query = seq
                        # NOTE(review): same 'Query' vs 'QuerySeq' key question as above
                        tree.obj['SeqList'].obj['Query'] = seq
                        break

        qname = query
        if query:
            qname = query.info['Name']
        out.verbose(0,3,'\nQuery = %s' % qname,2)

        ## <b> ## Spec Calculations
        _stage = '<4b> Specificity Calculations'
        specmatrix = rje_specificity.FuncSpec(log=mainlog,cmd_list=cmd_list,tree=tree,aaprop=aaprop)
        specmatrix.calcScore(query=query,methods=funcspec)

        ## <c> ## Conservation Calculations
        _stage = '<4c> Specificity/Conservation Analyses - Conservation Calculations'
        conseq = rje_conseq.SeqStat(log=mainlog,cmd_list=cmd_list,tree=tree,aaprop=aaprop)
        conseq.calcScore(query=query,methods=seqcon)   ### Sends appropriate seqlist to self.calcScore()

        ## <d> ## Special Case: QPCon vs All seqs
        _stage = '<4d> Specificity/Conservation Analyses - QPCon vs All'
        qpconall = []
        if 'QPCon_Mean_All' in seqcon and query:
            qpconall.append('QPCon_Mean')
        for qp in qpconall:
            # Keep the all-sequence results under '<method>_All' before the family-only rerun below
            conseq.score['%s_All' % qp] = conseq.score[qp]
            for results in [conseq.alnwin,conseq.qrywin,conseq.rank,conseq.alnrankwin,conseq.qryrankwin]:
                if qp in results:
                    results['%s_All' % qp] = results[qp]

        _stage = '<4d> Specificity/Conservation Analyses - FamQP'
        famqp = []
        if 'QPCon_Mean' in seqcon:
            famqp.append('QPCon_Mean')
        if 'QPCon_Abs' in seqcon:
            famqp.append('QPCon_Abs')
        if len(famqp) > 0 and query:    #!# And subfam option?
            # Collect the sequences of every subfamily clade that contains the query
            qseq = []
            for fam in tree.subfam:
                for node in tree._nodeClade(fam):
                    if query == node.obj['Sequence']:
                        for qnode in tree._nodeClade(fam):
                            qseq.append(qnode.obj['Sequence'])
            conseq.calcScore(query=query,seqlist=qseq,methods=famqp)   ### Sends appropriate seqlist to self.calcScore()

    except:
        mainlog.errorLog('Major Error in BADASP Specificity Analysis (%s):' % _stage,True)

    BADASP = None   # Output file handle: None until opened, so the error handler can test it safely
    line = None     # Row under construction: None when nothing is pending
    try:    ### <5> ### Full Output Results
        _stage = '<5> Full Output'
        # This output is in a tab- or comma-delimited file for easy manipulation or viewing with other programs.
        # (1) statistics for a given residue;
        # (2) statistics for a given window size across
        # - (a) the whole alignment,    (node=None)
        # - (b) the Query protein of interest (if given) and    (node=QueryNode)
        # - (c) the ancestral sequence of each subfamily;   (node=ancnode)
        # (3) Predicted ancestral sequences at
        # - (a) the root and
        # - (b) the ancestor of each subfamily.
        delimit = rje.getDelimit(cmd_list)

        ## <a> ## Setup
        _stage = '<5a> Output - Setup'
        rankout = specmatrix.opt['Rank']
        root = tree.node[-1].obj['Sequence']      #!# At some point, make sure this is the most ancient duplication!
        out.verbose(0,3,'\nBADASP Results Output (%s.badasp) ...' % basename,0)

        ## <b> ## Header
        _stage = '<5b> Output - Header'
        if append_file:
            BADASP = open(append_file, 'a')
        else:
            BADASP = open('%s.badasp' % basename, 'w')
            BADASP.write("BADASP Output: %s\n" % (time.asctime(time.localtime(time.time()))))
            BADASP.write('%s\n\n' % cmd_list)
        header = ['aln_pos','anc_aa'] # Aln Pos and AA
        alnlen = 0
        statlist = funcspec + seqcon
        _stage = '<5b-i> Output - Header Query'
        if query:
            header += ['qry_pos','qry_aa']  # Qry Pos and AA
        _stage = '<5b-ii> Output - Header Subfam'
        for f in range(len(tree.subfam)):
            header += ['fam%d_pos' % (f+1),'fam%d_aa' % (f+1)]   # Subfam Pos and AA
        for func in statlist:
            _stage = '<5b-iii> Output - Header %s' % func
            statobj = statObj(method=func,objlist=[specmatrix,conseq])
            fs = func.lower()
            alnlen = len(statobj.score[func])
            header.append(fs)                   # Score
            if rankout:
                header.append('%s_rank' % fs)   # Rank
            if statobj.stat['WinSize'] > 1:
                header.append('%s_alnwin' % fs)     # Full align window
                if rankout:
                    header.append('%s_alnrankwin' % fs)   # Rank
                if query:
                    header.append('%s_qrywin' % fs) # Qry window
                    if rankout:
                        header.append('%s_qryrankwin' % fs)   # Rank
                if func in funcspec:
                    for f in range(len(tree.subfam)):
                        header.append('%s_fam%d_win' % (fs,f+1))  # Subfam windows
                        if rankout:
                            header.append('%s_fam%d_rankwin' % (fs,f+1))  # Subfam windows
        # The header row is always written, even when appending to an existing file
        BADASP.write('%s\n' % delimit.join(header))
        out.verbose(1,3,'%s...' % delimit.join(header),0)

        ## <c> ## Stats
        _stage = '<5c> Stats'
        qr = 0  # Qry pos
        fr = [0] * len(tree.subfam) # List of subfam positions
        aa = '' # Root aa
        qa = '' # Qry aa
        fa = [''] * len(tree.subfam) # List of subfam aas
        for r in range(alnlen):
            # <i> # Positions and aas
            _stage = '<5c-i> Output - Stats, positions & aas'
            aa = root.info['Sequence'][r]
            if query:
                qa = query.info['Sequence'][r]
                if qa != '-':
                    qr += 1
            for f in range(len(tree.subfam)):
                fa[f] = tree.subfam[f].obj['Sequence'].info['Sequence'][r]
                if fa[f] != '-':
                    fr[f] += 1

            # <ii> # Positions and AAs ii
            _stage = '<5c-ii> Output - Pos & AA ii'
            line = ['%d' % (r+1), aa]    # Aln Pos and AA
            if query:
                if qa == '-':
                    line += ['-',qa]  # Qry Pos and AA
                else:
                    line += ['%d' % qr,qa]  # Qry Pos and AA
            for f in range(len(tree.subfam)):
                if fa[f] == '-':
                    line += ['-',fa[f]]   # Subfam Pos and AA
                else:
                    line += ['%d' % fr[f],fa[f]]   # Subfam Pos and AA

            # <iii> # Stats
            _stage = '<5c-iii> Output - Stats'
            for func in statlist:
                statobj = statObj(method=func,objlist=[specmatrix,conseq])
                line.append(str(statobj.score[func][r]))   # Score
                if rankout:
                    line.append(str(statobj.rank[func][r]))   # Rank
                # Same WinSize test as the header, so row and header columns always agree
                if statobj.stat['WinSize'] > 1:
                    line.append(str(statobj.alnwin[func][r]))     # Full align window
                    if rankout:
                        line.append(str(statobj.alnrankwin[func][r]))   # Rank
                    if query:
                        line.append(str(statobj.qrywin[func][r])) # Qry window
                        if rankout:
                            line.append(str(statobj.qryrankwin[func][r]))   # Rank
                    if func in funcspec:
                        for fam in tree.subfam:
                            line.append(str(statobj.famwin[func][fam][r]))  # Subfam windows
                            if rankout:
                                line.append(str(statobj.famrankwin[func][fam][r]))   # Subfam windows
            # <iv> # Writing
            _stage = '<5c-iv> Output - Writing'
            BADASP.write('%s\n' % delimit.join(line))
            line = None     # Row written: the error handler must not write it again
        BADASP.close()
        out.verbose(0,2,'Done!',2)

    except:
        mainlog.errorLog('Fatal Error in BADASP Full output (%s):' % _stage,True)
        # Flush any partly-built row, but only if the output file was actually opened and is still open
        if BADASP is not None and not BADASP.closed:
            try:
                if line is not None:
                    BADASP.write('%s\n' % delimit.join(line))
            finally:
                BADASP.close()

    BADASP = None   # Forget the (closed) full-output handle before the partial output stage
    line = None
    try:    ### <6> ### Partial Results Output
        _stage = '<6> Partial Output'

        ## <a> ## Setup
        _stage = '<6a> Output - Setup'
        # statlist, alnlen, rankout, root & delimit from <5> above
        _part_append = False
        if out.stat['Interactive'] > 0 and rje.yesNo('Output additional, filtered results?',default='N'):
            partfile = rje.choice('Name for partial results file?:','%s.partial.badasp' % basename,confirm=True)
            if rje.checkForFile(partfile) and rje.yesNo('File %s exists. Append file without headers?' % partfile):
                _part_append = True
        else:
            return
        if rje.yesNo('Filter output columns?',default='N'):
            if rje.yesNo('Output query details (pos,aa & win)?') == False:
                query = None
            f = 1
            for fam in tree.subfam[0:]:     # Iterate over a copy: subfams may be removed from the tree
                if rje.yesNo('Output subfam %d (%s) details (pos,aa & win)?' % (f,fam.info['CladeName'])) == False:
                    tree.subfam.remove(fam)
                f += 1
            for func in statlist[0:]:       # Iterate over a copy: stats may be removed
                if rje.yesNo('Output %s results?' % func) == False:
                    statlist.remove(func)

        alnout = [True] * alnlen    # Whether each alignment column passes all row filters
        if rje.yesNo('Filter Rows by Results VALUES?'):
            out.verbose(0,0,'Initial Defaults are minmum values. Accept intital default for no filtering of given Stat.',1)
            for stat in statlist:
                ### Filter by value? ###
                statobj = statObj(method=stat,objlist=[specmatrix,conseq])
                cutoff = rje.getFloat('Min. value for %s?:' % stat,default='%f' % min(statobj.score[stat]),confirm=True)
                for r in range(alnlen):
                    if statobj.score[stat][r] < cutoff:
                        alnout[r] = False
        if rankout and rje.yesNo('Filter Rows by Results RANKS?'):
            out.verbose(0,0,'Ranks range from 0 (low) to 1 (high).',1)
            for stat in statlist:
                ### Filter by Rank? ###
                statobj = statObj(method=stat,objlist=[specmatrix,conseq])
                cutoff = rje.getFloat('Min. rank for %s?:' % stat,default='0.0',confirm=True)
                for r in range(alnlen):
                    if statobj.rank[stat][r] < cutoff:
                        alnout[r] = False

        out.verbose(0,3,'\nBADASP Partial Results Output (%s) ...' % partfile,0)

        ## <b> ## Header
        _stage = '<6b> Partial Output - Header'
        if _part_append:
            BADASP = open(partfile, 'a')
        else:
            BADASP = open(partfile, 'w')
            BADASP.write("Partial BADASP Output: %s\n" % (time.asctime(time.localtime(time.time()))))
            BADASP.write('%s\n\n' % cmd_list)
        header = ['aln_pos','anc_aa'] # Aln Pos and AA
        _stage = '<6b-i> Partial Output - Header Query'
        if query:
            header += ['qry_pos','qry_aa']  # Qry Pos and AA
        _stage = '<6b-ii> Partial Output - Header Subfam'
        for f in range(len(tree.subfam)):
            header += ['fam%d_pos' % (f+1),'fam%d_aa' % (f+1)]   # Subfam Pos and AA
        for func in statlist:
            _stage = '<6b-iii> Partial Output - Header %s' % func
            statobj = statObj(method=func,objlist=[specmatrix,conseq])
            fs = func.lower()
            header.append(fs)                   # Score
            if rankout:
                header.append('%s_rank' % fs)   # Rank
            if statobj.stat['WinSize'] > 1:
                header.append('%s_alnwin' % fs)     # Full align window
                if rankout:
                    header.append('%s_alnrankwin' % fs)   # Rank
                if query:
                    header.append('%s_qrywin' % fs) # Qry window
                    if rankout:
                        header.append('%s_qryrankwin' % fs)   # Rank
                if func in funcspec:
                    for f in range(len(tree.subfam)):
                        header.append('%s_fam%d_win' % (fs,f+1))  # Subfam windows
                        if rankout:
                            header.append('%s_fam%d_rankwin' % (fs,f+1))  # Subfam windows
        # The header row is always written, even when appending to an existing file
        BADASP.write('%s\n' % delimit.join(header))
        out.verbose(1,3,'%s...' % delimit.join(header),0)

        ## <c> ## Stats
        _stage = '<6c> Stats'
        qr = 0  # Qry pos
        fr = [0] * len(tree.subfam) # List of subfam positions
        aa = '' # Root aa
        qa = '' # Qry aa
        fa = [''] * len(tree.subfam) # List of subfam aas
        for r in range(alnlen):
            # <i> # Positions and aas
            # Counters advance for EVERY column, including filtered ones, so reported positions stay correct
            _stage = '<6c-i> Partial Output - Stats, positions & aas'
            aa = root.info['Sequence'][r]
            if query:
                qa = query.info['Sequence'][r]
                if qa != '-':
                    qr += 1
            for f in range(len(tree.subfam)):
                fa[f] = tree.subfam[f].obj['Sequence'].info['Sequence'][r]
                if fa[f] != '-':
                    fr[f] += 1
            if not alnout[r]:   # Column filtered out: no row is written
                continue

            # <ii> # Positions and AAs ii
            _stage = '<6c-ii> Partial Output - Pos & AA ii'
            line = ['%d' % (r+1), aa]    # Aln Pos and AA
            if query:
                if qa == '-':
                    line += ['-',qa]  # Qry Pos and AA
                else:
                    line += ['%d' % qr,qa]  # Qry Pos and AA
            for f in range(len(tree.subfam)):
                if fa[f] == '-':
                    line += ['-',fa[f]]   # Subfam Pos and AA
                else:
                    line += ['%d' % fr[f],fa[f]]   # Subfam Pos and AA

            # <iii> # Stats
            _stage = '<6c-iii> Partial Output - Stats'
            for func in statlist:
                statobj = statObj(method=func,objlist=[specmatrix,conseq])
                line.append(str(statobj.score[func][r]))   # Score
                if rankout:
                    line.append(str(statobj.rank[func][r]))   # Rank
                # Same WinSize test as the header, so row and header columns always agree
                if statobj.stat['WinSize'] > 1:
                    line.append(str(statobj.alnwin[func][r]))     # Full align window
                    if rankout:
                        line.append(str(statobj.alnrankwin[func][r]))   # Rank
                    if query:
                        line.append(str(statobj.qrywin[func][r])) # Qry window
                        if rankout:
                            line.append(str(statobj.qryrankwin[func][r]))   # Rank
                    if func in funcspec:
                        for fam in tree.subfam:
                            line.append(str(statobj.famwin[func][fam][r]))  # Subfam windows
                            if rankout:
                                line.append(str(statobj.famrankwin[func][fam][r]))   # Subfam windows
            # <iv> # Writing
            _stage = '<6c-iv> Partial Output - Writing'
            BADASP.write('%s\n' % delimit.join(line))
            line = None     # Row written: the error handler must not write it again
        BADASP.close()
        out.verbose(0,2,'Done!',2)

    except:
        mainlog.errorLog('Fatal Error in BADASP Partial output (%s):' % _stage,True)
        # Flush any partly-built row, but only if the partial file was actually opened and is still open
        if BADASP is not None and not BADASP.closed:
            try:
                if line is not None:
                    BADASP.write('%s\n' % delimit.join(line))
            finally:
                BADASP.close()