Ejemplo n.º 1
0
 def makePPI(self):  ### Generates files for Human-HIV PPI analysis
     '''
     Generates files for Human-HIV PPI analysis.

     For each HIV accession in the 'AccHIV' index of the 'HHPIDMap' table, a directory named after the HIV gene
     is made. For each entry mapped to that accession whose human PPI fasta file exists under 'PPIDir', a fasta
     file is written containing the HIV sequence followed by the content of the human PPI file.
     << False if no HIV sequences are loaded or an error was logged. Nothing is returned explicitly on success.
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         seqlist = rje_seq.SeqList(self.log,self.cmd_list+['seqin=%s' % self.getStr('HIVSeq'),'autoload=T'])
         if not seqlist.seqs(): return False
         seqmap = seqlist.seqNameDic('Max')
         mdb = self.db('HHPIDMap')
         ### ~ [2] Process ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         for hivacc in mdb.index('AccHIV'):
             # Map the HIV accession (anything after the first '.' is dropped) onto its sequence
             hivseq = seqmap[hivacc.split('.')[0]]
             # The HIV gene is the upper-case first '_'-delimited element of the short sequence name
             hivgene = hivseq.shortName().split('_')[0].upper()
             rje.mkDir(self,'%s/' % hivgene,log=True)
             etot = len(mdb.index('AccHIV')[hivacc])
             # Name used in the summary log; tracked explicitly so that the summary never depends on a loop
             # variable that is undefined (or left over from a previous accession) if no entries are returned.
             hivname = hivgene
             for entry in mdb.indexEntries('AccHIV',hivacc):
                 hivname = entry['HIV']
                 self.progLog('\r#PPI','Generating human-HIV PPI fasta files for %s %s PPI' % (rje.iStr(etot),hivgene))
                 pfile = self.getStr('PPIDir') + entry['Symbol'] + '.ppi.fas'
                 if not rje.exists(pfile):
                     self.errorLog('Cannot find human PPI file for %s interactor "%s"' % (entry['HIV'],entry['Symbol']),printerror=False)
                     continue
                 # Read the human PPI fasta first so a failed read does not leave an empty output file
                 PPI = open(pfile,'r')
                 try: ppifas = PPI.read()
                 finally: PPI.close()
                 FAS = open('%s/%s.%s.ppi.fas' % (hivgene,hivgene.lower(),entry['Symbol']),'w')
                 try:
                     FAS.write('>%s\n%s\n' % (hivseq.info['Name'],hivseq.getSequence()))
                     FAS.write(ppifas)
                 finally: FAS.close()
             self.printLog('\r#PPI','Generated human-HIV PPI fasta files for %s %s (%s) PPI.' % (rje.iStr(etot),hivname,hivgene))
     except: self.errorLog('%s.makePPI error' % self); return False
Ejemplo n.º 2
0
 def save(self):  ### Saves parsed REST output to files
     '''
     Saves parsed REST output to files.

     The lower-case 'Rest' setting selects what is written:
     - a key of self.dict['Output']: only that output is saved (to its file in self.dict['Outfile']);
     - 'full' or 'text': the result of restFullOutput() is saved to <RestOutDir><RestBase>.rest;
     - any other non-empty value: logged as not recognised; the full output is then saved only if self.i() is
       not negative and the user answers yes to the prompt;
     - empty: every parsed output with an outfile is saved.
     << True after the full output is written, False if an unrecognised format is not confirmed. Nothing is
        returned explicitly after saving individual outputs.
     '''
     def writeText(filename, text):  # Writes text to filename and always closes the handle
         OUT = open(filename, 'w')
         try: OUT.write(text)
         finally: OUT.close()

     rbase = '%s%s' % (self.getStr('RestOutDir'),
                       rje.baseFile(self.getStr('RestBase'), strip_path=True, keepext=True))
     rje.mkDir(self, self.getStr('RestOutDir'))
     outputs = rje.sortKeys(self.dict['Output'])
     restfmt = self.getStrLC('Rest')
     if restfmt in outputs: outputs = [restfmt]
     elif restfmt:
         label = restfmt
         if restfmt not in ['full', 'text']:
             self.printLog('#OUTFMT', 'REST output format "%s" not recognised.' % restfmt)
             if self.i() < 0 or not rje.yesNo('Output all parsed outputs?'):
                 return False
             label = 'full'
         outfile = '%s.rest' % rbase
         writeText(outfile, self.restFullOutput())
         self.printLog('#OUT', '%s: %s' % (label, outfile))
         return True
     for rkey in outputs:
         if rkey in self.dict['Outfile']:
             outfile = self.dict['Outfile'][rkey]
             rje.backup(self, outfile)
             writeText(outfile, self.dict['Output'][rkey])
             self.printLog('#OUT', '%s: %s' % (rkey, outfile))
         elif rkey not in ['intro']:  # 'intro' output is skipped without a warning
             self.warnLog('No outfile parsed/generated for %s output' % rkey)
0
 def ppi(self):  ### Remaining protein-protein interactions
     '''
     Remaining protein-protein interactions.

     For each hub in self.dict['PPI'] (in sorted key order), writes the accession numbers of its interactors
     that are found in self.dict['Seq'] to SLiMPID_PPI/<gene>.ppi.acc, one per line. Empty interactor names
     are skipped. Names missing from self.dict['Seq'] are collected and reported in a single '#BAD' log line.
     Returns without doing anything if self.dict['PPI'] is empty. Errors are passed to errorLog.
     '''
     try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if not self.dict['PPI']: return
         outdir = 'SLiMPID_PPI'
         rje.mkDir(self, outdir)
         badname = set()  # Interactor names with no sequence object
         ### ~ [2] Process ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         for hub in rje.sortKeys(self.dict['PPI']):
             gene = self.dict['Gene'][hub]
             acc = []
             for name in self.dict['PPI'][hub]:
                 if not name: continue
                 if name in self.dict['Seq']:
                     acc.append(self.dict['Seq'][name].info['AccNum'])
                 else:
                     badname.add(name)
             ACC = open('%s/%s.ppi.acc' % (outdir, gene), 'w')
             try:
                 ACC.write('\n'.join(acc))
             finally:
                 ACC.close()
             self.printLog(
                 '#PPI',
                 '%s => %d individual interactors' % (gene, len(acc)))
         if badname:
             self.printLog(
                 '#BAD', '%d "bad" protein names: %s' %
                 (len(badname), '; '.join(sorted(badname))))
     except:
         self.errorLog('Problem with SLiMPID.ppi()', quitchoice=True)
Ejemplo n.º 4
0
    def complexFasta(self):  ### Writes one fasta file per parsed complex
        '''
        Outputs parsed complex datasets in Fasta format.

        One file, <OutDir>HPRD_Complexes/<complex>_hprd.fas, is saved per complex that has any sequences.
        With the 'AllIso' option the 'Seq' value of each member is added with +=, otherwise it is appended as
        a single item. Errors are logged and then re-raised.
        '''
        try:
            ### Output directory ###
            outpath = self.info['OutDir'] + rje.makePath('HPRD_Complexes/')
            rje.mkDir(self, outpath)

            ### One dataset per complex ###
            for cname in rje.sortKeys(self.dict['Complex']):
                members = []
                for hid in self.dict['Complex'][cname]:
                    if not self.opt['AllIso']:
                        members.append(self.dict['HPRD'][hid]['Seq'])
                        continue
                    members += self.dict['HPRD'][hid]['Seq']
                fasfile = '%s%s_hprd.fas' % (outpath, cname)
                if not members:
                    continue
                self.obj['SeqList'].saveFasta(seqs=members, seqfile=fasfile)
            self.log.printLog('#FAS', 'HPRD complex fasta output complete.')
        except:
            self.log.errorLog('Error in HPRD.complexFasta()',
                              printerror=True,
                              quitchoice=False)
            raise
Ejemplo n.º 5
0
    def saveFasta(self):  ### Outputs parsed PPI datasets in Fasta format
        '''Outputs parsed PPI datasets in Fasta format.'''
        try:
            ### Setup ###
            datpath = self.info['OutDir'] + rje.makePath('HPRD_Datasets/')
            rje.mkDir(self, datpath)
            ## Check Seqs ##
            for p1 in rje.sortKeys(self.dict['PPI']):
                if 'Seq' not in self.dict['HPRD'][p1]:  #!# KeyError #!#
                    print p1, self.dict['HPRD'][p1]
                    self.deBug('No Seq for %s' % p1)

            ### All sequences ###
            self.obj['SeqList'].saveFasta()
            ### Output PPI Datasets ###
            for p1 in rje.sortKeys(self.dict['PPI']):
                mylist = []
                for p2 in self.dict['PPI'][p1]:
                    if self.opt['AllIso']:
                        mylist += self.dict['HPRD'][p2]['Seq']
                    else:
                        mylist.append(self.dict['HPRD'][p2]['Seq'])
                sfile = '%s%s_hprd.fas' % (datpath,
                                           self.dict['HPRD'][p1]['gene'])
                if mylist:
                    self.obj['SeqList'].saveFasta(seqs=mylist, seqfile=sfile)
            self.log.printLog('#FAS', 'HPRD PPI fasta output complete.')
        except:
            self.log.errorLog('Error in HPRD.saveFasta()',
                              printerror=True,
                              quitchoice=False)
Ejemplo n.º 6
0
 def makePPIDatasets(self):  ### Generate PPI datasets from pairwise data
     '''
     Generate PPI datasets from pairwise data.

     Saves YeastPPI/<hub>.fas for every hub in self.dict['PPI'] that has at least three spokes listed and at
     least three of them present in self.dict['SeqDict']. Errors are logged and then re-raised.
     '''
     try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         rje.mkDir(self, 'YeastPPI/')
         seqdict = self.dict['SeqDict']
         ### ~ [2] Parse data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         hubtotal = len(self.dict['PPI'])
         saved = 0  # Number of fasta files written
         for hubnum, hub in enumerate(rje.sortKeys(self.dict['PPI'])):
             progress = 100.0 * hubnum / hubtotal
             self.progLog(
                 '\r#FAS', 'Generating %s PPI fasta files: %.2f' %
                 (rje.integerString(saved), progress))
             spokes = self.dict['PPI'][hub]
             if len(spokes) >= 3:
                 spokeseqs = [seqdict[spoke] for spoke in spokes if spoke in seqdict]
                 if len(spokeseqs) >= 3:
                     fasfile = rje.makePath('YeastPPI/%s.fas' % hub, wholepath=True)
                     self.obj['SeqList'].saveFasta(spokeseqs, fasfile, log=False)
                     saved += 1
         self.printLog(
             '\r#FAS',
             'Generation of %s PPI fasta files from %s hubs complete.' %
             (rje.integerString(saved), rje.integerString(hubtotal)))
     except:
         self.errorLog(rje_zen.Zen().wisdom())
         raise  # Delete this if method error not terrible
Ejemplo n.º 7
0
 def dpi(self):  ### Domain-protein interactions
     '''
     Domain-protein interactions.

     For each domain in self.dict['Domain'] (sorted key order), pools the PPI lists of the domain's hubs, keeps
     only the spokes that occur more than once in that pool, removes those spokes from the individual hub lists
     in self.dict['PPI'] (in both directions) and writes the accession numbers of the retained spokes to
     SLiMPID_DPI/<domain>.dpi.acc. Hubs left with no interactors are then dropped from self.dict['PPI'].
     Returns without doing anything if self.dict['Domain'] is empty. Errors are passed to errorLog.
     NOTE: self.dict['PPI'] is modified in place, so results for later domains depend on earlier ones.
     '''
     try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if not self.dict['Domain']: return
         outdir = 'SLiMPID_DPI'
         rje.mkDir(self, outdir)
         dpi = {}  # Dictionary of {domain:[interactors]}
         badname = []  # Interactor names that are not keys of self.dict['Seq']
         ### ~ [2] Process ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         for dom in rje.sortKeys(self.dict['Domain']):
             dpi[dom] = []
             # Pool the interactors of every hub with this domain (duplicates kept for counting)
             for hub in self.dict['Domain'][dom]:
                 if hub in self.dict['PPI']:
                     dpi[dom] += self.dict['PPI'][
                         hub]  # Add with redundancy
             # Iterate over a copy ([0:]) because the list itself is edited in the loop
             for spoke in dpi[dom][0:]:
                 if dpi[dom].count(spoke) == 1:
                     dpi[dom].remove(
                         spoke)  # Must have 2+ domain interactions
             # Take the retained spokes out of the hub PPI lists, and the hub out of the spoke's own list
             for hub in self.dict['Domain'][dom]:
                 if hub not in self.dict['PPI']: continue
                 for spoke in self.dict['PPI'][hub][0:]:
                     if spoke in dpi[dom]:
                         self.dict['PPI'][hub].remove(spoke)
                         if spoke in self.dict['PPI'] and hub in self.dict[
                                 'PPI'][spoke]:
                             self.dict['PPI'][spoke].remove(hub)
             # presumably returns the list sorted with duplicates removed - confirm against rje.sortUnique
             dpi[dom] = rje.sortUnique(dpi[dom], False, False)
             acc = []
             for name in dpi[dom]:
                 if not name: continue
                 if name in self.dict['Seq']:
                     acc.append(self.dict['Seq'][name].info['AccNum'])
                 elif name not in badname:
                     badname.append(name)
             # One accession number per line; the file is written even if acc is empty
             open('%s/%s.dpi.acc' % (outdir, dom),
                  'w').write(string.join(acc, '\n'))
             self.printLog('#DPI',
                           '%s domain => %d interactors' % (dom, len(acc)))
         if badname:
             badname.sort()
             self.printLog(
                 '#BAD', '%d "bad" protein names: %s' %
                 (len(badname), string.join(badname, '; ')))
         ### ~ [3] Cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         hx = len(self.dict['PPI'])  # Hub count before cleanup, for the summary
         for hub in rje.sortKeys(self.dict['PPI']):
             # Keep only hubs with a non-empty name and at least one remaining interactor
             if hub and self.dict['PPI'][hub]: continue
             self.dict['PPI'].pop(hub)
             self.printLog('#DPI',
                           'No %s PPI left after DPI removed' % hub,
                           screen=False)
         self.printLog(
             '#PPX', '%s of %s PPI hubs remain after DPI removed' %
             (rje.integerString(len(
                 self.dict['PPI'])), rje.integerString(hx)))
     except:
         self.errorLog('Problem with SLiMPID.dpi()', quitchoice=True)
Ejemplo n.º 8
0
 def setup(self):    ### Main class setup method.
     '''
     Main class setup method.

     Builds the SeqList object, checks that sequences were loaded, optionally adds this object's name to the
     SeqList 'Blast2Fas' list and makes the results directory.
     << True if setup was successful, else False.
     '''
     try:### ~ [1] Setup SeqList ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         seqcmd = ['keepblast=T'] + self.cmd_list + ['autofilter=F','align=F','haqbat=None']
         self.obj['SeqList'] = rje_seq.SeqList(self.log,seqcmd)
         self.obj['SeqList']._checkForDup(True)
         if not self.seqNum():
             self.errorLog('No sequences loaded!',printerror=False)
             return False
         if self.opt['AddQueries']:
             if self.name() not in self.obj['SeqList'].list['Blast2Fas']:
                 self.obj['SeqList'].list['Blast2Fas'].append(self.name())
         ### ~ [2] Setup Results Directory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         # An empty or 'none' HaqDir is replaced by a directory named after this object's name
         if self.info['HaqDir'].lower() in ['','none']:
             namebase = rje.baseFile(self.name(), strip_path=True)
             self.info['HaqDir'] = '%s_HAQESAC/' % namebase
         rje.mkDir(self,self.info['HaqDir'])
     except:
         self.errorLog('Problem during %s setup.' % self)
         return False    # Setup failed
     return True     # Setup successful
Ejemplo n.º 9
0
 def saveReadMe(self,
                filename='pydocs.txt',
                append=False):  ### Prints docs for modules to file
     '''
     Prints docs for modules to file.

     For each source directory in the PyDoc object's 'SourceDir' list, a '-<sourcedir>:' heading is written,
     followed by a banner line and the docstring of each entry of the 'Module' table whose key contains the
     source directory and whose <PyPath><sourcedir><module>.py file exists.
     >> filename:str = output file name
     >> append:boolean = append to filename if True; otherwise start a new file with readMeHeader() first
     Errors are passed to errorLog; nothing is returned.
     '''
     try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         pydoc = self.obj['PyDoc']
         if append:
             self.printLog('#DOC', 'Appending docstrings to %s' % filename)
             PYDOC = open(filename, 'a')
         else:
             rje.mkDir(self, filename)
             self.printLog('#DOC', 'Writing docstrings to %s' % filename)
             PYDOC = open(filename, 'w')
         dx = 0  # Number of module sections written
         try:  # The handle is closed whatever happens while writing
             if not append:
                 PYDOC.write(self.readMeHeader())
             db = self.db('Module')
             ### ~ [2] Output Docstrings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
             for sourcedir in pydoc.list['SourceDir']:
                 PYDOC.write('-%s:\n\n' % sourcedir)
                 for pyfile in db.dataKeys():
                     entry = db.data(pyfile)
                     module = entry['Module']
                     if sourcedir not in pyfile:
                         continue
                     if not os.path.exists(
                             '%s%s%s.py' % (pydoc.getStr('PyPath'),
                                            rje.makePath(sourcedir), module)):
                         continue
                     ## ~ [2a] ~ Module docstring ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                     mtxt = '### ~~~ Module %s ~ [%s] ~~~ ###' % (module,
                                                                  pyfile)
                     # Pad the banner with '~' on both sides until it is at least 122 characters long
                     while len(mtxt) < 122:
                         mtxt = mtxt[:5] + '~' + mtxt[5:-5] + '~' + mtxt[-5:]
                     try:
                         PYDOC.write('%s\n\n%s\n' % (mtxt, entry['DocString']))
                     except:
                         self.errorLog('Cannot write DocString for %s' % module,
                                       printerror=False)
                         PYDOC.write('%s\n\nDocString Error!\n' % (mtxt))
                     dx += 1  # Counted whether or not the docstring itself could be written
                 PYDOC.write('\n\n\n')
         finally:
             PYDOC.close()
         self.printLog(
             '#DOC', 'Output to %s complete: %s modules.' %
             (filename, rje.iStr(dx)))
     except:
         self.errorLog('Error in %s.saveReadMe()' % self.prog())
Ejemplo n.º 10
0
 def setupSourceData(self):    ### Main class setup method.                                                      #V0.0
     '''
     Setup and optionally download source data.

     Adds an empty 'Source' table to the DB object, makes the 'SourcePath' directory and then calls
     sourceDataFile() for 'SpecFile', 'TaxMap' and 'NameMap' in turn, stopping at the first that fails.
     << True if all three calls succeed, else False (after logging the error).
     '''
     try:### ~ [0] Setup Source Data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         sourcefields = ['Name','File','Status','Entries','URL']
         self.obj['DB'].addEmptyTable('Source',sourcefields,keys=['Name'],log=False)   # Store Source info
         rje.mkDir(self,self.getStr('SourcePath'),True)
         self.printLog('#~~#','## ~~~~~~~~~~~~~~~~~~~~~~ SETUP TAXONOMIC DATA ~~~~~~~~~~~~~~~~~~~~~ ##',timeout=False)
         ### ~ [1] Source files: Uniprot species codes, then the NCBI TaxID and name mapping files ~~~~~~~~~~~~~~ ###
         for source in ['SpecFile','TaxMap','NameMap']:
             if self.sourceDataFile(source): continue
             raise IOError
     except:
         self.errorLog('%s.setupSourceData failure' % self)
         return False
     return True
Ejemplo n.º 11
0
 def makePPI(self):  ### Generates files for Human-HIV PPI analysis
     '''
     Generates files for Human-HIV PPI analysis.

     For each HIV accession in the 'AccHIV' index of the 'HHPIDMap' table, a directory named after the HIV gene
     is made. For each entry mapped to that accession whose human PPI fasta file exists under 'PPIDir', a fasta
     file is written containing the HIV sequence followed by the content of the human PPI file.
     << False if no HIV sequences are loaded or an error was logged. Nothing is returned explicitly on success.
     '''
     try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         seqlist = rje_seq.SeqList(
             self.log, self.cmd_list +
             ['seqin=%s' % self.getStr('HIVSeq'), 'autoload=T'])
         if not seqlist.seqs(): return False
         seqmap = seqlist.seqNameDic('Max')
         mdb = self.db('HHPIDMap')
         ### ~ [2] Process ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         for hivacc in mdb.index('AccHIV'):
             # map HIV accession numbers (minus anything after the first '.') on to sequences
             accnum = hivacc.split('.')[0]
             hivseq = seqmap[accnum]
             # extract short HIV name from sequence ID
             hivgene = hivseq.shortName().split('_')[0].upper()
             # create directory named after HIV gene
             rje.mkDir(self, '%s/' % hivgene, log=True)
             # copy human PPI files into directories, adding HIV gene
             etot = len(mdb.index('AccHIV')[hivacc])
             # HIV name for the summary log: kept in its own variable so the summary cannot hit an undefined
             # (or left-over) loop variable when no entries are returned for this accession
             hivname = hivgene
             for entry in mdb.indexEntries('AccHIV', hivacc):
                 hivname = entry['HIV']
                 self.progLog(
                     '\r#PPI',
                     'Generating human-HIV PPI fasta files for %s %s PPI' %
                     (rje.iStr(etot), hivgene))
                 pfile = self.getStr(
                     'PPIDir') + entry['Symbol'] + '.ppi.fas'
                 if not rje.exists(pfile):
                     self.errorLog(
                         'Cannot find human PPI file for %s interactor "%s"'
                         % (entry['HIV'], entry['Symbol']),
                         printerror=False)
                     continue
                 # read the human PPI fasta first so that a failed read leaves no empty output file
                 PPI = open(pfile, 'r')
                 try:
                     ppifas = PPI.read()
                 finally:
                     PPI.close()
                 FAS = open(
                     '%s/%s.%s.ppi.fas' %
                     (hivgene, hivgene.lower(), entry['Symbol']), 'w')
                 try:
                     FAS.write('>%s\n%s\n' %
                               (hivseq.info['Name'], hivseq.getSequence()))
                     FAS.write(ppifas)
                 finally:
                     FAS.close()
             self.printLog(
                 '\r#PPI',
                 'Generated human-HIV PPI fasta files for %s %s (%s) PPI.' %
                 (rje.iStr(etot), hivname, hivgene))
     except:
         self.errorLog('%s.makePPI error' % self)
         return False
Ejemplo n.º 12
0
 def fpi(self):  ### Family-protein interactions
     '''
     Family-protein interactions.

     For each query in self.dict['PPI'] (sorted key order) whose family in self.dict['Fam'] has 2+ members,
     pools the PPI lists of the family members, keeps only the spokes that occur more than once in that pool,
     removes those spokes from the individual member lists in self.dict['PPI'] (in both directions) and writes
     the accession numbers of the retained spokes to SLiMPID_FPI/<gene>.fpi.acc. Hubs left with no interactors
     are then dropped from self.dict['PPI'].
     Returns without doing anything if self.dict['Domain'] is empty. Errors are passed to errorLog.
     NOTE: self.dict['PPI'] is modified in place, so results for later queries depend on earlier ones.
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         # NOTE(review): the guard tests self.dict['Domain'], although the rest of the method only uses
         # 'PPI', 'Fam', 'Gene' and 'Seq' - confirm this is intended rather than copied from dpi()
         if not self.dict['Domain']: return
         outdir = 'SLiMPID_FPI'
         rje.mkDir(self,outdir)
         fpi = {}            # Dictionary of {family:[interactors]}
         badname = []        # Interactor names that are not keys of self.dict['Seq']
         ### ~ [2] Process ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         for qry in rje.sortKeys(self.dict['PPI']):
             # Queries with no family entry are logged and skipped; families of fewer than 2 are skipped silently
             try:
                 fam = self.dict['Fam'][qry]
                 if len(fam) < 2: continue
             except: self.errorLog('Problem with "%s" protein family' % qry); continue
             fpi[qry] = []
             # Pool the interactors of every family member (duplicates kept for counting)
             for hub in fam:
                 if hub not in self.dict['PPI']: continue
                 fpi[qry] += self.dict['PPI'][hub]      # Add with redundancy
             # Iterate over a copy ([0:]) because the list itself is edited in the loop
             for spoke in fpi[qry][0:]:
                 if fpi[qry].count(spoke) == 1: fpi[qry].remove(spoke)   # Must have 2+ family interactions
             # Take the retained spokes out of the member PPI lists, and the member out of the spoke's own list
             for hub in fam:
                 if hub not in self.dict['PPI']: continue
                 for spoke in self.dict['PPI'][hub][0:]:
                     if spoke in fpi[qry]:
                         self.dict['PPI'][hub].remove(spoke)
                         if spoke in self.dict['PPI'] and hub in self.dict['PPI'][spoke]: self.dict['PPI'][spoke].remove(hub)
             # presumably returns the list sorted with duplicates removed - confirm against rje.sortUnique
             fpi[qry] = rje.sortUnique(fpi[qry],False,False)
             acc = []
             gene = self.dict['Gene'][qry]
             for name in fpi[qry]:
                 if not name: continue
                 if name in self.dict['Seq']: acc.append(self.dict['Seq'][name].info['AccNum'])
                 elif name not in badname: badname.append(name)                     
             # One accession number per line; the file is written even if acc is empty
             open('%s/%s.fpi.acc' % (outdir,gene),'w').write(string.join(acc,'\n'))
             self.printLog('#FPI','%s family => %d interactors' % (gene,len(acc)))
         if badname:
             badname.sort()
             self.printLog('#BAD','%d "bad" protein names: %s' % (len(badname),string.join(badname,'; ')))
         ### ~ [3] Cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         hx = len(self.dict['PPI'])      # Hub count before cleanup, for the summary
         for hub in rje.sortKeys(self.dict['PPI']):
             # Keep only hubs with a non-empty name and at least one remaining interactor
             if hub and self.dict['PPI'][hub]: continue
             self.dict['PPI'].pop(hub)
             self.printLog('#FPI','No %s PPI left after FPI removed' % hub)
         self.printLog('#PPX','%s of %s PPI hubs remain after FPI removed' % (rje.integerString(len(self.dict['PPI'])),rje.integerString(hx)))
     except: self.errorLog('Problem with SLiMPID.fpi()',quitchoice=True)
Ejemplo n.º 13
0
 def domainFasta(self):    ### Outputs parsed domain and domain PPI datasets in Fasta format
     '''
     Outputs parsed domain tables and domain PPI datasets.

     Writes two tab-delimited files to OutDir: HPRD.domains.tdt (Domain, HPRD, Gene) and HPRD.domsource.tdt
     (Domain, Source). Then, for each domain, saves <OutDir>HPRD_Domain_Datasets/<domain>_hprd.fas with the
     sequences of all interactors of the proteins containing that domain. Domains with no sequences to save
     are logged instead. Errors are logged and then re-raised.
     '''
     try:
         ### ~ Tab delimited domain-HPRD pairs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         headers = ['Domain','HPRD','Gene']
         dfile = self.info['OutDir'] + 'HPRD.domains.tdt'
         rje.delimitedFileOutput(self,dfile,headers,'\t')
         sfile = self.info['OutDir'] + 'HPRD.domsource.tdt'
         shead = ['Domain','Source']
         rje.delimitedFileOutput(self,sfile,shead,'\t')
         dx = 0.0
         for domain in rje.sortKeys(self.dict['Domains']):
             self.log.printLog('\r#DOM','HPRD Domain output (%s): %.1f%%' % (dfile,dx/len(self.dict['Domains'])),newline=False,log=False)
             dx += 100.0
             for hid in self.dict['Domains'][domain]:
                 datadict = {'Domain':domain,'HPRD':hid,'Gene':self.dict['HPRD'][hid]['gene']}
                 rje.delimitedFileOutput(self,dfile,headers,'\t',datadict)
             for source in self.dict['DomainSource'][domain]:
                 datadict = {'Domain':domain,'Source':source}
                 rje.delimitedFileOutput(self,sfile,shead,'\t',datadict)
         self.log.printLog('\r#DOM','HPRD Domain output (%s): %s domains.' % (dfile,rje.integerString(len(self.dict['Domains']))))

         ### ~ Domain PPI Dataset Outputs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         datpath = self.info['OutDir'] + rje.makePath('HPRD_Domain_Datasets/')
         rje.mkDir(self,datpath)
         for domain in rje.sortKeys(self.dict['Domains']):
             ## Collect the unique interactors of all domain-containing proteins ##
             partners = set()    # A set gives de-duplication without rescanning a list for every interactor
             for p1 in self.dict['Domains'][domain]:
                 if p1 in self.dict['PPI']: partners.update(self.dict['PPI'][p1])
             ## Generate Sequence list (in sorted interactor order) and output ##
             mylist = []
             for p in sorted(partners):
                 if self.opt['AllIso']: mylist += self.dict['HPRD'][p]['Seq']
                 else: mylist.append(self.dict['HPRD'][p]['Seq'])
             fasfile = '%s%s_hprd.fas' % (datpath,domain)
             if mylist: self.obj['SeqList'].saveFasta(seqs=mylist,seqfile=fasfile)
             else: self.log.printLog('#DOM','No PPI partners for domain "%s"' % domain)
         self.log.printLog('\r#DOM','HPRD Domain fasta output complete.')
     except:
         self.log.errorLog('Error in HPRD.domainFasta()',printerror=True,quitchoice=False)
         raise
Ejemplo n.º 14
0
 def makePPIDatasets(self):  ### Generate PPI datasets from pairwise data
     '''
     Generate PPI datasets from pairwise data.

     Saves YeastPPI/<hub>.fas for every hub in self.dict['PPI'] that has at least three spokes listed and at
     least three of them present in self.dict['SeqDict']. Errors are logged and then re-raised.
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         rje.mkDir(self,'YeastPPI/')
         seqdict = self.dict['SeqDict']
         ### ~ [2] Parse data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         done = 0.0      # Progress numerator: 100.0 per hub processed
         nhubs = len(self.dict['PPI'])
         nfiles = 0      # Number of fasta files written
         for hub in rje.sortKeys(self.dict['PPI']):
             self.progLog('\r#FAS','Generating %s PPI fasta files: %.2f' % (rje.integerString(nfiles),done/nhubs))
             done += 100.0
             if len(self.dict['PPI'][hub]) < 3: continue
             hubseqs = []
             for spoke in self.dict['PPI'][hub]:
                 if spoke in seqdict: hubseqs.append(seqdict[spoke])
             if len(hubseqs) < 3: continue
             hubfile = rje.makePath('YeastPPI/%s.fas' % hub,wholepath=True)
             self.obj['SeqList'].saveFasta(hubseqs,hubfile,log=False)
             nfiles += 1
         self.printLog('\r#FAS','Generation of %s PPI fasta files from %s hubs complete.' % (rje.integerString(nfiles),rje.integerString(nhubs)))
     except:
         self.errorLog(rje_zen.Zen().wisdom())
         raise   # Delete this if method error not terrible
Ejemplo n.º 15
0
    def complexFasta(self):     ### Outputs parsed complex datasets in Fasta format
        '''
        Outputs parsed complex datasets in Fasta format.

        One file, <OutDir>HPRD_Complexes/<complex>_hprd.fas, is saved per complex that has any sequences.
        With the 'AllIso' option the 'Seq' value of each member is added with +=, otherwise it is appended as
        a single item. Errors are logged and then re-raised.
        '''
        try:
            ### Setup ###
            complexdir = self.info['OutDir'] + rje.makePath('HPRD_Complexes/')
            rje.mkDir(self,complexdir)

            ### Output Complex Datasets ###
            for cid in rje.sortKeys(self.dict['Complex']):
                cseqs = []
                for protein in self.dict['Complex'][cid]:
                    if self.opt['AllIso']:
                        cseqs += self.dict['HPRD'][protein]['Seq']
                    else:
                        cseqs.append(self.dict['HPRD'][protein]['Seq'])
                cfile = '%s%s_hprd.fas' % (complexdir,cid)
                if cseqs:
                    self.obj['SeqList'].saveFasta(seqs=cseqs,seqfile=cfile)
            self.log.printLog('#FAS','HPRD complex fasta output complete.')
        except:
            self.log.errorLog('Error in HPRD.complexFasta()',printerror=True,quitchoice=False)
            raise
Ejemplo n.º 16
0
 def ppi(self):  ### Remaining protein-protein interactions
     '''
     Remaining protein-protein interactions.

     For each hub in self.dict['PPI'] (in sorted key order), writes the accession numbers of its interactors
     that are found in self.dict['Seq'] to SLiMPID_PPI/<gene>.ppi.acc, one per line. Empty interactor names
     are skipped. Names missing from self.dict['Seq'] are collected and reported in a single '#BAD' log line.
     Returns without doing anything if self.dict['PPI'] is empty. Errors are passed to errorLog.
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if not self.dict['PPI']: return
         outdir = 'SLiMPID_PPI'
         rje.mkDir(self,outdir)
         badname = set()     # Interactor names with no sequence object
         ### ~ [2] Process ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         for hub in rje.sortKeys(self.dict['PPI']):
             gene = self.dict['Gene'][hub]
             acc = []
             for name in self.dict['PPI'][hub]:
                 if not name: continue
                 if name in self.dict['Seq']: acc.append(self.dict['Seq'][name].info['AccNum'])
                 else: badname.add(name)
             ACC = open('%s/%s.ppi.acc' % (outdir,gene),'w')
             try: ACC.write('\n'.join(acc))
             finally: ACC.close()
             self.printLog('#PPI','%s => %d individual interactors' % (gene,len(acc)))
         if badname:
             self.printLog('#BAD','%d "bad" protein names: %s' % (len(badname),'; '.join(sorted(badname))))
     except: self.errorLog('Problem with SLiMPID.ppi()',quitchoice=True)
Ejemplo n.º 17
0
 def dpi(self):  ### Domain-protein interactions
     '''
     Domain-protein interactions.

     For each domain in self.dict['Domain'] (sorted key order), pools the PPI lists of the domain's hubs, keeps
     only the spokes that occur more than once in that pool, removes those spokes from the individual hub lists
     in self.dict['PPI'] (in both directions) and writes the accession numbers of the retained spokes to
     SLiMPID_DPI/<domain>.dpi.acc. Hubs left with no interactors are then dropped from self.dict['PPI'].
     Returns without doing anything if self.dict['Domain'] is empty. Errors are passed to errorLog.
     NOTE: self.dict['PPI'] is modified in place, so results for later domains depend on earlier ones.
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if not self.dict['Domain']: return
         outdir = 'SLiMPID_DPI'
         rje.mkDir(self,outdir)
         dpi = {}            # Dictionary of {domain:[interactors]}
         badname = []        # Interactor names that are not keys of self.dict['Seq']
         ### ~ [2] Process ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         for dom in rje.sortKeys(self.dict['Domain']):
             dpi[dom] = []
             # Pool the interactors of every hub with this domain (duplicates kept for counting)
             for hub in self.dict['Domain'][dom]:
                 if hub in self.dict['PPI']: dpi[dom] += self.dict['PPI'][hub]      # Add with redundancy
             # Iterate over a copy ([0:]) because the list itself is edited in the loop
             for spoke in dpi[dom][0:]:
                 if dpi[dom].count(spoke) == 1: dpi[dom].remove(spoke)   # Must have 2+ domain interactions
             # Take the retained spokes out of the hub PPI lists, and the hub out of the spoke's own list
             for hub in self.dict['Domain'][dom]:
                 if hub not in self.dict['PPI']: continue
                 for spoke in self.dict['PPI'][hub][0:]:
                     if spoke in dpi[dom]:
                         self.dict['PPI'][hub].remove(spoke)
                         if spoke in self.dict['PPI'] and hub in self.dict['PPI'][spoke]: self.dict['PPI'][spoke].remove(hub)
             # presumably returns the list sorted with duplicates removed - confirm against rje.sortUnique
             dpi[dom] = rje.sortUnique(dpi[dom],False,False)
             acc = []
             for name in dpi[dom]:
                 if not name: continue
                 if name in self.dict['Seq']: acc.append(self.dict['Seq'][name].info['AccNum'])
                 elif name not in badname: badname.append(name) 
             # One accession number per line; the file is written even if acc is empty
             open('%s/%s.dpi.acc' % (outdir,dom),'w').write(string.join(acc,'\n'))
             self.printLog('#DPI','%s domain => %d interactors' % (dom,len(acc)))
         if badname:
             badname.sort()
             self.printLog('#BAD','%d "bad" protein names: %s' % (len(badname),string.join(badname,'; ')))
         ### ~ [3] Cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         hx = len(self.dict['PPI'])      # Hub count before cleanup, for the summary
         for hub in rje.sortKeys(self.dict['PPI']):
             # Keep only hubs with a non-empty name and at least one remaining interactor
             if hub and self.dict['PPI'][hub]: continue
             self.dict['PPI'].pop(hub)
             self.printLog('#DPI','No %s PPI left after DPI removed' % hub,screen=False)
         self.printLog('#PPX','%s of %s PPI hubs remain after DPI removed' % (rje.integerString(len(self.dict['PPI'])),rje.integerString(hx)))
     except: self.errorLog('Problem with SLiMPID.dpi()',quitchoice=True)
Ejemplo n.º 18
0
 def setup(self):  ### Main class setup method.
     '''
     Main class setup method.

     Builds the SeqList object, checks that sequences were loaded, optionally adds this object's name to the
     SeqList 'Blast2Fas' list and makes the results directory.
     << True if setup was successful, else False.
     '''
     try:  ### ~ [1] Setup SeqList ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         seqcmd = ['keepblast=T'] + self.cmd_list
         seqcmd = seqcmd + ['autofilter=F', 'align=F', 'haqbat=None']
         self.obj['SeqList'] = rje_seq.SeqList(self.log, seqcmd)
         self.obj['SeqList']._checkForDup(True)
         if self.seqNum():
             addquery = self.opt['AddQueries'] and self.name() not in self.obj['SeqList'].list['Blast2Fas']
             if addquery:
                 self.obj['SeqList'].list['Blast2Fas'].append(self.name())
             ### ~ [2] Setup Results Directory ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
             # An empty or 'none' HaqDir is replaced by a directory named after this object's name
             if self.info['HaqDir'].lower() in ['', 'none']:
                 self.info['HaqDir'] = '%s_HAQESAC/' % rje.baseFile(self.name(), strip_path=True)
             rje.mkDir(self, self.info['HaqDir'])
             return True  # Setup successful
         self.errorLog('No sequences loaded!', printerror=False)
         return False
     except:
         self.errorLog('Problem during %s setup.' % self)
         return False  # Setup failed
Ejemplo n.º 19
0
 def gopher(self):  ### Sets up data for GOPHER run
     '''
     Sets up data for GOPHER run.
     Formats the <Basefile>.ygob.fas and <Basefile>.yeast.fas BLAST databases, then converts each YGOB pillar in
     self.list['Pillars'] into the short names of those members found in the loaded sequences. The resulting ID list
     is written to BLAST/<acc>.blast.id for every pillar member that is in self.list['YeastSeq'] or, when that list is
     empty, has SpecCode 'YEAST'. self.dict['PillarMap'] is rebuilt as {pillar ID: accession after renaming}.
     Any exception is logged and then re-raised.
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         rje.mkDir(self,'BLAST/')
         for dbtype in ['ygob','yeast']:
             rje_blast.BLASTRun(self.log,self.cmd_list).formatDB(fasfile='%s.%s.fas' % (self.info['Basefile'],dbtype),protein=True,force=False)
         seqdict = self.obj['SeqList'].seqNameDic('AccNum')
         ymap = self.dict['PillarMap'] = {}
         ### ~ [2] Convert Pillars to BLAST IDs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         (px,ptot) = (0.0,len(self.list['Pillars'])); ox = 0
         for pillar in self.list['Pillars']:
             self.progLog('\r#YGOB','Converting YGOB Pillars for GOPHER: %.2f%%' % (px/ptot)); px += 100
             ## ~ [2a] Map pillar IDs onto loaded sequences ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             newpillar = []
             for yid in pillar:
                 # A temporary Sequence object is used purely to parse the accession number out of the pillar ID.
                 seq = rje_sequence.Sequence(self.log,self.cmd_list)
                 seq.opt['Yeast'] = True
                 seq.info['Name'] = yid
                 seq.extractDetails(gnspacc=True)
                 ygob = seq.info['AccNum']
                 if ygob in self.dict['Rename']: acc = self.dict['Rename'][ygob]
                 else: acc = ygob
                 ymap[yid] = acc     # Recorded even when the sequence is missing: [2b] looks every ID up again
                 if acc not in seqdict: self.printLog('\r#GENE','Non-coding gene %s (%s)? Cannot find in fasta file' % (acc,yid)); continue
                 try: newpillar.append(seqdict[acc].shortName())
                 except:
                     print('%s %s %s' % (yid,ygob,acc))
                     self.errorLog(rje_zen.Zen().wisdom())
             if not newpillar: continue
             ## ~ [2b] Output BLAST ID file for each yeast member ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             idtext = '\n'.join(newpillar)   # Same content for every member of the pillar, so build it once
             for ygob in pillar:
                 acc = ymap[ygob]
                 if acc not in seqdict: continue
                 if acc in self.list['YeastSeq'] or (not self.list['YeastSeq'] and seqdict[acc].info['SpecCode'] == 'YEAST'):
                     IDFILE = open(rje.makePath('BLAST/%s.blast.id' % acc,wholepath=True),'w')
                     try: IDFILE.write(idtext)
                     finally: IDFILE.close()     # Close explicitly rather than relying on garbage collection
                     ox += 1
         self.progLog('\r#YGOB','Converted YGOB Pillars for GOPHER: %s BLAST ID files.' % rje.iStr(ox))
     except: self.errorLog(rje_zen.Zen().wisdom()); raise   # Delete this if method error not terrible
Ejemplo n.º 20
0
    def saveFasta(self):    ### Outputs parsed PPI datasets in Fasta format
        '''Outputs parsed PPI datasets in Fasta format.'''
        try:
            ### Setup ###
            datpath = self.info['OutDir'] + rje.makePath('HPRD_Datasets/')
            rje.mkDir(self,datpath)
            ## Check Seqs ##
            for p1 in rje.sortKeys(self.dict['PPI']):
                if 'Seq' not in self.dict['HPRD'][p1]:      #!# KeyError #!#
                    print p1, self.dict['HPRD'][p1]
                    self.deBug('No Seq for %s' % p1)

            ### All sequences ###
            self.obj['SeqList'].saveFasta()
            ### Output PPI Datasets ###
            for p1 in rje.sortKeys(self.dict['PPI']):
                mylist = []
                for p2 in self.dict['PPI'][p1]:
                    if self.opt['AllIso']: mylist += self.dict['HPRD'][p2]['Seq']
                    else: mylist.append(self.dict['HPRD'][p2]['Seq'])
                sfile = '%s%s_hprd.fas' % (datpath,self.dict['HPRD'][p1]['gene'])
                if mylist: self.obj['SeqList'].saveFasta(seqs=mylist,seqfile=sfile)
            self.log.printLog('#FAS','HPRD PPI fasta output complete.')
        except: self.log.errorLog('Error in HPRD.saveFasta()',printerror=True,quitchoice=False)
Ejemplo n.º 21
0
 def save(self):     ### Saves parsed REST output to files
     '''
     Saves parsed REST output to files.
     The lower-case 'Rest' setting selects what is written:
     - a key of self.dict['Output']: only that output is saved, to its file in self.dict['Outfile'];
     - 'full' or 'text': the full REST output is saved to <RestOutDir><RestBase>.rest;
     - any other non-empty value: reported as unrecognised; the full REST output is saved only if self.i() >= 0 and
       the user answers yes to the prompt;
     - empty: every parsed output is saved (outputs without an outfile raise a warning, except 'intro').
     << True if the full REST output was written; False if an unrecognised format was not confirmed; None otherwise.
     '''
     rbase = '%s%s' % (self.getStr('RestOutDir'),rje.baseFile(self.getStr('RestBase'),strip_path=True,keepext=True))
     rje.mkDir(self,self.getStr('RestOutDir'))
     outputs = rje.sortKeys(self.dict['Output'])
     rest = self.getStrLC('Rest')
     if rest in outputs: outputs = [rest]
     elif rest:
         ### ~ Full REST output to a single file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         label = rest
         if rest not in ['full','text']:
             self.printLog('#OUTFMT','REST output format "%s" not recognised.' % rest)
             if self.i() < 0 or not rje.yesNo('Output all parsed outputs?'): return False
             label = 'full'
         outfile = '%s.rest' % rbase
         OUT = open(outfile,'w')
         try: OUT.write(self.restFullOutput())
         finally: OUT.close()    # Close explicitly rather than relying on garbage collection
         self.printLog('#OUT','%s: %s' % (label,outfile))
         return True
     ### ~ Individual parsed outputs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
     for rkey in outputs:
         if rkey in self.dict['Outfile']:
             rfile = self.dict['Outfile'][rkey]
             rje.backup(self,rfile)
             OUT = open(rfile,'w')
             try: OUT.write(self.dict['Output'][rkey])
             finally: OUT.close()
             self.printLog('#OUT','%s: %s' % (rkey,rfile))
         elif rkey not in ['intro']: self.warnLog('No outfile parsed/generated for %s output' % rkey)
Ejemplo n.º 22
0
 def saveReadMe(self,filename='pydocs.txt',append=False):      ### Prints docs for modules to file
     '''
     Prints docs for modules to file.
     For each source directory in the PyDoc object's 'SourceDir' list, writes a "-<sourcedir>:" heading followed by
     the docstring of every entry in the 'Module' table whose key contains that directory name and whose .py file
     exists under the PyDoc 'PyPath'. Errors are logged rather than raised.
     >> filename:str = output file name
     >> append:boolean = whether to append to filename; if False the file is overwritten and starts with
        self.readMeHeader().
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         pydoc = self.obj['PyDoc']
         if append:
             self.printLog('#DOC','Appending docstrings to %s' % filename)
             PYDOC = open(filename,'a')
         else:
             rje.mkDir(self,filename)
             self.printLog('#DOC','Writing docstrings to %s' % filename)
             PYDOC = open(filename,'w')
         dx = 0
         try:    # Guarantees the output file is closed even if writing fails part-way through
             if not append: PYDOC.write(self.readMeHeader())
             db = self.db('Module')
             ### ~ [2] Output Docstrings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
             for sourcedir in pydoc.list['SourceDir']:
                 PYDOC.write('-%s:\n\n' % sourcedir)
                 for pyfile in db.dataKeys():
                     entry = db.data(pyfile)
                     module = entry['Module']
                     if sourcedir not in pyfile: continue
                     if not os.path.exists('%s%s%s.py' % (pydoc.getStr('PyPath'),rje.makePath(sourcedir),module)): continue
                     ## ~ [2a] ~ Module docstring ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                     mtxt = '### ~~~ Module %s ~ [%s] ~~~ ###' % (module,pyfile)
                     # Pad the banner symmetrically with '~' (one each side per pass) until it is 122+ characters
                     while len(mtxt) < 122: mtxt = mtxt[:5] + '~' + mtxt[5:-5] + '~' + mtxt[-5:]
                     try: PYDOC.write('%s\n\n%s\n' % (mtxt,entry['DocString'])); dx += 1
                     except:
                         self.errorLog('Cannot write DocString for %s' % module,printerror=False)
                         PYDOC.write('%s\n\nDocString Error!\n' % (mtxt)); dx += 1
                 PYDOC.write('\n\n\n')
         finally: PYDOC.close()
         self.printLog('#DOC','Output to %s complete: %s modules.' % (filename,rje.iStr(dx)))
     except: self.errorLog('Error in %s.saveReadMe()' % self.prog())
Ejemplo n.º 23
0
    def mapPhosByBLAST(self,fasfile):   ### BLAST sequences against phosphoDB, align hits & mark sites (ID & Homology)
        '''
        BLAST sequences against phosphoDB, align hits and mark phosphosites (ID & Homology).
        Each query in fasfile is BLASTed against self.info['PELMFas']; hits passing the 'HomSim' threshold are aligned
        with the query and every known phosphosite (self.dict['PhosphoSites']) that sits on an identical residue in
        the query is written to the self.info['PhosRes'] table with evidence 'Self', 'ID' or 'Hom'. The alignment,
        plus an extra 'PhosAln' sequence, is saved to PhosALN/<AccNum>.phosaln.fas.
        >> fasfile:str = fasta file of query sequences
        << returns None; errors are logged rather than raised.
        '''
        try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            ## ~ [1a] Setup fasfile ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            scmd = self.cmd_list + ['seqin=%s' % fasfile,'autoload=T','autofilter=F']
            qseqlist = rje_seq.SeqList(self.log,scmd)
            qdict = qseqlist.seqNameDic()
            ## ~ [1b] Setup results files/directories ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            basefile = rje.baseFile(fasfile)
            # Default results table is named after the input file when no 'PhosRes' name was given
            if self.info['PhosRes'].lower() in ['','none']: self.info['PhosRes'] = '%s.phosres.tdt' % basefile
            headers = ['Name','Pos','AA','PELM','PELMPos','Evidence']
            delimit = rje.getDelimit(self.cmd_list,rje.delimitFromExt(filename=self.info['PhosRes']))
            # Called without a data dictionary - presumably this (re)creates the file with its header row; TODO confirm
            rje.delimitedFileOutput(self,self.info['PhosRes'],headers,delimit,rje_backup=True)
            ppath = rje.makePath('PhosALN')
            rje.mkDir(self,ppath)
            ## ~ [1c] Setup BLAST ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            pblast = rje_blast.BLASTRun(self.log,self.cmd_list+['formatdb=F'])
            pblast.setInfo({'Name':'%s.p.blast' % rje.baseFile(fasfile),'DBase':self.info['PELMFas'],'InFile':fasfile})
            pblast.setStat({'HitAln':pblast.stat['OneLine']})
            pblast.opt['Complexity Filter'] = False
            pblast.formatDB(force=False)
            ## ~ [1d] Setup GABLAM Stats ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            gkey = 'GABLAMO ID' #x# % self.info['GABLAMO Key']
            # Thresholds below 1.0 are scaled by 100 (treated as fractions), then floored at 0.0
            for g in ['ID','Hom']:
                if self.stat['%sSim' % g] < 1.0: self.stat['%sSim' % g] *= 100.0
                self.stat['%sSim' % g] = max(0.0,self.stat['%sSim' % g])

            ### ~ [2] PhosphoBLAST ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            pblast.blast(use_existing=True,log=True)    # BLAST
            pblast.readBLAST(gablam=True)               # Read in
            # Searches are popped off the list as they are processed, so pblast.search is empty afterwards
            while pblast.search:
                ## ~ [2a] Align relevant hits from each BLAST ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                search = pblast.search.pop(0)
                qseq = qdict[search.info['Name']]
                idlist = []     # Sequences whose sites count as 'ID' evidence (see the IDSim test below)
                qlen = qseq.aaLen()
                hitdict = search.hitSeq(self.obj['SeqList'])
                aln = rje_seq.SeqList(self.log,self.cmd_list+['autoload=F','autofilter=F'])
                aln.seq = [qseq]
                pdict = {}      # Dictionary of {hseq:[poslist]}
                rdict = {qseq:0}      # Dictionary of {hseq:res}
                for hit in search.hit[0:]:
                    hseq = hitdict[hit]
                    pdict[hseq] = []
                    for pos in rje.sortKeys(self.dict['PhosphoSites'][hseq.info['AccNum']]): pdict[hseq].append(pos)
                    # Self-hit: the phosphosites are re-keyed onto the query and the hit is not added to the alignment
                    if hit.info['Name'] == search.info['Name']:
                        if qseq.getSequence(case=False,gaps=False) != hseq.getSequence(case=False,gaps=False):
                            self.log.errorLog('Major problem: Search/Hit sequence mismatch for same sequence "%s"' % hit.info['Name'])
                        idlist.append(qseq)
                        pdict[qseq] = pdict.pop(hseq)
                        continue
                    # qvh = the hit's gkey ('GABLAMO ID') query statistic as a percentage of the query length
                    gdict = hit.globalFromLocal(qlen)
                    qvh = float(100 * gdict['Query'][gkey]) / float(qlen)
                    if qvh < self.stat['HomSim']:
                        pdict.pop(hseq)     # Below the homology threshold: hit is ignored completely
                        continue
                    aln.seq.append(hseq)
                    if (qseq.sameSpec(hseq) or not self.opt['UseSpec']) and qvh >= self.stat['IDSim']: idlist.append(hseq)
                    rdict[hseq] = 0
                aln.muscleAln()   #x#outfile='%s%s.phosaln.fas' % (ppath,qseq.info['AccNum']))
                # Extra all-gap 'PhosAln' sequence; addPhos() below is applied to aln.seq[-1] - presumably this
                # sequence, assuming _addSeq() appends to aln.seq; TODO confirm
                aln._addSeq('PhosAln','-' * qseq.seqLen())
                aln.info['Name'] = '%s%s.phosaln.fas' % (ppath,qseq.info['AccNum'])
                ## ~ [2b] Map phosphorylations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                print '>>>\n', aln.seq, pdict.keys(), rdict.keys()
                # Walk the alignment columns; rdict tracks the residue number reached in each sequence
                for a in range(qseq.seqLen()):
                    if qseq.info['Sequence'][a] != '-': rdict[qseq] += 1
                    for hseq in pdict:
                        if hseq.info['Sequence'][a] == '-': continue
                        if hseq != qseq: rdict[hseq] += 1   # The query counter was already advanced above
                        if rdict[hseq] in pdict[hseq] and qseq.info['Sequence'][a] == hseq.info['Sequence'][a]:  # Phosphosite
                            pdata = {'Name':search.info['Name'],'Pos':rdict[qseq],'AA':qseq.info['Sequence'][a],
                                     'PELM':hseq.shortName(),'PELMPos':rdict[hseq],'Evidence':'Hom'}
                            if hseq == qseq: pdata['Evidence'] = 'Self'
                            elif hseq in idlist: pdata['Evidence'] = 'ID'
                            rje.delimitedFileOutput(self,self.info['PhosRes'],headers,delimit,pdata)
                            self.addPhos(aln.seq[-1],a,pdata['Evidence'])
                ## ~ [2c] Add Scansite/NetPhos if made? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                ## ~ [2d] Save alignment ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                aln.saveFasta()


            # Align hits for each > X %ID
            # Map phosphosites onto alignment and output #
            
            return
        except: self.log.errorLog('Problem during PhosphoSeq.mapPhosByBLAST')
Ejemplo n.º 24
0
    def mapPhosByBLAST(
        self, fasfile
    ):  ### BLAST sequences against phosphoDB, align hits & mark sites (ID & Homology)
        '''
        BLAST sequences against phosphoDB, align hits and mark phosphosites (ID & Homology).
        >> fasfile:str = fasta file of query sequences
        '''
        # NOTE(review): this listing stops after the hit-filtering loop - the alignment/mapping steps and the
        # "except" clause that must close the "try" below are not present here.
        try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            ## ~ [1a] Setup fasfile ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            scmd = self.cmd_list + [
                'seqin=%s' % fasfile, 'autoload=T', 'autofilter=F'
            ]
            qseqlist = rje_seq.SeqList(self.log, scmd)
            qdict = qseqlist.seqNameDic()
            ## ~ [1b] Setup results files/directories ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            basefile = rje.baseFile(fasfile)
            # Default results table is named after the input file when no 'PhosRes' name was given
            if self.info['PhosRes'].lower() in ['', 'none']:
                self.info['PhosRes'] = '%s.phosres.tdt' % basefile
            headers = ['Name', 'Pos', 'AA', 'PELM', 'PELMPos', 'Evidence']
            delimit = rje.getDelimit(
                self.cmd_list,
                rje.delimitFromExt(filename=self.info['PhosRes']))
            # Called without a data dictionary - presumably this (re)creates the file with its header row; TODO confirm
            rje.delimitedFileOutput(self,
                                    self.info['PhosRes'],
                                    headers,
                                    delimit,
                                    rje_backup=True)
            ppath = rje.makePath('PhosALN')
            rje.mkDir(self, ppath)
            ## ~ [1c] Setup BLAST ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            pblast = rje_blast.BLASTRun(self.log,
                                        self.cmd_list + ['formatdb=F'])
            pblast.setInfo({
                'Name': '%s.p.blast' % rje.baseFile(fasfile),
                'DBase': self.info['PELMFas'],
                'InFile': fasfile
            })
            pblast.setStat({'HitAln': pblast.stat['OneLine']})
            pblast.opt['Complexity Filter'] = False
            pblast.formatDB(force=False)
            ## ~ [1d] Setup GABLAM Stats ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            gkey = 'GABLAMO ID'  #x# % self.info['GABLAMO Key']
            # Thresholds below 1.0 are scaled by 100 (treated as fractions), then floored at 0.0
            for g in ['ID', 'Hom']:
                if self.stat['%sSim' % g] < 1.0:
                    self.stat['%sSim' % g] *= 100.0
                self.stat['%sSim' % g] = max(0.0, self.stat['%sSim' % g])

            ### ~ [2] PhosphoBLAST ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            pblast.blast(use_existing=True, log=True)  # BLAST
            pblast.readBLAST(gablam=True)  # Read in
            # Searches are popped off the list as they are processed
            while pblast.search:
                ## ~ [2a] Align relevant hits from each BLAST ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                search = pblast.search.pop(0)
                qseq = qdict[search.info['Name']]
                idlist = []  # Sequences passing the IDSim test below (plus the query on a self-hit)
                qlen = qseq.aaLen()
                hitdict = search.hitSeq(self.obj['SeqList'])
                aln = rje_seq.SeqList(
                    self.log, self.cmd_list + ['autoload=F', 'autofilter=F'])
                aln.seq = [qseq]
                pdict = {}  # Dictionary of {hseq:[poslist]}
                rdict = {qseq: 0}  # Dictionary of {hseq:res}
                for hit in search.hit[0:]:
                    hseq = hitdict[hit]
                    pdict[hseq] = []
                    for pos in rje.sortKeys(
                            self.dict['PhosphoSites'][hseq.info['AccNum']]):
                        pdict[hseq].append(pos)
                    # Self-hit: the phosphosites are re-keyed onto the query and the hit is not added to the alignment
                    if hit.info['Name'] == search.info['Name']:
                        if qseq.getSequence(case=False,
                                            gaps=False) != hseq.getSequence(
                                                case=False, gaps=False):
                            self.log.errorLog(
                                'Major problem: Search/Hit sequence mismatch for same sequence "%s"'
                                % hit.info['Name'])
                        idlist.append(qseq)
                        pdict[qseq] = pdict.pop(hseq)
                        continue
                    # qvh = the hit's gkey ('GABLAMO ID') query statistic as a percentage of the query length
                    gdict = hit.globalFromLocal(qlen)
                    qvh = float(100 * gdict['Query'][gkey]) / float(qlen)
                    if qvh < self.stat['HomSim']:
                        pdict.pop(hseq)  # Below the homology threshold: hit is ignored completely
                        continue
                    aln.seq.append(hseq)
                    if (qseq.sameSpec(hseq) or not self.opt['UseSpec']
                        ) and qvh >= self.stat['IDSim']:
                        idlist.append(hseq)
                    rdict[hseq] = 0
Ejemplo n.º 25
0
 def gopher(self):  ### Sets up data for GOPHER run
     '''
     Sets up data for GOPHER run.
     Formats the <Basefile>.ygob.fas and <Basefile>.yeast.fas BLAST databases, then converts each YGOB pillar in
     self.list['Pillars'] into the short names of those members found in the loaded sequences. The resulting ID list
     is written to BLAST/<acc>.blast.id for every pillar member that is in self.list['YeastSeq'] or, when that list is
     empty, has SpecCode 'YEAST'. self.dict['PillarMap'] is rebuilt as {pillar ID: accession after renaming}.
     Any exception is logged and then re-raised.
     '''
     try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         rje.mkDir(self, 'BLAST/')
         for dbtype in ['ygob', 'yeast']:
             rje_blast.BLASTRun(self.log, self.cmd_list).formatDB(
                 fasfile='%s.%s.fas' % (self.info['Basefile'], dbtype),
                 protein=True,
                 force=False)
         seqdict = self.obj['SeqList'].seqNameDic('AccNum')
         ymap = self.dict['PillarMap'] = {}
         ### ~ [2] Convert Pillars to BLAST IDs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         (px, ptot) = (0.0, len(self.list['Pillars']))
         ox = 0
         for pillar in self.list['Pillars']:
             self.progLog(
                 '\r#YGOB',
                 'Converting YGOB Pillars for GOPHER: %.2f%%' % (px / ptot))
             px += 100
             ## ~ [2a] Map pillar IDs onto loaded sequences ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             newpillar = []
             for yid in pillar:
                 # A temporary Sequence object is used purely to parse the accession number out of the pillar ID.
                 seq = rje_sequence.Sequence(self.log, self.cmd_list)
                 seq.opt['Yeast'] = True
                 seq.info['Name'] = yid
                 seq.extractDetails(gnspacc=True)
                 ygob = seq.info['AccNum']
                 if ygob in self.dict['Rename']:
                     acc = self.dict['Rename'][ygob]
                 else:
                     acc = ygob
                 # Recorded even when the sequence is missing: [2b] looks every ID up again
                 ymap[yid] = acc
                 if acc not in seqdict:
                     self.printLog(
                         '\r#GENE',
                         'Non-coding gene %s (%s)? Cannot find in fasta file'
                         % (acc, yid))
                     continue
                 try:
                     newpillar.append(seqdict[acc].shortName())
                 except:
                     print('%s %s %s' % (yid, ygob, acc))
                     self.errorLog(rje_zen.Zen().wisdom())
             if not newpillar: continue
             ## ~ [2b] Output BLAST ID file for each yeast member ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             # Same content for every member of the pillar, so build it once
             idtext = '\n'.join(newpillar)
             for ygob in pillar:
                 acc = ymap[ygob]
                 if acc not in seqdict: continue
                 if acc in self.list['YeastSeq'] or (
                         not self.list['YeastSeq']
                         and seqdict[acc].info['SpecCode'] == 'YEAST'):
                     IDFILE = open(
                         rje.makePath('BLAST/%s.blast.id' % acc,
                                      wholepath=True), 'w')
                     try:
                         IDFILE.write(idtext)
                     finally:
                         # Close explicitly rather than relying on garbage collection
                         IDFILE.close()
                     ox += 1
         self.progLog(
             '\r#YGOB',
             'Converted YGOB Pillars for GOPHER: %s BLAST ID files.' %
             rje.iStr(ox))
     except:
         self.errorLog(rje_zen.Zen().wisdom())
         raise  # Delete this if method error not terrible
Ejemplo n.º 26
0
 def fpi(self):  ### Family-protein interactions
     '''
     Family-protein interactions.
     For every PPI hub whose family (self.dict['Fam']) has 2+ members, pools the PPI lists of all family members and
     keeps the spokes that occur at least twice in that pool. Those spokes are removed from each family member's PPI
     list (and the member from the spoke's own list), and the accession numbers of the spokes with known sequences
     are written to SLiMPID_FPI/<gene>.fpi.acc. Finally, hubs with an empty name or no PPI left are dropped from
     self.dict['PPI']. Errors are logged (with a quit choice) rather than raised.
     '''
     try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         # NOTE(review): the guard tests self.dict['Domain'] although the method only uses self.dict['Fam'];
         # kept as found - confirm whether 'Fam' was intended.
         if not self.dict['Domain']: return
         outdir = 'SLiMPID_FPI'
         rje.mkDir(self, outdir)
         fpi = {}  # Dictionary of {family:[interactors]}
         badname = []
         ### ~ [2] Process ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         for qry in rje.sortKeys(self.dict['PPI']):
             try:
                 fam = self.dict['Fam'][qry]
                 if len(fam) < 2: continue
             except:
                 self.errorLog('Problem with "%s" protein family' % qry)
                 continue
             ## ~ [2a] Pool family interactors and keep those seen 2+ times ~~~~~~~~~~~~~~~~~~~ ##
             pooled = []
             for hub in fam:
                 if hub in self.dict['PPI']:
                     pooled += self.dict['PPI'][hub]  # Add with redundancy
             counts = {}
             for spoke in pooled:
                 counts[spoke] = counts.get(spoke, 0) + 1
             # Must have 2+ family interactions (single pass instead of repeated list.count/remove)
             shared = [spoke for spoke in pooled if counts[spoke] > 1]
             sharedset = set(shared)
             ## ~ [2b] Remove family interactions from the PPI data ~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             for hub in fam:
                 if hub not in self.dict['PPI']: continue
                 for spoke in self.dict['PPI'][hub][0:]:  # Iterate over a copy: the list is edited in the loop
                     if spoke not in sharedset: continue
                     self.dict['PPI'][hub].remove(spoke)
                     if spoke in self.dict['PPI'] and hub in self.dict['PPI'][spoke]:
                         self.dict['PPI'][spoke].remove(hub)
             ## ~ [2c] Output accession numbers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             fpi[qry] = rje.sortUnique(shared, False, False)
             acc = []
             gene = self.dict['Gene'][qry]
             for name in fpi[qry]:
                 if not name: continue
                 if name in self.dict['Seq']:
                     acc.append(self.dict['Seq'][name].info['AccNum'])
                 elif name not in badname:
                     badname.append(name)
             ACC = open('%s/%s.fpi.acc' % (outdir, gene), 'w')
             try:
                 ACC.write('\n'.join(acc))
             finally:
                 ACC.close()  # Close explicitly rather than relying on garbage collection
             self.printLog('#FPI',
                           '%s family => %d interactors' % (gene, len(acc)))
         if badname:
             badname.sort()
             self.printLog(
                 '#BAD', '%d "bad" protein names: %s' %
                 (len(badname), '; '.join(badname)))
         ### ~ [3] Cleanup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         hx = len(self.dict['PPI'])
         for hub in rje.sortKeys(self.dict['PPI']):
             if hub and self.dict['PPI'][hub]: continue
             self.dict['PPI'].pop(hub)
             self.printLog('#FPI', 'No %s PPI left after FPI removed' % hub)
         self.printLog(
             '#PPX', '%s of %s PPI hubs remain after FPI removed' %
             (rje.integerString(len(
                 self.dict['PPI'])), rje.integerString(hx)))
     except:
         self.errorLog('Problem with SLiMPID.fpi()', quitchoice=True)
Ejemplo n.º 27
0
    def mapRegionsToSequences(self):  ### Maps tabulated PPI regions onto sequence datasets
        '''
        Maps tabulated PPI regions onto sequence datasets.
        Reads ppi_region.tdt (keyed on Interactor and Protein, with lists of Start and End positions) and, for each
        hub/spoke pair, writes a fasta file of the hub's interactome in which the spoke (query) sequence comes first
        in lower case with the tabulated regions in upper case. Files are named <hub>.<spoke>.fas and go to RegPPI/,
        or to RegPPIAdd/ when the spoke first had to be added to the hub's interactome. Pairs that cannot be mapped,
        or whose hub has fewer than 3 spokes, are logged and skipped.
        Any exception is logged and then re-raised.
        '''
        try:  ### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            minseq = 3
            outdir = 'RegPPI/'
            adddir = 'RegPPIAdd/'
            rje.mkDir(self, outdir)
            rje.mkDir(self, adddir)
            tabfile = 'ppi_region.tdt'
            region = rje.dataDict(self,
                                  tabfile, ['Interactor', 'Protein'],
                                  ['Start', 'End'],
                                  lists=True)
            ### ~ [2] Work through each pair in turn ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            px = 0.0
            ptot = len(region)
            for pair in rje.sortKeys(region):
                self.progLog('\r#FAS',
                             'Generating fasta files: %.2f%%' % (px / ptot))
                px += 100.0
                ## ~ [2a] Map sequences to PPI dictionary ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                [hub, spoke] = pair.split('\t')
                try:
                    qryseq = self.dict['SeqObj'][spoke]
                except KeyError:
                    self.printLog(
                        '\n#QRY',
                        'Spoke gene "%s" missing from Sequence file' % spoke)
                    continue
                try:
                    spoke = self.dict['Seq2Gene'][spoke]  # From here on spoke is the gene, not the protein
                except KeyError:
                    self.printLog(
                        '\n#QRY',
                        'Spoke protein "%s" missing from PPI dictionary' %
                        spoke)
                    continue
                if hub not in self.dict['PPI']:
                    self.printLog(
                        '\n#HUB',
                        'Hub gene "%s" missing from PPI dictionary' % hub)
                    continue
                addspoke = spoke not in self.dict['PPI'][hub]
                if addspoke:
                    self.dict['PPI'][hub].append(spoke)
                    self.printLog(
                        '\n#PPI',
                        'Added spoke gene "%s" to hub "%s" interactome' %
                        (spoke, hub))
                if len(self.dict['PPI'][hub]) < minseq:
                    self.printLog(
                        '\n#HUB',
                        'Hub "%s" interactome too small (<%s spokes)' %
                        (hub, minseq))
                    continue
                ## ~ [2b] Identify query sequence ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                # Starts and ends are pooled and sorted, then consumed in consecutive (start, end) pairs below.
                # int(pos, 10) is what string.atoi(pos) calls, so the parsing is unchanged.
                reglist = [int(pos, 10) for pos in region[pair]['Start'] + region[pair]['End']]
                reglist.sort()
                qsequence = qryseq.info['Sequence'].lower()
                self.deBug(len(qsequence))
                self.deBug(qsequence)
                prelen = len(qsequence)
                while reglist:
                    self.deBug(reglist)
                    try:
                        startx = reglist.pop(0) - 1  # Convert the 1-based start into a slice index
                        endx = reglist.pop(0)
                    except IndexError:  # Odd number of positions: no end left for the final start
                        self.errorLog('%s PPI Region problem: %s' %
                                      (pair, region[pair]))
                        continue
                    self.deBug(qsequence[startx - 1:endx + 1].upper())
                    qsequence = qsequence[:startx] + qsequence[
                        startx:endx].upper() + qsequence[endx:]
                self.deBug(qsequence)
                if len(qsequence) != prelen:
                    # Region coordinates that do not slice cleanly (e.g. a start of 0) change the sequence length
                    self.printLog('#F**K', '%s' % region[pair])
                    self.printLog('#F**K', qryseq.info['Sequence'].lower())
                    self.printLog('#F**K', qsequence)
                    raise ValueError(
                        'Sequence length changed while marking PPI regions')
                ## ~ [2c] Output sequences ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                if addspoke:
                    outfile = '%s%s.%s.fas' % (adddir, hub, spoke)
                else:
                    outfile = '%s%s.%s.fas' % (outdir, hub, spoke)
                ox = 1  # The query sequence is always output
                FAS = open(outfile, 'w')  # Opened once and closed below, instead of reopening per sequence
                try:
                    FAS.write('>%s\n%s\n' % (qryseq.info['Name'], qsequence))
                    for spoke2 in self.dict['PPI'][hub]:
                        if spoke2 == spoke: continue
                        try:
                            sseq = self.dict['SeqObj'][self.dict['Gene2Seq']
                                                       [spoke2]]
                            fastxt = '>%s\n%s\n' % (sseq.info['Name'],
                                                    sseq.info['Sequence'])
                        except KeyError:
                            continue  # Best effort: interactors without a mapped sequence are left out
                        FAS.write(fastxt)
                        ox += 1
                finally:
                    FAS.close()
                self.printLog('\n#FAS',
                              '%s sequences output to %s' % (ox, outfile))

        except:
            self.errorLog(rje_zen.Zen().wisdom())
            raise  # Delete this if method error not terrible
Ejemplo n.º 28
0
    def domainFasta(self):  ### Outputs parsed domain and domain PPI datasets in Fasta format
        '''
        Outputs parsed domain tables and domain PPI datasets in Fasta format.
        Writes HPRD.domains.tdt (Domain, HPRD, Gene) and HPRD.domsource.tdt (Domain, Source) to OutDir, then, for each
        domain, saves the sequences of all interactors of the domain-containing proteins to
        <OutDir>HPRD_Domain_Datasets/<domain>_hprd.fas. Domains without any PPI partners are logged and produce no
        file. Any exception is logged and then re-raised.
        '''
        try:
            ### ~ Tab delimited domain-HPRD pairs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            headers = ['Domain', 'HPRD', 'Gene']
            dfile = self.info['OutDir'] + 'HPRD.domains.tdt'
            # Called without a data dictionary - presumably this writes the header row; TODO confirm
            rje.delimitedFileOutput(self, dfile, headers, '\t')
            sfile = self.info['OutDir'] + 'HPRD.domsource.tdt'
            shead = ['Domain', 'Source']
            rje.delimitedFileOutput(self, sfile, shead, '\t')
            dx = 0.0
            for domain in rje.sortKeys(self.dict['Domains']):
                self.log.printLog('\r#DOM',
                                  'HPRD Domain output (%s): %.1f%%' %
                                  (dfile, dx / len(self.dict['Domains'])),
                                  newline=False,
                                  log=False)
                dx += 100.0
                for hid in self.dict['Domains'][domain]:
                    datadict = {
                        'Domain': domain,
                        'HPRD': hid,
                        'Gene': self.dict['HPRD'][hid]['gene']
                    }
                    rje.delimitedFileOutput(self, dfile, headers, '\t',
                                            datadict)
                for source in self.dict['DomainSource'][domain]:
                    datadict = {'Domain': domain, 'Source': source}
                    rje.delimitedFileOutput(self, sfile, shead, '\t', datadict)
            self.log.printLog(
                '\r#DOM', 'HPRD Domain output (%s): %s domains.' %
                (dfile, rje.integerString(len(self.dict['Domains']))))

            ### ~ Domain PPI Dataset Outputs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            datpath = self.info['OutDir'] + rje.makePath(
                'HPRD_Domain_Datasets/')
            rje.mkDir(self, datpath)
            for domain in rje.sortKeys(self.dict['Domains']):
                ## Generate a list of all interactors with domain-containing proteins ##
                partners = set()  # Set gives the unique partners without a list scan per interactor
                for p1 in self.dict['Domains'][domain]:
                    if p1 in self.dict['PPI']:
                        partners.update(self.dict['PPI'][p1])
                plist = sorted(partners)
                ## Generate Sequence list and output ##
                mylist = []
                for p in plist:
                    # With AllIso the 'Seq' entry is added as a list of sequences; otherwise as a single item
                    if self.opt['AllIso']:
                        mylist += self.dict['HPRD'][p]['Seq']
                    else:
                        mylist.append(self.dict['HPRD'][p]['Seq'])
                sfile = '%s%s_hprd.fas' % (datpath, domain)
                if mylist:
                    self.obj['SeqList'].saveFasta(seqs=mylist, seqfile=sfile)
                else:
                    self.log.printLog(
                        '#DOM', 'No PPI partners for domain "%s"' % domain)
            self.log.printLog('\r#DOM', 'HPRD Domain fasta output complete.')
        except:
            self.log.errorLog('Error in HPRD.domainFasta()',
                              printerror=True,
                              quitchoice=False)
            raise
Ejemplo n.º 29
0
 def mapRegionsToSequences(self):    ### Maps tabulated PPI regions onto sequence datasets
     '''
     Maps tabulated PPI regions onto sequence datasets.

     Reads 'ppi_region.tdt' through rje.dataDict(), keyed on the Interactor and Protein fields with the Start
     and End fields loaded as lists. Each key is split on a tab into a hub and a spoke. For every pair whose
     spoke is found in self.dict['SeqObj'] and self.dict['Seq2Gene'] and whose hub is in self.dict['PPI']:
     - the spoke gene is appended to the hub interactome in self.dict['PPI'] if it is not already listed;
     - hubs with fewer than minseq (3) spokes are skipped;
     - the query sequence is lower-cased and each Start-End region is upper-cased;
     - a fasta file '<hub>.<spoke>.fas' is written with the query first, followed by every other hub spoke
       that can be mapped to a sequence via self.dict['Gene2Seq'] and self.dict['SeqObj']. Files go to
       'RegPPIAdd/' when the spoke had to be added to the interactome and to 'RegPPI/' otherwise.

     Raises ValueError if region masking changes the length of the query sequence. Any exception is logged
     via self.errorLog() and then re-raised.
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         minseq = 3                  # Minimum interactome size (spokes per hub) for output
         outdir = 'RegPPI/'          # Output for pairs already present in the PPI dictionary
         adddir = 'RegPPIAdd/'       # Output for pairs where the spoke had to be added to the hub
         rje.mkDir(self,outdir)
         rje.mkDir(self,adddir)
         tabfile = 'ppi_region.tdt'
         region = rje.dataDict(self,tabfile,['Interactor','Protein'],['Start','End'],lists=True)
         ### ~ [2] Work through each pair in turn ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         px = 0.0; ptot = len(region)
         for pair in rje.sortKeys(region):
             self.progLog('\r#FAS','Generating fasta files: %.2f%%' % (px/ptot)); px += 100.0
             ## ~ [2a] Map sequences to PPI dictionary ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             [hub, spoke] = pair.split('\t')
             # Only a missing key means "not mapped"; anything else should reach the outer handler
             try: qryseq = self.dict['SeqObj'][spoke]
             except KeyError: self.printLog('\n#QRY','Spoke gene "%s" missing from Sequence file' % spoke); continue
             try: spoke = self.dict['Seq2Gene'][spoke]     # From here on, spoke is the Seq2Gene-mapped identifier
             except KeyError: self.printLog('\n#QRY','Spoke protein "%s" missing from PPI dictionary' % spoke); continue
             if hub not in self.dict['PPI']: self.printLog('\n#HUB','Hub gene "%s" missing from PPI dictionary' % hub); continue
             addspoke = spoke not in self.dict['PPI'][hub]
             if addspoke:
                 self.dict['PPI'][hub].append(spoke)
                 self.printLog('\n#PPI','Added spoke gene "%s" to hub "%s" interactome' % (spoke,hub))
             if len(self.dict['PPI'][hub]) < minseq: self.printLog('\n#HUB','Hub "%s" interactome too small (<%s spokes)' % (hub,minseq)); continue
             ## ~ [2b] Identify query sequence ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             # All Start and End positions are pooled and sorted, then consumed two at a time as (start,end)
             reglist = [int(pos) for pos in region[pair]['Start'] + region[pair]['End']]
             reglist.sort()
             qsequence = qryseq.info['Sequence'].lower()
             self.deBug(len(qsequence))
             self.deBug(qsequence)
             prelen = len(qsequence)
             while reglist:
                 self.deBug(reglist)
                 # An odd number of positions leaves a start with no end: log it and let the loop finish
                 try: startx = reglist.pop(0) - 1; endx = reglist.pop(0)
                 except IndexError: self.errorLog('%s PPI Region problem: %s' % (pair,region[pair])); continue
                 # Debug view of the region plus one flanking residue each side; clamp to avoid negative wrap-around
                 self.deBug(qsequence[max(0,startx-1):endx+1].upper())
                 qsequence = qsequence[:startx] + qsequence[startx:endx].upper() + qsequence[endx:]
             self.deBug(qsequence)
             if len(qsequence) != prelen:    # Sanity check: case masking must never change sequence length
                 self.printLog('#F**K','%s' % region[pair])
                 self.printLog('#F**K',qryseq.info['Sequence'].lower())
                 self.printLog('#F**K',qsequence)
                 raise ValueError('PPI region masking changed sequence length for "%s"' % pair)
             ## ~ [2c] Output sequences ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             if addspoke: outfile = '%s%s.%s.fas' % (adddir,hub,spoke)
             else: outfile = '%s%s.%s.fas' % (outdir,hub,spoke)
             ox = 1      # Query sequence is always written first
             FAS = open(outfile,'w')
             try:
                 FAS.write('>%s\n%s\n' % (qryseq.info['Name'],qsequence))
                 for spoke2 in self.dict['PPI'][hub]:
                     if spoke2 == spoke: continue
                     # Spokes without a mapped sequence are skipped silently (best-effort output)
                     try: sseq = self.dict['SeqObj'][self.dict['Gene2Seq'][spoke2]]
                     except KeyError: continue
                     FAS.write('>%s\n%s\n' % (sseq.info['Name'],sseq.info['Sequence']))
                     ox += 1
             finally: FAS.close()    # Single handle per file, always closed even if a write fails
             self.printLog('\n#FAS','%s sequences output to %s' % (ox,outfile))
     except:
         self.errorLog(rje_zen.Zen().wisdom())
         raise   # Delete this if method error not terrible