Code example #1
0
    def combineSNPs(self):  ### Combines SNP tables with the FDR table of genetic differences
        '''
        Combines SNP positions from the parsed SNP tables (self.list['SNPTables']) with the FDR
        table. Positions present in a SNP table but missing from the FDR table are read back from
        the *.pdiff.tdt file and added with a '-' p.FDR value. All tables are then joined on a
        #Locus#|#Pos# key field and the combined table is saved as "SNPs".
        << Returns the combined table, False if there was nothing to combine, or None on error.
        '''
        try:### ~ [0] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            if not self.list['SNPTables']: self.printLog('\r#SNP','No SNP tables to add.'); return False
            fdb = self.db().addTable(name='fdr',expect=True,mainkeys=['Locus','Pos'])
            fdb.remakeKeys()   #!# Delete once tuple thing OK
            fdbkeys = fdb.dataKeys()
            fdbset = set(fdbkeys)   # O(1) membership tests (was a quadratic `snppos + fdbkeys` list concat per position)
            self.debug(fdbkeys[:100])
            snps = []       # SNP tables to join in at the end
            snppos = set()  # (Locus,Pos) keys present in SNP tables but missing from fdb
            for snptable in self.list['SNPTables']:
                snps.append(self.db().addTable(snptable,name=rje.baseFile(snptable,True),expect=True,mainkeys=['Locus','Pos']))
                snps[-1].addField('SNP',evalue="YES")
                self.debug(snps[-1].dataKeys()[:100])
                snps[-1].remakeKeys()   #!# Delete once tuple thing OK
                self.debug(snps[-1].dataKeys()[:100])
                px = 0; ptot = snps[-1].entryNum(); sx = 0
                for pos in snps[-1].dataKeys(): # This should be a (Locus,Pos) tuple
                    self.progLog('\r#SNP','Scanning %s for extra SNP positions: %.2f%%' % (snps[-1].name(),px/ptot)); px += 100.0
                    if pos not in snppos and pos not in fdbset: snppos.add(pos); sx += 1
                self.printLog('\r#SNP','Scanned %s for extra SNP positions: %s to add.' % (snps[-1].name(),rje.iStr(sx)))
            ## ~ [0a] Add missing data from other tables ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
            if snppos:
                SAMSIG = open('%s.pdiff.tdt' % self.baseFile(),'r'); px = 0; ptot = len(snppos); ix = 0
                fline = SAMSIG.readline(); headers = rje.readDelimit(fline)     # First line = field headers
                fline = SAMSIG.readline()
                self.progLog('\r#SNP','%s/%s SNP positions added from %s PDiff filelines.' % (rje.iStr(px),rje.iStr(ptot),rje.iStr(ix)))
                while fline:
                    data = rje.readDelimit(fline); ix += 1
                    if (data[0],data[1]) in snppos:     # (Locus,Pos) of a missing SNP position
                        entry = {'p.FDR':'-'}           # No FDR was calculated for positions added this way
                        for i in range(len(data)): entry[headers[i]] = data[i]
                        fdb.addEntry(entry); px += 1
                        snppos.remove((data[0],data[1]))
                        self.progLog('\r#SNP','%s/%s SNP positions added from %s PDiff filelines.' % (rje.iStr(px),rje.iStr(ptot),rje.iStr(ix)))
                    else: self.progLog('\r#SNP','%s/%s SNP positions added from %s PDiff filelines.' % (rje.iStr(px),rje.iStr(ptot),rje.iStr(ix)))
                    if not snppos: break    # All missing positions recovered: stop reading
                    fline = SAMSIG.readline()
                SAMSIG.close()
                self.printLog('\r#SNP','%s/%s SNP positions added from PDiff file.' % (rje.iStr(px),rje.iStr(ptot)))
            else: self.printLog('\r#SNP','No SNP positions to add.'); return False

            ### ~ [1] Join Tables ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            temp = fdb
            temp.makeField('#Locus#|#Pos#')     # Composite join key
            for snptable in snps:
                snptable.makeField('#Locus#|#Pos#')
                newtemp = self.db().joinTables(name='newtemp',join=[(temp,'#Locus#|#Pos#'),(snptable,'#Locus#|#Pos#',['SNP'])],newkey=['Locus','Pos'],keeptable=True)
                self.printLog('#SNP','Added SNPs from %s' % snptable.name())
                self.db().deleteTable(temp)     # Only keep the latest accumulated join
                temp = newtemp
                temp.renameField('SNP',snptable.name())
                temp.setStr({'Name':'temp'})
            temp.dropField('#Locus#|#Pos#')
            self.db().list['Tables'].append(temp)
            temp.setStr({'Name':'SNPs'})
            temp.saveToFile()
            return temp
        except: self.errorLog('%s.combineSNPs() error' % (self)); return None   # Fixed: was misreporting pileUpStats()
Code example #2
0
def tableToHTML(delimtext, delimit, tabwidth='100%', tdwidths=[], tdalign=[],
                valign='center', thead=True, border=1, tabid=''):  # Makes HTML Table
    '''
    Converts delimited plain text into an HTML table.
    >> delimtext:str = Delimited text to convert
    >> delimit:str = Text delimiter for conversion.
    >> tabwidth:str ['100%'] = width of table
    >> tdwidths:list [] = Optional list of widths of columns (copied per row; never mutated)
    >> tdalign:list [] = Optional list of text alignment for columns (copied per row; never mutated)
    >> valign:str ['center'] = Vertical text alignment for columns
    >> thead:bool [True] = Whether first row should use th rather than td
    >> border:int [1] = Table border strength (ignored when tabid set: CSS expected to style)
    >> tabid:str [''] = Table ID setting (for CSS formatting)
    << html:str = HTML table text
    '''
    ### [0] Setup Table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    if tabid: html = '<table width="%s" id="%s">\n' % (tabwidth, tabid)
    else: html = '<table width="%s" border=%d>\n' % (tabwidth, border)
    # str.split replaces the deprecated py2-only string.split function (removed in Python 3)
    tablines = delimtext.split('\n')
    ### [1] Header Row ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    if thead:
        html += '<tr>\n'
        headtext = rje.readDelimit(tablines.pop(0), delimit)
        tw = tdwidths[0:]   # Work on copies so the (shared) default lists are never consumed
        ta = tdalign[0:]
        while headtext:
            tag = 'th'
            if tw: tag += ' width="%s"' % tw.pop(0)
            if ta: tag += ' align=%s' % ta.pop(0)
            html += '<%s>%s</th>\n' % (tag, headtext.pop(0))
        html += '</tr>\n'
    ### [2] Main body ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    while tablines:
        tabtext = rje.readDelimit(tablines.pop(0), delimit)
        if not tabtext: continue    # Skip blank lines
        html += '<tr>\n'
        tw = tdwidths[0:]
        ta = tdalign[0:]
        while tabtext:
            tag = 'td valign=%s' % valign
            if tw: tag += ' width="%s"' % tw.pop(0)
            if ta: tag += ' align=%s' % ta.pop(0)
            html += '<%s>%s</td>\n' % (tag, tabtext.pop(0))
        html += '</tr>\n'
    ### [3] End table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    html += '</table>\n\n'
    return html
Code example #3
0
File: unifake.py  Project: kwikwag/SLiMSuite
 def loadFeatures(self,ftfile):  ### Loads features from given file
     '''
     Loads features from the given delimited file into self.dict['Features'].
     >> ftfile:str = Features file name ('' or 'none' skips loading).
     Required fields (case-insensitive): feature, [ft_]start, [ft_]end, description.
     Entries are stored as {'Type','Start','End','Desc'} dictionaries keyed by sequence ID.
     '''
     try:### ~ [1] ~ Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if ftfile in ['','none']: return
         # Fixed: the log message lacked its '% ftfile' substitution and printed a literal "%s"
         if not os.path.exists(ftfile): return self.printLog('#ERR','Features file "%s" missing' % ftfile)
         delimit = rje.delimitFromExt(filename=ftfile)
         ## ~ [1a] ~ Establish headers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         headers = rje.readDelimit(open(ftfile,'r').readline(),delimit)
         mainkeys = [headers[0]]    # First field is taken as the sequence ID key
         hmap = {}                  # Lower-case header -> actual header mapping
         for h in headers: hmap[h.lower()] = h
         pos = ''    # Leader for start/end positions
         if 'ft_start' in hmap or 'ft_end' in hmap: pos = 'ft_'
         for h in ['feature','%sstart' % pos,'%send' % pos,'description']:
             if h not in hmap: return self.printLog('#ERR','No %s field detected in "%s" features file' % (h,ftfile))
             mainkeys.append(hmap[h])
         mainkeys.remove(hmap['description'])
         ### ~ [2] ~ Load Data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         ftdata = rje.dataDict(self,ftfile,mainkeys,['description'],delimit,headers,lists=True)
         (mx,mtot,fx) = (0.0,len(ftdata),0)
         for mainkey in rje.sortKeys(ftdata):
             self.progLog('\r#FT','Loading features from %s: %.2f%%' % (ftfile,mx/mtot))
             mx += 100.0
             # str.split replaces the py2-only string.split; seqid renamed from `id` (builtin shadow)
             (seqid,ft,start,end) = mainkey.split(delimit)
             if seqid == mainkeys[0]: continue  # Header row read back as data: skip
             if seqid not in self.dict['Features']: self.dict['Features'][seqid] = []
             for desc in ftdata[mainkey][hmap['description']]:
                 fx += 1
                 self.dict['Features'][seqid].append({'Type':ft,'Start':int(start),'End':int(end),'Desc':desc})
         self.printLog('\r#FT','Loaded %s features for %s IDs from %s' % (rje.integerString(fx),rje.integerString(len(self.dict['Features'])),ftfile))
     except: self.errorLog('UniFake.loadFeatures error ["%s"]' % ftfile)
Code example #4
0
 def setup(self):    ### Main class setup method.
     '''
     Main class setup method. Creates the Database object, establishes the output basefile,
     parses the WT pileup data if needed, builds per-locus reference sequences and major-allele
     lists from the WT table, then parses the Mutant pileup data if needed.
     << True if setup successful; None if *.fdr.tdt results already exist (force=F); False on error.
     '''
     try:### ~ [1] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.obj['DB'] = rje_db.Database(self.log,self.cmd_list+['tuplekeys=T'])    # Tables keyed by tuples
         # Default basefile: <MutPileup>.vs.<WTPileup>.Q<QCut>
         if self.baseFile().lower() in ['','none']: self.baseFile('%s.vs.%s.Q%d' % (rje.baseFile(self.getStr('MutPileup'),True),rje.baseFile(self.getStr('WTPileup'),True),self.getInt('QCut')))
         if not self.force() and os.path.exists('%s.fdr.tdt' % self.baseFile()): return    # Results exist: nothing to do
         ### ~ [2] Look for/process WT Data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if self.force() or not os.path.exists('%s.WT.tdt' % self.baseFile()): self.parsePileup('WT',self.getStr('WTPileup'))
         ### ~ [3] Generate Reference sequences and Major Alleles (by locus) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         refseq = {}; rx = 0    # {locus:reference sequence}; rx = total reference length (nt)
         majors = {}            # {locus:list of major allele calls, one per position}
         locus = None           # Locus currently being read (rows assumed grouped by locus)
         WTDATA = open('%s.WT.tdt' % self.baseFile(),'r'); wx = 0
         for line in WTDATA:
             self.progLog('\r#WT','Reading WT data: Reference seq length = %s nt' % (rje.iStr(rx)),rand=0.01)
             data = rje.readDelimit(line); wx += 1
             if data[0] == 'Locus': continue    # Skip header row
             else:
                 if data[0] != locus: locus = data[0]; refseq[locus] = ''; majors[locus] = []    # New locus starts
                 pos = int(data[1])
                 # Pad positions absent from the table (no pileup data) with '?' / '-' placeholders
                 while (pos - 1) > len(refseq[locus]): refseq[locus] += '?'; rx += 1
                 while (pos - 1) > len(majors[locus]): majors[locus].append('-')
                 # data[2] = reference base(s); data[5] presumably the major allele call - confirm against parsePileup output
                 refseq[locus] += data[2]; majors[locus].append(data[5]); rx += len(data[2])
         WTDATA.close()
         self.printLog('\r#WT','%s lines read from WT data: Reference seq length = %s nt' % (rje.iStr(wx),rje.iStr(rx)))
         # Sanity check: major allele list and reference sequence must match in length per locus
         for locus in rje.sortKeys(majors):
             if len(majors[locus]) != len(refseq[locus]): self.errorLog('%s WTMajor versus RefSeq length mismatch!' % locus,printerror=False); raise ValueError
         self.dict['WTMajor'] = majors
         self.dict['RefSeq'] = refseq
         ### ~ [4] Look for/process Mutant Data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if self.force() or not os.path.exists('%s.Mut.tdt' % self.baseFile()): self.parsePileup('Mut',self.getStr('MutPileup'),True)
         return True     # Setup successful
     except: self.errorLog('Problem during %s setup.' % self); return False  # Setup failed
Code example #5
0
File: rje_html.py  Project: kwikwag/SLiMSuite
def tableToHTML(delimtext,delimit,tabwidth='100%',tdwidths=[],tdalign=[],valign='center',thead=True,border=1,tabid=''):    # Makes HTML Table
    '''
    Converts delimited plain text into an HTML table.
    >> delimtext:str = Delimited text to convert
    >> delimit:str = Text delimiter for conversion.
    >> tabwidth:str ['100%'] = width of table
    >> tdwidths:list [] = Optional list of widths of columns (copied per row; never mutated)
    >> tdalign:list [] = Optional list of text alignment for columns (copied per row; never mutated)
    >> valign:str ['center'] = Vertical text alignment for columns
    >> thead:bool [True] = Whether first row should use th rather than td
    >> border:int [1] = Table border strength (ignored when tabid set: CSS expected to style)
    >> tabid:str [''] = Table ID setting (for CSS formatting)
    << html:str = HTML table text
    '''
    ### [0] Setup Table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    if tabid: html = '<table width="%s" id="%s">\n' % (tabwidth,tabid)
    else: html = '<table width="%s" border=%d>\n' % (tabwidth,border)
    # str.split replaces the deprecated py2-only string.split function (removed in Python 3)
    tablines = delimtext.split('\n')
    ### [1] Header Row ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    if thead:
        html += '<tr>\n'
        headtext = rje.readDelimit(tablines.pop(0),delimit)
        tw = tdwidths[0:]   # Work on copies so the (shared) default lists are never consumed
        ta = tdalign[0:]
        while headtext:
            tag = 'th'
            if tw: tag += ' width="%s"' % tw.pop(0)
            if ta: tag += ' align=%s' % ta.pop(0)
            html += '<%s>%s</th>\n' % (tag,headtext.pop(0))
        html += '</tr>\n'
    ### [2] Main body ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    while tablines:
        tabtext = rje.readDelimit(tablines.pop(0),delimit)
        if not tabtext: continue    # Skip blank lines
        html += '<tr>\n'
        tw = tdwidths[0:]
        ta = tdalign[0:]
        while tabtext:
            tag = 'td valign=%s' % valign
            if tw: tag += ' width="%s"' % tw.pop(0)
            if ta: tag += ' align=%s' % ta.pop(0)
            html += '<%s>%s</td>\n' % (tag,tabtext.pop(0))
        html += '</tr>\n'
    ### [3] End table ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    html += '</table>\n\n'
    return html
Code example #6
0
 def loadFeatures(self, ftfile):  ### Loads features from given file
     '''
     Loads features from the given delimited file into self.dict['Features'].
     >> ftfile:str = Features file name ('' or 'none' skips loading).
     Required fields (case-insensitive): feature, [ft_]start, [ft_]end, description.
     Entries are stored as {'Type','Start','End','Desc'} dictionaries keyed by sequence ID.
     '''
     try:  ### ~ [1] ~ Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if ftfile in ['', 'none']: return
         if not os.path.exists(ftfile):
             # Fixed: the log message lacked its '% ftfile' substitution and printed a literal "%s"
             return self.printLog('#ERR', 'Features file "%s" missing' % ftfile)
         delimit = rje.delimitFromExt(filename=ftfile)
         ## ~ [1a] ~ Establish headers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         headers = rje.readDelimit(open(ftfile, 'r').readline(), delimit)
         mainkeys = [headers[0]]    # First field is taken as the sequence ID key
         hmap = {}                  # Lower-case header -> actual header mapping
         for h in headers:
             hmap[h.lower()] = h
         pos = ''  # Leader for start/end positions
         if 'ft_start' in hmap or 'ft_end' in hmap: pos = 'ft_'
         for h in ['feature', '%sstart' % pos, '%send' % pos, 'description']:
             if h not in hmap:
                 return self.printLog('#ERR', 'No %s field detected in "%s" features file' % (h, ftfile))
             mainkeys.append(hmap[h])
         mainkeys.remove(hmap['description'])
         ### ~ [2] ~ Load Data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         ftdata = rje.dataDict(self, ftfile, mainkeys, ['description'], delimit, headers, lists=True)
         (mx, mtot, fx) = (0.0, len(ftdata), 0)
         for mainkey in rje.sortKeys(ftdata):
             self.progLog('\r#FT', 'Loading features from %s: %.2f%%' % (ftfile, mx / mtot))
             mx += 100.0
             # str.split replaces the py2-only string.split; seqid renamed from `id` (builtin shadow)
             (seqid, ft, start, end) = mainkey.split(delimit)
             if seqid == mainkeys[0]: continue  # Header row read back as data: skip
             if seqid not in self.dict['Features']:
                 self.dict['Features'][seqid] = []
             for desc in ftdata[mainkey][hmap['description']]:
                 fx += 1
                 self.dict['Features'][seqid].append({'Type': ft, 'Start': int(start), 'End': int(end), 'Desc': desc})
         self.printLog('\r#FT', 'Loaded %s features for %s IDs from %s' % (rje.integerString(fx), rje.integerString(len(self.dict['Features'])), ftfile))
     except:
         self.errorLog('UniFake.loadFeatures error ["%s"]' % ftfile)
Code example #7
0
File: slimparser.py  Project: lyhniupi1/SLiMSuite
 def restOutput(self,outfmt=None,maxparsesize=0,asjson=False):    ### Returns rest output for outfmt
     '''
     Returns REST output for the requested output format.
     >> outfmt:str [None] = REST output key; defaults to the 'Rest' setting when not given.
     >> maxparsesize:int [0] = Max file size (bytes) to read back in full; 0 = no limit.
     >> asjson:bool [False] = Whether to format the returned text as JSON.
     << Output text (or JSON) for outfmt.
     '''
     if not outfmt: outfmt = self.getStrLC('Rest')
     # Fixed: the jsonText result was generated but discarded (missing return), falling through
     # to produce a misleading "No None output generated." message instead.
     if not outfmt: return self.jsonText('No REST output',asjson)
     if outfmt in self.dict['Output']:
         rfile = self.dict['Output'][outfmt].split('\n')[0]  # First line = output file name (str.split replaces py2 string.split)
         if rje.exists(rfile):
             fext = rfile.split('.')[-1]
             if fext in ['png']:
                 self.debug(rfile)
                 # Fixed: missing return meant control fell through and raw PNG bytes were read back below.
                 return self.jsonText(rfile,asjson)
             nbytes = os.path.getsize(rfile)
             if nbytes > maxparsesize > 0:   # Too large to parse
                 otext = '%s is too large to return (%s > %s)' % (os.path.basename(rfile),rje.humanByteSize(nbytes),rje.humanByteSize(maxparsesize))
                 try: jobid = self.dict['Output']['jobid']
                 except: jobid = None
                 resturl = '%sretrieve&jobid=%s&rest=%s[&password=X]' % (self.getStr('RestURL'),jobid,outfmt)
                 if not jobid or outfmt == self.getStrLC('Rest'): return self.jsonText('ERROR: %s' % (otext),asjson)
                 else: return self.jsonText('%s in full output. Try %s.' % (otext,resturl),asjson)
             else:
                 delimit = rje.delimitFromExt(filename=rfile,write=False)
                 if asjson and delimit in [',','\t']:    # Convert delimited rows to a JSON list of lists
                     jtext = []
                     for rline in open(rfile,'r').readlines():
                         jtext.append(json.dumps(rje.readDelimit(rline,delimit)))
                     return '[%s]' % ',\n        '.join(jtext)
                 #!# Add json parsing of fasta files?
                 else:
                     outtxt = open(rfile,'r').read()
                     if not outtxt.endswith('\n'): outtxt += '\n'
                     return self.jsonText(outtxt,asjson)
         elif asjson and outfmt in self.dict['Outfile']:
             pass    #!# Sort out json formatting here based on file extension!
         return self.dict['Output'][outfmt]
     elif outfmt in ['parse','format']:
         intro = '<pre>%s</pre>\n\n' % self.restOutput('intro')
         return self.jsonText(intro,asjson)
     elif outfmt in ['default','full']: return self.jsonText(self.restFullOutput(maxparsesize),asjson)
     elif outfmt in ['restkeys','outputs']: return '\n'.join(self.list['RestKeys']+[''])
     return self.jsonText('No %s output generated.' % outfmt,asjson)
Code example #8
0
 def pileUpFDR(self):  ### Calculates statistics of genetic differences from parsed PileUp Tables
     '''
     Calculates False Discovery Rate (FDR) values for significant (p <= 0.05) positions in the
     *.pdiff.tdt file and writes them, with an added p.FDR column, to *.fdr.tdt.
     Uses file offsets (tell/seek) to re-read significant lines rather than holding them in memory.
     << None. (Returns early without action if *.fdr.tdt exists and force=F.)
     '''
     try:### ~ [0] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         fdrfile = '%s.fdr.tdt' % self.baseFile()
         if not self.force() and os.path.exists(fdrfile): return 
         sigpval = {}    # pval:[fpos] = file offsets of significant pdiff lines, grouped by p-value
         npos = 0; nx = 0    # npos = total reference positions with data (nx appears unused)
         for locus in rje.sortKeys(self.dict['RefSeq']):
             # '?' characters mark positions without pileup coverage; exclude from the test count
             npos += len(self.dict['RefSeq'][locus]) - self.dict['RefSeq'][locus].count('?')
         ### ~ [1] Parse out stats ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         SAMSIG = open('%s.pdiff.tdt' % self.baseFile(),'r')
         headers = string.split(SAMSIG.readline()) + ['p.FDR']
         # Record each line's start offset so significant lines can be seeked back to in stage [2]
         fpos = SAMSIG.tell(); fline = SAMSIG.readline(); px = 0
         while fline:
             self.progLog('\r#SIG','Reading Pvalues: %s p <= 0.05...' % rje.iStr(px))
             try: pval = float(string.split(fline)[-1])   # p-value is the final column
             except: break
             if pval <= 0.05:
                 if pval not in sigpval: sigpval[pval] = []
                 sigpval[pval].append(fpos); px += 1
             fpos = SAMSIG.tell(); fline = SAMSIG.readline()
         self.printLog('\r#SIG','Reading Pvalues complete: %s p <= 0.05.' % rje.iStr(px))
         ### ~ [2] Calculate FDR and output ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         SAMFDR = open(fdrfile,'w')
         rje.writeDelimit(SAMFDR, headers)
         px = 0; sx = 0.0; stot = len(sigpval)
         for pval in rje.sortKeys(sigpval):      # Ascending p-value order
             self.progLog('\r#FDR','Calculating FDR: %.2f%%' % (sx/stot)); sx += 100.0
             px += len(sigpval[pval])    # px now = rank: cumulative count of lines with p <= this pval
             # FDR estimate = p * (#positions tested) / rank; looks Benjamini-Hochberg-style - confirm intent
             if pval: fdr = (pval * npos) / px
             else: fdr = 0.0
             for fpos in sigpval[pval]:
                 SAMSIG.seek(fpos)   # Re-read the original line and append the FDR value
                 rje.writeDelimit(SAMFDR,rje.readDelimit(SAMSIG.readline())+[rje.expectString(fdr)])
         SAMSIG.close()  # NOTE(review): file handles leak if an exception fires before here
         SAMFDR.close()
         self.printLog('\r#FDR','%s FDR lines output to %s' % (rje.iStr(px),fdrfile))
     except: self.errorLog('%s.pileUpFDR() error' % (self)); return None
Code example #9
0
 def splitMascot(self):  ### Reads the MASCOT file and splits into header, hits and unmatched files.
     '''
     Reads the MASCOT CSV export and splits it into header (*.header.txt), hits (*.mascot.csv)
     and unmatched peptide (*.nohits.csv) outputs. Optionally parses iTRAQ ratio data into an
     'itraq' table and emPAI values into an extra 'empai' field.
     << True if successful; False on error. (Returns early if the hits file is current and force=F.)
     '''
     try:### ~ [0] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         db = self.db()
         infile = self.getStr('MASCOT')  # NOTE(review): assigned but unused; path is re-read via getStr below
         if self.basefile().lower() in ['','none']: self.basefile(rje.baseFile(self.getStr('MASCOT')))
         #x#self.deBug(self.basefile())
         headfile = '%s.header.txt' % self.basefile()
         hitsfile = '%s.mascot.csv' % self.basefile()
         peptfile = '%s.nohits.csv' % self.basefile()   # NOTE(review): name built but not referenced again here
         if rje.isYounger(self.getStr('MASCOT'),hitsfile) == hitsfile and not self.force():
             return self.printLog('#FILE','%s file found (force=F)' % hitsfile)
         ### ~ [1] Split MASCOT~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         headlines = []      # Raw header lines to write to headfile
         csvhead = []        # Parsed CSV field names for the hits table
         mdb = None          # Table currently being filled ('mascot', later 'nohits')
         mx = 0              # Line counter: index of the next line (used for iTRAQ header lookup)
         itraq = []          # iTRAQ ratio labels (fields containing '/')
         prot_data = {}      # prot_hit_num -> {prot_acc, prot_desc} cache for iTRAQ summary rows
         for mline in open(self.getStr('MASCOT'),'r').readlines():
             mx += 1     # Index of next line in case needed for iTRAQ reading!
             ## ~ [1a] Skip down until Header found ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             if not headlines and mline.find('Header') < 0: continue
             ## ~ [1b] Add Header lines to headlines until results headers found ~~~~~~~~~~~~~~~ ##
             if not csvhead and mline.find('prot_hit_num') < 0: headlines.append(mline); continue
             ## ~ [1c] Sort out MASCOT results headers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
             if mline.find('prot_hit_num') >= 0:
                 ## ~ Read Headers ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
                 open(headfile,'w').writelines(headlines)
                 csvhead = rje.readDelimit(string.join(string.split(rje.chomp(mline))),',')
                 while '' in csvhead: csvhead.remove('')
                 ## ~ Sort out iTRAQ headers (missing) ~~~~~~~~~ ##
                 if self.getBool('iTRAQ'):
                     # mx indexes the line AFTER the header row: first data line, holding iTRAQ labels
                     iline = open(self.getStr('MASCOT'),'r').readlines()[mx]
                     for isplit in rje.readDelimit(iline,',')[len(csvhead):]:  # Should be start of iTRAQ data
                         if '/' in isplit: itraq.append(isplit)
                     self.printLog('#ITRAQ',string.join(itraq))
                     csvhead += itraq
                     idb = db.addEmptyTable('itraq',['prot_hit_num','prot_acc','prot_desc','itraq','ratio','n','geomean','summary'],keys=['prot_hit_num','itraq'])
                     idb.info['Delimit'] = ','
                 ## ~ Add emPAI header (also missing) ~~~~~~~~~~ ##
                 if self.getBool('emPAI'): csvhead.append('empai')
                 ## ~ Set up Database Table ~~~~~~~~~~~~~~~~~~~~ ##
                 self.printLog('#HEAD',string.join(csvhead,'; '))
                 mdb = db.addEmptyTable('mascot',csvhead,keys=['prot_hit_num','pep_query'])
                 mdb.info['Delimit'] = ','
             elif mline.find('Peptide matches') >= 0:
                 # Start of the unmatched-peptides section: save hits and switch to the nohits table
                 mdb.saveToFile()
                 if self.getBool('emPAI'): csvhead.remove('empai')
                 mdb = db.addEmptyTable('nohits',csvhead,keys=['pep_query'])
                 for field in mdb.fields():
                     if field[:4] == 'prot': mdb.dropField(field)    # Unmatched peptides have no protein data
                 mdb.info['Delimit'] = ','
                 continue
             elif rje.chomp(mline):
                 #self.deBug('%s ... %s' % (mline[:20],mline.find('Peptide matches')))
                 data = rje.readDelimit(mline,',')
                 entry = {}; pretraq = True      # pretraq: still reading columns before iTRAQ/emPAI data
                 #self.deBug(csvhead); self.deBug(itraq);
                 for d in range(len(csvhead)+len(itraq)):
                     if d >= len(data): break
                     # iTRAQ label or 'emPAI' marker ends the regular column region
                     if data[d] in itraq: dhead = data[d]; pretraq = False
                     elif data[d] == 'emPAI': entry['empai'] = data[d+1]; pretraq = False
                     elif pretraq and d < len(csvhead): dhead = csvhead[d]
                     elif pretraq: continue      # Unmatched peptides will not have emPAI or iTRAQ data
                     #self.deBug('%s > %s' % (data[d],dhead))
                     if d and data[d-1] == 'emPAI': continue     # Value already captured as entry['empai']
                     elif data[d] in itraq + ['emPAI']: continue # Marker fields are not values
                     elif dhead not in entry: entry[dhead] = data[d]
                     #self.deBug('%s = %s' % (dhead,entry[dhead]))
                 if entry['prot_acc']: prot_data[entry['prot_hit_num']] = {'prot_acc':entry['prot_acc'],'prot_desc':entry['prot_desc']}
                 if self.getBool('iTRAQ') and 'Quantitation summary for protein' in data:
                     d = data.index('Quantitation summary for protein') + 1
                     if entry['prot_hit_num'] in prot_data:      # Prefer cached acc/desc for this hit
                         pacc = prot_data[entry['prot_hit_num']]['prot_acc']
                         pdesc = prot_data[entry['prot_hit_num']]['prot_desc']
                     else:
                         pacc = entry['prot_acc']
                         pdesc = entry['prot_desc']
                     while d < len(data):
                         # Each iTRAQ label is followed by: ratio, n, geomean, summary
                         if data[d] in itraq:
                             idb.addEntry({'prot_hit_num':entry['prot_hit_num'],'prot_acc':pacc,'prot_desc':pdesc,
                                           'itraq':data[d],'ratio':data[d+1],'n':data[d+2],'geomean':data[d+3],'summary':data[d+4]})
                         d += 1
                 #self.deBug(entry)
                 if entry['prot_hit_num'] or entry['pep_query']: mdb.addEntry(entry)
         mdb.saveToFile()
         if self.getBool('iTRAQ'): idb.saveToFile()
         self.deBug('')
         return True
     except: self.errorLog('Error reading MASCOT file'); return False
Code example #10
0
File: rje_mc58.py  Project: slimsuite/SLiMSuite
 def run(self):  ### Main run method
     '''
     Main run method.
     (1) Reformats *.fasta files into *.fas files with renamed 6rf_NEIME__/ref_NEIME__ sequences
     and formats BLAST databases. (2) Reads MC58 6RF CSV hit files into a hit dictionary.
     (3) Extracts hit sequences and runs GABLAM against MC58_1. (4) Identifies 6RF hits with
     zero genomic BLAST hits, searches those against embl_bacteria, and outputs a summary table.
     '''
     try:### ~ [1] Reformat Sequences ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         for fasta in glob.glob('*.fasta'):
             fas = fasta[:-2]    # '*.fasta' -> '*.fas' (drop trailing 'ta')
             if os.path.exists(fas): continue    # Already reformatted
             sx = 0
             for line in open(fasta,'r').readlines():
                 if line[:1] == '>':
                     try: (name,desc) = rje.matchExp('^>(\S+) (\S.+)$',line)
                     except: name = rje.matchExp('^>(\S+)',line)[0]  # Name-only header (no description)
                     # Pipe-delimited name formats: 3 fields -> 6RF translation; 5 fields -> reference seq
                     if len(string.split(name,'|')) == 3:
                         name = '6rf_NEIME__%s' % string.split(name,'|')[2]
                         open(fas,'a').write('>%s\n' % name)
                     elif len(string.split(name,'|')) == 5:
                         name = 'ref_NEIME__%s' % string.split(name,'|')[3]
                         open(fas,'a').write('>%s %s\n' % (name,desc))
                     else: print string.split(name,'|'); raise ValueError   # Unexpected name format: abort
                     self.progLog('\r#FAS','Processing %s: %s seqs' % (fas, rje.integerString(sx))); sx += 1
                 else: open(fas,'a').write(line)     # Sequence data lines copied unchanged
             self.printLog('\r#FAS','Processed %s: %s seqs from %s' % (fas, rje.integerString(sx), fasta))
             rje_blast.BLASTRun(self.log,self.cmd_list).formatDB(fas,protein=True,force=True)
         ### ~ [2] Read in CSV Data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         rfhits = {}     # Dictionary of {hit:['File:hit_num']}
         acc = 'MC58_6RF_Hits.acc'; open(acc,'w')    # Truncate/create the accession output file
         gfile = 'MC58_6RF_Hits.vs.MC58_1.hitsum.tdt'
         cx = 0
         for csv in glob.glob('MC58_6RF_CSV/*.CSV'):
             cx += 1
             file = os.path.basename(csv)[:-4]
             hits = False    # Becomes True once the results header row has been seen
             for line in open(csv,'r').readlines():
                 if line.find('prot_hit_num,prot_acc') == 0: hits = True
                 elif hits:
                     data = rje.readDelimit(line,',')
                     if len(data) < 2: continue
                     [num,name] = data[:2]
                     try: name = string.split(name,'|')[2]
                     except: continue    # Not a pipe-delimited 6RF-style name: skip
                     if name not in rfhits:
                         open(acc,'a').write('6rf_NEIME__%s\n' % name)
                         rfhits[name] = []
                     id = '%s:%s' % (file,num)
                     if id not in rfhits[name]: rfhits[name].append(id)
                     self.progLog('\r#CSV','Reading %d CSV files: %s 6RF Hits' % (cx,rje.integerString(len(rfhits))))
         self.printLog('\r#CSV','Read %d CSV files: %s 6RF Hits output to %s' % (cx,rje.integerString(len(rfhits)),acc))
         ### ~ [3] Extract sequences and perform GABLAM ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         if not os.path.exists(gfile):
             seqlist = rje_seq.SeqList(self.log,self.cmd_list+['seqin=%s' % acc,'fasdb=MC58_6RF.fas','seqout=MC58_6RF_Hits.fas','autoload=T','accnr=F','seqnr=F'])
             seqlist.info['Name'] = 'MC58_6RF_Hits.fas'
             seqlist.saveFasta()
             gablam.GABLAM(self.log,self.cmd_list+['seqin=MC58_6RF_Hits.fas','searchdb=MC58_1.fas','qryacc=F']).gablam()
         ### ~ [4] Read in GABLAM and ID Hits without genomic homology ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         gdata = rje.dataDict(self,gfile,['Qry'],['HitNum'])
         zeros = []      # Queries with zero BLAST hits to MC58_1
         for hit in gdata:
             if string.atoi(gdata[hit]['HitNum']) == 0: zeros.append(hit)
         zeros = rje.sortUnique(zeros,False)
         open('6rf_zeros.acc','w').write(string.join(zeros,'\n'))
         self.printLog('#ZERO','%d 6RF hits with 0 BLAST hits to MC58_1' % len(zeros))
         ufile = 'MC58_6RF_Zeros.vs.embl_bacteria.hitsum.tdt'
         if not os.path.exists(ufile):
             seqlist = rje_seq.SeqList(self.log,self.cmd_list+['seqin=6rf_zeros.acc','fasdb=MC58_6RF.fas','seqout=MC58_6RF_Zeros.fas','autoload=T','accnr=F','seqnr=F'])
             seqlist.info['Name'] = 'MC58_6RF_Zeros.fas'
             seqlist.saveFasta()
             gablam.GABLAM(self.log,self.cmd_list+['seqin=MC58_6RF_Zeros.fas','searchdb=/scratch/Databases/NewDB/TaxaDB/embl_bacteria.fas','qryacc=F']).gablam()
         gdata = rje.dataDict(self,ufile,['Qry'],getheaders=True)
         fdata = rje.dataDict(self,string.replace(ufile,'hitsum','gablam'),['Qry'],['Hit'],lists=True)
         headers = gdata.pop('Headers')
         headers.insert(1,'Sample')
         headers.append('BestHit')
         rje.delimitedFileOutput(self,'MC58_6RF_Zeros.tdt',headers,rje_backup=True)
         for rf in rje.sortKeys(gdata):
             rfcut = string.split(rf,'__')[1]    # Accession without the '6rf_NEIME__' prefix
             gdata[rf]['Sample'] = string.join(rfhits[rfcut],'; ')
             gdata[rf]['Qry'] = rfcut
             try: gdata[rf]['BestHit'] = fdata[rf]['Hit'][0]
             except: gdata[rf]['BestHit']  = '-'     # No GABLAM hit recorded
             rje.delimitedFileOutput(self,'MC58_6RF_Zeros.tdt',headers,datadict=gdata[rf])
         
     except: self.errorLog(rje_zen.Zen().wisdom())
     self.printLog('#ZEN',rje_zen.Zen().wisdom())
Code example #11
0
File: rje_mc58.py  Project: lyhniupi1/SLiMSuite
    def run(self):  ### Main run method
        '''
        Main run method. (1) Reformats *.fasta files into renamed *.fas BLAST
        databases; (2) compiles 6RF (six reading frame) protein hits from Mascot
        CSV exports; (3) GABLAM-searches the hits against the MC58_1 genome; and
        (4) reports hits with no genomic homology, annotated with their best hit
        against EMBL bacteria and the CSV sample(s) they came from.
        '''
        try:  ### ~ [1] Reformat Sequences ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            # Convert each *.fasta into a *.fas with standardised sequence names,
            # then format the result as a protein BLAST database.
            for fasta in glob.glob('*.fasta'):
                fas = fasta[:-2]  # 'name.fasta' -> 'name.fas' (drops trailing 'ta')
                if os.path.exists(fas): continue  # Already reformatted
                sx = 0  # Sequence counter for progress logging
                for line in open(fasta, 'r').readlines():
                    if line[:1] == '>':
                        # Header line: split into name and (optional) description.
                        try:
                            (name,
                             desc) = rje.matchExp('^>(\S+) (\S.+)$', line)
                        except:
                            name = rje.matchExp('^>(\S+)', line)[0]
                        # NOTE(review): when the except branch above fires, desc is
                        # stale (or undefined for the first header) if the 5-part
                        # branch below is then taken - confirm headers have descs.
                        if len(string.split(name, '|')) == 3:
                            # Three '|'-separated parts => 6RF translation entry.
                            name = '6rf_NEIME__%s' % string.split(name, '|')[2]
                            open(fas, 'a').write('>%s\n' % name)
                        elif len(string.split(name, '|')) == 5:
                            # Five '|'-separated parts => reference protein entry.
                            name = 'ref_NEIME__%s' % string.split(name, '|')[3]
                            open(fas, 'a').write('>%s %s\n' % (name, desc))
                        else:
                            print string.split(name, '|')
                            raise ValueError
                        self.progLog(
                            '\r#FAS', 'Processing %s: %s seqs' %
                            (fas, rje.integerString(sx)))
                        sx += 1
                    else:
                        # Sequence data line: copied through unchanged.
                        open(fas, 'a').write(line)
                self.printLog(
                    '\r#FAS', 'Processed %s: %s seqs from %s' %
                    (fas, rje.integerString(sx), fasta))
                # Format the reformatted fasta as a protein BLAST database.
                rje_blast.BLASTRun(self.log,
                                   self.cmd_list).formatDB(fas,
                                                           protein=True,
                                                           force=True)
            ### ~ [2] Read in CSV Data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            # Compile the accession numbers of 6RF hits from Mascot CSV exports.
            rfhits = {}  # Dictionary of {hit:['File:hit_num']}
            acc = 'MC58_6RF_Hits.acc'
            open(acc, 'w')  # Truncate/create the accession list file
            gfile = 'MC58_6RF_Hits.vs.MC58_1.hitsum.tdt'
            cx = 0  # CSV file counter
            for csv in glob.glob('MC58_6RF_CSV/*.CSV'):
                cx += 1
                file = os.path.basename(csv)[:-4]  # CSV name without '.CSV'
                hits = False  # Set True once the protein hit table header is seen
                for line in open(csv, 'r').readlines():
                    if line.find('prot_hit_num,prot_acc') == 0: hits = True
                    elif hits:
                        data = rje.readDelimit(line, ',')
                        if len(data) < 2: continue
                        [num, name] = data[:2]
                        try:
                            name = string.split(name, '|')[2]
                        except:
                            continue  # Not a pipe-delimited accession: skip line
                        if name not in rfhits:
                            open(acc, 'a').write('6rf_NEIME__%s\n' % name)
                            rfhits[name] = []
                        id = '%s:%s' % (file, num)  # NOTE(review): shadows builtin id()
                        if id not in rfhits[name]: rfhits[name].append(id)
                        self.progLog(
                            '\r#CSV', 'Reading %d CSV files: %s 6RF Hits' %
                            (cx, rje.integerString(len(rfhits))))
            self.printLog(
                '\r#CSV', 'Read %d CSV files: %s 6RF Hits output to %s' %
                (cx, rje.integerString(len(rfhits)), acc))
            ### ~ [3] Extract sequences and perform GABLAM ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            # Pull the hit sequences from MC58_6RF.fas and GABLAM-search them
            # against the MC58_1 genome (skipped if results already exist).
            if not os.path.exists(gfile):
                seqlist = rje_seq.SeqList(
                    self.log, self.cmd_list + [
                        'seqin=%s' % acc, 'fasdb=MC58_6RF.fas',
                        'seqout=MC58_6RF_Hits.fas', 'autoload=T', 'accnr=F',
                        'seqnr=F'
                    ])
                seqlist.info['Name'] = 'MC58_6RF_Hits.fas'
                seqlist.saveFasta()
                gablam.GABLAM(
                    self.log, self.cmd_list + [
                        'seqin=MC58_6RF_Hits.fas', 'searchdb=MC58_1.fas',
                        'qryacc=F'
                    ]).gablam()
            ### ~ [4] Read in GABLAM and ID Hits without genomic homology ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
            # Identify 6RF hits with zero BLAST hits against the genome.
            gdata = rje.dataDict(self, gfile, ['Qry'], ['HitNum'])
            zeros = []
            for hit in gdata:
                if string.atoi(gdata[hit]['HitNum']) == 0: zeros.append(hit)
            zeros = rje.sortUnique(zeros, False)
            open('6rf_zeros.acc', 'w').write(string.join(zeros, '\n'))
            self.printLog(
                '#ZERO',
                '%d 6RF hits with 0 BLAST hits to MC58_1' % len(zeros))
            # Search the zero-hit sequences against EMBL bacteria for best hits.
            ufile = 'MC58_6RF_Zeros.vs.embl_bacteria.hitsum.tdt'
            if not os.path.exists(ufile):
                seqlist = rje_seq.SeqList(
                    self.log, self.cmd_list + [
                        'seqin=6rf_zeros.acc', 'fasdb=MC58_6RF.fas',
                        'seqout=MC58_6RF_Zeros.fas', 'autoload=T', 'accnr=F',
                        'seqnr=F'
                    ])
                seqlist.info['Name'] = 'MC58_6RF_Zeros.fas'
                seqlist.saveFasta()
                gablam.GABLAM(
                    self.log, self.cmd_list + [
                        'seqin=MC58_6RF_Zeros.fas',
                        'searchdb=/scratch/Databases/NewDB/TaxaDB/embl_bacteria.fas',
                        'qryacc=F'
                    ]).gablam()
            # Combine the hitsum data with the best gablam hit and the sample(s)
            # of origin, then output one delimited line per zero-hit 6RF query.
            gdata = rje.dataDict(self, ufile, ['Qry'], getheaders=True)
            fdata = rje.dataDict(self,
                                 string.replace(ufile, 'hitsum', 'gablam'),
                                 ['Qry'], ['Hit'],
                                 lists=True)
            headers = gdata.pop('Headers')
            headers.insert(1, 'Sample')
            headers.append('BestHit')
            rje.delimitedFileOutput(self,
                                    'MC58_6RF_Zeros.tdt',
                                    headers,
                                    rje_backup=True)
            for rf in rje.sortKeys(gdata):
                rfcut = string.split(rf, '__')[1]  # Accession part of 'type_SPEC__acc'
                gdata[rf]['Sample'] = string.join(rfhits[rfcut], '; ')
                gdata[rf]['Qry'] = rfcut
                try:
                    gdata[rf]['BestHit'] = fdata[rf]['Hit'][0]
                except:
                    gdata[rf]['BestHit'] = '-'  # No gablam hit recorded
                rje.delimitedFileOutput(self,
                                        'MC58_6RF_Zeros.tdt',
                                        headers,
                                        datadict=gdata[rf])

        except:
            self.errorLog(rje_zen.Zen().wisdom())
        self.printLog('#ZEN', rje_zen.Zen().wisdom())
Code example #12
0
 def pileUpStats(self):  ### Calculates statistics of genetic differences from parsed PileUp Tables
     '''
     Calculates statistics of genetic differences from parsed PileUp Tables.
     Loads per-position WT (*.WT.tdt) and Mutant (*.Mut.tdt) pileup summaries,
     zero-pads positions with no mapped reads, then writes per-position binomial
     p-values comparing the Mutant WT-allele frequency against the WT major
     allele frequency to *.pdiff.tdt, before applying an FDR correction via
     self.pileUpFDR(). Returns None on error.
     '''
     try:### ~ [0] Setup ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         statfile = '%s.pdiff.tdt' % self.baseFile()
         # Unless force=T, keep existing output and skip straight to the FDR step.
         if not self.force() and os.path.exists(statfile): return self.pileUpFDR()
         ## ~ [0a] Load WT Data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         wtdata = {}     # {locus:{field:[value per position]}}
         for locus in self.dict['RefSeq']:
             wtdata[locus] = {}
             for field in ['N','QN','MajFreq']: wtdata[locus][field] = []
         WTDATA = open('%s.WT.tdt' % self.baseFile(),'r'); wx = 1
         fields = []     # Header fields: empty until the first line has been read
         for line in WTDATA:
             data = rje.readDelimit(line)
             if fields:
                 locus = data[0]
                 pos = int(data[1])
                 # NOTE(review): wx is never reset per locus, so this zero-padding
                 # of positions with no mapped reads assumes single-locus input -
                 # confirm before running multi-locus data through here.
                 while pos > wx:
                     wtdata[locus]['N'].append(0); wtdata[locus]['QN'].append(0); wtdata[locus]['MajFreq'].append(0.0); wx += 1
                 for field in ['N','QN']: wtdata[locus][field].append(int(data[fields.index(field)]))
                 for field in ['MajFreq']: wtdata[locus][field].append(string.atof(data[fields.index(field)]))
                 wx += 1
             else: fields = data[0:]
         WTDATA.close()
         ## ~ [0b] Load Mutant Data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         mutdata = {}     # {locus:{field:[value per position]}}
         for locus in self.dict['RefSeq']:
             mutdata[locus] = {}
             for field in ['N','QN','Major','MajFreq','WTFreq']: mutdata[locus][field] = []
         MUTDATA = open('%s.Mut.tdt' % self.baseFile(),'r'); mx = 1
         fields = []
         for line in MUTDATA:
             data = rje.readDelimit(line)
             if fields:
                 locus = data[0]
                 self.str['RefSeq'] = self.dict['RefSeq'][locus]
                 pos = int(data[1])
                 # Patch/extend the reference sequence with the pileup reference
                 # base where it is unknown ('?') or beyond the current length.
                 try:
                     if pos > len(self.str['RefSeq']):
                         while (pos-1) > len(self.str['RefSeq']): self.str['RefSeq'] += '?'
                         self.str['RefSeq'] += data[2]
                         self.dict['RefSeq'][locus] = self.str['RefSeq']
                     elif self.str['RefSeq'][pos-1] == '?':
                         self.str['RefSeq'] = self.str['RefSeq'][:pos-1] + data[2] + self.str['RefSeq'][pos:]
                         self.dict['RefSeq'][locus] = self.str['RefSeq']
                 except: self.warnLog('Problem mapping Pos %s onto %snt %s RefSeq' % (rje.iStr(pos),locus,rje.iLen(self.str['RefSeq'])))
                 # NOTE(review): mx, like wx above, is not reset per locus.
                 while pos > mx:
                     mutdata[locus]['N'].append(0); mutdata[locus]['QN'].append(0); mutdata[locus]['Major'].append('-'); mutdata[locus]['MajFreq'].append(0.0); mutdata[locus]['WTFreq'].append(0.0); mx += 1
                 for field in ['N','QN']: mutdata[locus][field].append(int(data[fields.index(field)]))
                 for field in ['MajFreq','WTFreq']: mutdata[locus][field].append(string.atof(data[fields.index(field)]))
                 for field in ['Major']: mutdata[locus][field].append(data[fields.index(field)])
                 mx += 1
             else: fields = data[0:]
         MUTDATA.close()
         ## ~ [0c] Integrity check ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ##
         #!# Need a new check with locus info #!#
         #for field in wtdata:    #!# Won't be true - not all reference genome positions present in output (0 mapped reads)
         #    if len(wtdata[field]) != len(self.str['RefSeq']): self.errorLog('Data length mismatch for WT %s' % field,printerror=False); raise ValueError
         #for field in mutdata:    #!# Won't be true - not all reference genome positions present in output (0 mapped reads)
         #    if len(mutdata[field]) != len(self.str['RefSeq']): self.errorLog('Data length mismatch for Mutant %s' % field,printerror=False); raise ValueError
         #self.printLog('#REF','WT and Mutant data for %s reference positions' % rje.iLen(self.str['RefSeq']))
         ### ~ [1] Assess and output ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         SAMSIG = open(statfile,'w')    # Same path as built in [0]: %s.pdiff.tdt
         headers = ['Locus','Pos','Ref','WT.N','WT.QN','WT.Major','WT.MajFreq','Mut.N','Mut.QN','Mut.Major','Mut.MajFreq','Mut.WTFreq','p.Over','p.Under','p.Diff']
         SAMSIG.write('%s\n' % string.join(headers,'\t'))
         nodifx = 0; nomutx = 0; sx = 0      # Skipped (majdif/majmut) and output line counts
         for locus in rje.sortKeys(self.dict['RefSeq']):
             self.str['RefSeq'] = self.dict['RefSeq'][locus]
             self.list['WTMajor'] = self.dict['WTMajor'][locus]
             for i in range(len(self.str['RefSeq'])):
                 try:
                     sigdata = [locus,i+1,self.str['RefSeq'][i],wtdata[locus]['N'][i],wtdata[locus]['QN'][i],self.list['WTMajor'][i],wtdata[locus]['MajFreq'][i],
                                mutdata[locus]['N'][i],mutdata[locus]['QN'][i],mutdata[locus]['Major'][i],mutdata[locus]['MajFreq'][i],mutdata[locus]['WTFreq'][i]]
                 except: self.warnLog('Incomplete data for %s:%s (no pdiff output)' % (locus,rje.iStr(i+1))); continue
                 if self.getBool('MajDif') and self.list['WTMajor'][i] == mutdata[locus]['Major'][i]: nodifx += 1; continue   # Was: sigdata += [1.0,1.0]
                 elif self.getBool('MajMut') and self.str['RefSeq'][i] == mutdata[locus]['Major'][i]: nomutx += 1;continue
                 elif not wtdata[locus]['MajFreq'][i]:    # No Data for WT
                     if mutdata[locus]['WTFreq'][i]: sigdata += [0.0,1.0]
                     else: sigdata += [1.0,1.0]
                 elif mutdata[locus]['WTFreq'][i] > wtdata[locus]['MajFreq'][i]:
                     # Mutant WT-allele frequency above WT major frequency: binomial P(obs or more).
                     obs = int((mutdata[locus]['QN'][i] * mutdata[locus]['WTFreq'][i]) + 0.5)
                     sigdata.append(rje.binomial(obs,mutdata[locus]['QN'][i],wtdata[locus]['MajFreq'][i],usepoisson=False,callobj=self))
                     sigdata.append(1.0)
                 elif mutdata[locus]['WTFreq'][i] < wtdata[locus]['MajFreq'][i]:
                     # Mutant WT-allele frequency below WT major frequency: binomial P(obs or fewer).
                     obs = int((mutdata[locus]['QN'][i] * mutdata[locus]['WTFreq'][i]) + 0.5)
                     sigdata.append(1.0)
                     sigdata.append(1.0 - rje.binomial(obs+1,mutdata[locus]['QN'][i],wtdata[locus]['MajFreq'][i],usepoisson=False,callobj=self))
                 else: sigdata += [1.0,1.0]
                 sigdata.append(min(1.0,2*min(sigdata[-2:])))    # Two-tailed p.Diff, capped at 1.0
                 rje.writeDelimit(SAMSIG,sigdata); sx += 1
         SAMSIG.close()
         # Fixed: message previously claimed '*.pdiff.txt'; the file written is *.pdiff.tdt.
         ptxt = '%s lines output to *.pdiff.tdt' % rje.iStr(sx)
         if self.getBool('MajDif'): ptxt += '; %s positions skipped where WTMajor==MutMajor (majdif=T)' % rje.iStr(nodifx)
         if self.getBool('MajMut'): ptxt += '; %s positions skipped where Ref==MutMajor (majmut=T)' % rje.iStr(nomutx)
         self.printLog('#PDIFF','%s.' % ptxt)
         ### ~ [2] FDR Correction ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
         self.pileUpFDR()
     except: self.errorLog('%s.pileUpStats() error' % (self)); return None