Esempio n. 1
0
def calcFDRStats(realCount,ctrlCounts):
    """
    Summarise the false-discovery-rate (FDR) estimates for one real count.

    Each control count is divided by realCount to give one FDR estimate, and
    any estimate above 1 is capped at 1.0.  The capped estimates are passed
    to mathDefs.stdDv(FDRs, 'median') and its result is returned unchanged.

    realCount  -- the real hit count; the denominator of every estimate.
    ctrlCounts -- iterable of control hit counts, one per control set.

    Returns (None, None) when realCount is 0, because no ratio can be
    formed; otherwise whatever mathDefs.stdDv returns.
    """
    # The zero test does not depend on the control data, so it is made once
    # up front.  This also covers an empty ctrlCounts, which previously
    # skipped the guard and handed an empty list to mathDefs.stdDv.
    if realCount == 0:
        return (None,None)

    # min() caps each estimate at 1.0; float() forces true division on
    # Python 2 integer counts.
    FDRs = [min(float(ctrl)/realCount, 1.0) for ctrl in ctrlCounts]

    return mathDefs.stdDv(FDRs,'median')
Esempio n. 2
0
 def calcCtrlMedianStDv(self):
     """
     Fill self.ctrlMedianStd with per-category statistics of the control counts.

     Every seed type in _seedModels first receives four [None, None]
     placeholder pairs.  Then, for each seed type found in self.ctrlCounts,
     position i of every entry in that seed type's list is collected and
     passed to mathDefs.stdDv(..., kind='median', df=1).  The two values it
     returns are stored as pair i in the order [median, stdDev-from-median].

     Asserts that self.matchCounts and self.ctrlCounts are both truthy; the
     assertion message names countHitsInOrthos() as the call that is expected
     to have populated them.
     """
     # Refuse to run on unpopulated count tables.
     assert self.matchCounts and self.ctrlCounts, \
            'ERROR:  It looks like we have not called countHitsInOrthos().'

     # Start every known seed model with four independent [median, stdDv]
     # placeholders (one per count category).
     for model in _seedModels:
         self.ctrlMedianStd[model] = [[None, None] for _ in range(4)]

     # Replace the placeholders for each seed type that has control data.
     for model in self.ctrlCounts:
         for idx, pair in enumerate(self.ctrlMedianStd[model]):
             # Column idx across all control entries of this seed type.
             column = [counts[idx] for counts in self.ctrlCounts[model]]
             spread, centre = mathDefs.stdDv(column, kind='median', df=1)
             # mathDefs.stdDv hands back (stdDv, median); stored as (median, stdDv).
             pair[0] = centre
             pair[1] = spread
Esempio n. 3
0
def calcFDRStats(goDictEntry):
    """
    Summarise the false-discovery-rate (FDR) estimates for one entry.

    goDictEntry -- indexable pair: element 0 is the sized collection of real
                   members, element 1 is an iterable of sized control
                   collections.

    The size of each control collection is divided by the size of the real
    collection to give one FDR estimate, capped at 1.0.  The capped
    estimates are passed to mathDefs.stdDv(FDRs, 'median').

    Returns None when the real collection is empty, because no ratio can be
    formed; otherwise a list made of the real count followed by the values
    returned by mathDefs.stdDv.
    """
    realCount  = len(goDictEntry[0])
    ctrlCounts = [len(x) for x in goDictEntry[1]]

    # The zero test does not depend on the control data, so it is made once
    # up front.  This also covers an empty control list, which previously
    # skipped the guard and handed an empty list to mathDefs.stdDv.
    if realCount == 0:
        return None

    # min() caps each estimate at 1.0; float() forces true division on
    # Python 2 integer counts.
    FDRs = [min(float(ctrl)/realCount, 1.0) for ctrl in ctrlCounts]

    return [realCount] + list(mathDefs.stdDv(FDRs,'median'))
Esempio n. 4
0
def writeTargetsFdrMedMeth(miRobj_Ca,miRobj_Cb,oFile):
    """
    Write the FDR-screened target report for one miRNA to oFile.

    miRobj_Ca -- object supplying reportGeneTargetsFdrMedMeth(), .ctrlEvents
                 and .name (the "Ctrl_a" screen).
    miRobj_Cb -- object supplying .ctrlEvents and .name (the "Ctrl_b"
                 controls used for the combined FDR).
    oFile     -- writable file-like object; receives every report line and
                 is flushed before returning.

    Steps:
      1. Ask miRobj_Ca for the seed types that pass its screen, using the
         module-level stdvsAboveMed and consFdrThreshold settings.
      2. For orthoTypes 1-3, pool the real hits of every passing seed type,
         and pool the matching miRobj_Cb control events per control group.
      3. For each orthoType with pooled real hits, compute one FDR per
         control group (capped at 1.0), then take the values returned by
         mathDefs.stdDv(..., 'median') (unpacked as stdDev, median) and a
         conservative FDR of median + stdvsAboveMed * stdDev.
      4. Print miRobj_Cb.name to stdout, then write the totals lines and
         the per-seed-type lines to oFile.

    Relies on names defined outside this function: initList, miRT,
    mathDefs, stdvsAboveMed and consFdrThreshold.
    """
    miRHits_Ca = miRobj_Ca.reportGeneTargetsFdrMedMeth(stdvLimit=stdvsAboveMed,
                                                       consFdrThresh=consFdrThreshold,
                                                       divide=0)
    # Index 0 is an unused placeholder so orthoTypes 1-3 can index directly.
    totReal = [None,set(),set(),set()]
    # One accumulator per control group; the group count is taken from the
    # first seed type listed in miRobj_Ca.ctrlEvents.
    firstSeedType = list(miRobj_Ca.ctrlEvents.keys())[0]
    totCtrl = initList(len(miRobj_Ca.ctrlEvents[firstSeedType]),[None, set(), set(),set()])

    # Calculate combined FDR for miRNA using Ctrl_b data from seedTypes that passed
    # the reportGeneTargetsFdrMedMeth() Ctrl_a screen.
    #   >> Gather and combine data from passed seedTypes:
    for oType in range(1,4):
        for sType in miRT._seedModels:
            if not miRHits_Ca[sType][oType]:
                continue
            totReal[oType].update(miRHits_Ca[sType][oType][0])
            for i in range(len(totCtrl)):
                totCtrl[i][oType].update(miRobj_Cb.ctrlEvents[sType][i][oType])

    #   >> Calculate separate FDRs for each Ctrl_b group:
    totalsData = [None,None,None,None]
    for oType in range(1,4):
        if not totReal[oType]:
            # No passing seed type contributed hits for this orthoType.
            continue
        realVal = len(totReal[oType])
        # One ratio per control group, capped at 1.0.
        tempFDRs = [min(float(len(ctrlGroup[oType]))/realVal, 1.0)
                    for ctrlGroup in totCtrl]
        oFDRstdv,oFDRmed = mathDefs.stdDv(tempFDRs,'median')
        cons_oFDR        = oFDRmed + (stdvsAboveMed*oFDRstdv)
        totalsData[oType] = [totReal[oType],oFDRmed,cons_oFDR]

    # Write out Totals data:
    print(miRobj_Cb.name)
    # Output goes to the oFile argument; the previous code wrote to an
    # undeclared name 'outFile' and ignored the parameter.
    oFile.write('-- %s --\n' % (miRobj_Cb.name))
    for i in range(1,len(totalsData)):
        if totalsData[i]:
            # Field order: hit count, conservative FDR, median FDR, sequences.
            oFile.write('%s : allPassedSeedsFor_%s : %s : %s : %s  Seqs=%s\n' \
                        %(miRobj_Cb.name,
                          i,
                          len(totalsData[i][0]),
                          totalsData[i][2],
                          totalsData[i][1],
                          ','.join(sorted(str(x) for x in totalsData[i][0]))))

    # write out passed Seed data
    for seedType in sorted(miRHits_Ca):
        for i in range(1,len(miRHits_Ca[seedType])):
            if miRHits_Ca[seedType][i]:
                oFile.write('%s : %s : orthoType_%s : %s : %.2f : %.4f Seqs=%s\n'\
                            %(miRobj_Ca.name,
                              seedType,
                              i,
                              len(miRHits_Ca[seedType][i][0]),
                              miRHits_Ca[seedType][i][2],
                              miRHits_Ca[seedType][i][1],
                              ','.join(sorted(str(x) for x in miRHits_Ca[seedType][i][0]))))
    oFile.flush()