def calcFDRStats(realCount, ctrlCounts):
    """
    Summarise the per-control false discovery rates for one real count.

    Each control count is divided by realCount and capped at 1.0; the list
    of capped ratios is passed to mathDefs.stdDv(FDRs, 'median') and the
    result of that call is returned unchanged.

    realCount  -- number of real observations (the FDR denominator).
    ctrlCounts -- sequence of control counts, one per control set.

    Returns (None, None) when realCount is 0, since the ratio is undefined;
    otherwise whatever mathDefs.stdDv returns.
    NOTE(review): presumably a (stdDev, median) pair -- confirm against
    mathDefs.stdDv.
    """
    # The denominator does not change between controls, so test it once up
    # front.  This also covers an empty ctrlCounts, which previously skipped
    # the zero check entirely.
    if realCount == 0:
        return (None, None)

    # float() keeps the division true division under Python 2 as well.
    FDRs = [min(float(ctrl) / realCount, 1.0) for ctrl in ctrlCounts]
    return mathDefs.stdDv(FDRs, 'median')
def calcCtrlMedianStDv(self):
    """
    Fill self.ctrlMedianStd with a [median, stdDev-from-median] pair for
    each of the four count categories of every seedType in self.ctrlCounts.

    Every seedType listed in _seedModels is first reset to four
    [None, None] placeholders; the seedTypes present in self.ctrlCounts are
    then overwritten with the values computed by mathDefs.stdDv.
    """
    # countHitsInOrthos() must have populated both count tables already.
    assert self.matchCounts and self.ctrlCounts, \
        'ERROR: It looks like we have not called countHitsInOrthos().'

    # Reset: four independent [median, stdDev] slots per seedType.
    for model in _seedModels:
        self.ctrlMedianStd[model] = [[None, None] for _ in range(4)]

    # One statistics call per (seedType, count category) column.
    for model in self.ctrlCounts:
        permutations = self.ctrlCounts[model]
        for category, slot in enumerate(self.ctrlMedianStd[model]):
            column = [perm[category] for perm in permutations]
            spread, centre = mathDefs.stdDv(column, kind='median', df=1)
            slot[0] = centre
            slot[1] = spread
def calcFDRStats(goDictEntry):
    """
    Summarise the false discovery rates for one entry of real/control hits.

    goDictEntry -- indexable pair: item 0 is the sized collection of real
                   hits, item 1 is an iterable of sized collections, one
                   per control set.

    The size of each control collection is divided by the number of real
    hits and capped at 1.0; the capped ratios are passed to
    mathDefs.stdDv(FDRs, 'median').

    Returns None when there are no real hits, since the ratio is undefined;
    otherwise the list [realCount] followed by the items returned by
    mathDefs.stdDv.
    """
    realCount = len(goDictEntry[0])

    # The denominator is the same for every control set, so test it once.
    # This also covers an entry with no control sets, which previously
    # skipped the zero check entirely.
    if realCount == 0:
        return None

    # float() keeps the division true division under Python 2 as well.
    FDRs = [min(float(len(ctrlHits)) / realCount, 1.0)
            for ctrlHits in goDictEntry[1]]
    return [realCount] + list(mathDefs.stdDv(FDRs, 'median'))
def writeTargetsFdrMedMeth(miRobj_Ca, miRobj_Cb, oFile):
    """
    Write the FDR-screened target report for one miRNA to oFile.

    miRobj_Ca -- object whose reportGeneTargetsFdrMedMeth() supplies the
                 hits that passed the Ctrl_a screen, and whose ctrlEvents
                 fixes the number of control sets.
    miRobj_Cb -- object whose ctrlEvents[seedType][i][orthoType] supplies
                 the Ctrl_b control sequences used for the combined FDR.
    oFile     -- writable file-like object (needs write() and flush()).

    Output written to oFile:
      * a '-- <name> --' header line,
      * one 'allPassedSeedsFor_<orthoType>' line per orthoType (1-3) that
        has any real sequences: count, conservative FDR, median FDR and
        the sorted sequence names pooled over all passed seedTypes,
      * one line per (seedType, orthoType) that passed the Ctrl_a screen.
    The name of miRobj_Cb is also printed to stdout.

    Relies on the module-level names stdvsAboveMed, consFdrThreshold,
    initList, miRT and mathDefs.
    """
    miRHits_Ca = miRobj_Ca.reportGeneTargetsFdrMedMeth(
        stdvLimit=stdvsAboveMed,
        consFdrThresh=consFdrThreshold,
        divide=0)

    # Index 0 is unused so that orthoTypes 1-3 can index directly.
    totReal = [None, set(), set(), set()]

    # One accumulator per control set; the number of control sets is read
    # from an arbitrary seedType entry of miRobj_Ca.ctrlEvents.
    # NOTE(review): assumes initList gives each slot its own copy of the
    # template list -- confirm.
    anySeedType = list(miRobj_Ca.ctrlEvents)[0]
    totCtrl = initList(len(miRobj_Ca.ctrlEvents[anySeedType]),
                       [None, set(), set(), set()])

    # Calculate combined FDR for miRNA using Ctrl_b data from seedTypes that
    # passed the reportGeneTargetsFdrMedMeth() Ctrl_a screen.
    # >> Gather and combine data from passed seedTypes:
    for oType in range(1, 4):
        for sType in miRT._seedModels:
            passedHit = miRHits_Ca[sType][oType]
            if not passedHit:
                continue
            totReal[oType].update(passedHit[0])
            for i, ctrlSets in enumerate(totCtrl):
                ctrlSets[oType].update(miRobj_Cb.ctrlEvents[sType][i][oType])

    # >> Calculate separate FDRs for each Ctrl_b group:
    totalsData = [None, None, None, None]
    for oType in range(1, 4):
        realVal = len(totReal[oType])
        if realVal == 0:
            # No real sequences: the FDR is undefined, leave the slot None.
            continue
        # float() keeps the division true division under Python 2 as well.
        tempFDRs = [min(float(len(ctrlSets[oType])) / realVal, 1.0)
                    for ctrlSets in totCtrl]
        oFDRstdv, oFDRmed = mathDefs.stdDv(tempFDRs, 'median')
        cons_oFDR = oFDRmed + (stdvsAboveMed * oFDRstdv)
        totalsData[oType] = [totReal[oType], oFDRmed, cons_oFDR]

    # Write out Totals data.  All output goes to the oFile argument.
    print(miRobj_Cb.name)
    oFile.write('-- %s --\n' % (miRobj_Cb.name))
    for i in range(1, len(totalsData)):
        totals = totalsData[i]
        if totals:
            oFile.write('%s : allPassedSeedsFor_%s : %s : %s : %s Seqs=%s\n'
                        % (miRobj_Cb.name,
                           i,
                           len(totals[0]),
                           totals[2],
                           totals[1],
                           ','.join(sorted([str(x) for x in totals[0]]))))

    # Write out passed Seed data.
    for seedType in sorted(miRHits_Ca):
        for i in range(1, len(miRHits_Ca[seedType])):
            hit = miRHits_Ca[seedType][i]
            if hit:
                oFile.write('%s : %s : orthoType_%s : %s : %.2f : %.4f Seqs=%s\n'
                            % (miRobj_Ca.name,
                               seedType,
                               i,
                               len(hit[0]),
                               hit[2],
                               hit[1],
                               ','.join(sorted([str(x) for x in hit[0]]))))
    oFile.flush()