def singleSimulation(self, numH0, numH1, replicateIndex, verbose=False):
    """Run one replicate of the multiple-testing simulation.

    A MultipleTestCollection is built from numH0, numH1 and the simulator's
    settings. Samples are added (first NUM_SAMPLES_INITIALLY, then
    NUM_SAMPLES_PER_CHUNK at a time) until the collection reports that all
    tests are determined. If a galaxyFn is set, the raw p/q-values and two
    figures are written as run-specific files and links to them are
    appended to the per-numH1 catalog page 'cat.html'.

    numH0, numH1   -- forwarded to MultipleTestCollection; numH1 is also used
                      in file ids and in the link text.
    replicateIndex -- index of this replicate, used in file ids and link text.
    verbose        -- accepted for interface compatibility; not used here.

    Returns a 3-tuple of what the collection reports:
    (total number of samples, total number rejected, classification summaries).
    """
    collection = MultipleTestCollection(
        numH0, numH1, self._maxNumSamples, self._h,
        self._fdrThreshold, self._a, self._b)

    collection.addSamples(self.NUM_SAMPLES_INITIALLY)
    while not collection.allTestsAreDetermined():
        collection.addSamples(self.NUM_SAMPLES_PER_CHUNK)

    # Sampling is over, so switch to the threshold that applies after the
    # computations are finished: it affects the final rejection/acceptance,
    # but no longer the stopping of sampling.
    collection.setFdrThresholdAtAllCounters(self._postFdrThreshold)

    if self._galaxyFn is not None:
        # The scheme name follows from which stopping parameters are set.
        if self._h is None:
            schemeName = 'Basic'
        else:
            schemeName = 'Sequential' if self._fdrThreshold is None else 'McFdr'

        idPrefix = [schemeName, str(numH1), str(replicateIndex)]

        # Raw p- and q-values as a text file.
        rawFile = GalaxyRunSpecificFile(idPrefix + ['PandQvals.txt'], self._galaxyFn)
        collection.writeAllPandQVals(rawFile.getFile())
        rawLink = rawFile.getLink('Raw p and q-vals')
        catalogEntry = rawLink + ' under %s scheme with %i true H1, (replication %i)' % (
            schemeName, numH1, replicateIndex)

        # One figure per entry, each drawn between openRFigure/closeRFigure.
        figureSpecs = (
            ('PandQvals.png', collection.makeAllPandQValsFigure, ' (p/q-figure) '),
            ('NumSamples.png', collection.makeNumSamplesFigure, ' (numSamples-figure) '),
        )
        for fileName, drawFigure, linkText in figureSpecs:
            figureFile = GalaxyRunSpecificFile(idPrefix + [fileName], self._galaxyFn)
            figureFile.openRFigure()
            drawFigure()
            figureFile.closeRFigure()
            catalogEntry = catalogEntry + (figureFile.getLink(linkText) + '<br>')

        # Append, so the replicates for the same numH1 accumulate on one page.
        catalogFile = GalaxyRunSpecificFile([str(numH1), 'cat.html'], self._galaxyFn)
        catalogFile.writeTextToFile(catalogEntry, mode='a')

    totalNumSamples = collection.getTotalNumSamples()
    totalNumRejected = collection.getTotalNumRejected()
    classificationSummaries = collection.getClassificationSummaries()
    return totalNumSamples, totalNumRejected, classificationSummaries
def compareCutoffSchemes(maxNumSamples, h, fdrThreshold, totalNumTests, stepSize, numReplications,a,b, galaxyFn=None): print '<PRE>' print 'Comparing cutoff schemes with parameters: maxNumSamples=%i, h=%i, fdrThreshold=%.2f, totalNumTests=%i, numReplications=%i' % (maxNumSamples, h, fdrThreshold, totalNumTests, numReplications) print 'stepSize: ',stepSize print 'H1 p-values drawn from beta with a=%.3f and b=%.3f' % (a,b) print 'Minimum achieveable p-value is %.5f, which gives minimum Bonferroni-corrected p-value of %.5f (compares to a fdr threshold of %.2f)' % (1.0/maxNumSamples, (1.0/maxNumSamples)*totalNumTests, fdrThreshold) #estimate time use: prevTime= time.time() Simulator(maxNumSamples, None, None,a,b,fdrThreshold).numSamplesAsFunctionOfNumH1( 1, 1, 1) baseMeasure = time.time() - prevTime if type(stepSize)==int: numSteps = len(range(0,totalNumTests+1,stepSize)) elif type(stepSize)==list: numSteps = len(stepSize) withOnlyMaxNumEstimate = baseMeasure * totalNumTests * numSteps * numReplications #print 'Estimated running time: between %i and %i seconds.' % (withOnlyMaxNumEstimate, withOnlyMaxNumEstimate*3) print 'Estimated running time: around %i seconds. 
(%.1f hours)' % (withOnlyMaxNumEstimate, withOnlyMaxNumEstimate/3600.0) sortedKeys, onlyMaxCutoff, onlyMaxNumRejected, onlyMaxType1Errors, onlyMaxType2Errors = Simulator(maxNumSamples, None, None,a,b,fdrThreshold, galaxyFn).numSamplesAsFunctionOfNumH1( totalNumTests, stepSize, numReplications) sortedKeys, seqMcCutoff, seqMcNumRejected, seqMcType1Errors, seqMcType2Errors = Simulator(maxNumSamples, h, None,a,b,fdrThreshold, galaxyFn).numSamplesAsFunctionOfNumH1(totalNumTests, stepSize, numReplications) sortedKeys, mcFdrCutoff, mcFdrNumRejected, mcFdrType1Errors, mcFdrType2Errors = Simulator(None, h, fdrThreshold,a,b,fdrThreshold, galaxyFn).numSamplesAsFunctionOfNumH1(totalNumTests, stepSize, numReplications) maxY = max( max(s) for s in [onlyMaxCutoff, seqMcCutoff, mcFdrCutoff]) #minY = min( min(s) for s in [onlyMaxCutoff, seqMcCutoff, McFdrCutoff]) minY=0 print 'Time spent: ',time.time() - prevTime, ' secs' print '</PRE>' #plotStaticFile.getDiskPath(True) if galaxyFn is not None: #print 'Generating aggregate McFdr simulation figures' plotStaticFile = GalaxyRunSpecificFile(['mainPlot.png'],galaxyFn) if type(stepSize) is int: allNumH1s = range(0,totalNumTests+1,stepSize) elif type(stepSize) is list: allNumH1s = stepSize for numH1 in allNumH1s: catalogStaticFile = GalaxyRunSpecificFile([str(numH1),'cat.html'], galaxyFn) print catalogStaticFile.getLink( 'Tests with #True H1s=%i' % numH1 ), '<br>' #plotStaticFile.openRFigure() #r.png(filename=plotFn, height=600, width=800, units='px', pointsize=12, res=72) #r.plot(r.unlist(sortedKeys), r.unlist(onlyMaxCutoff), ylim=r.unlist([minY,maxY]), type='l', xlab='Number of true H1s', ylab='Total MC samples' , col='black') #r.lines(r.unlist(sortedKeys), r.unlist(seqMcCutoff), col='red' ) #r.lines(r.unlist(sortedKeys), r.unlist(mcFdrCutoff), col='green' ) #r.legend('topleft',['BasicMc','SeqMc','McFdr'],col=['black','red','green'],lty=1) plotStaticFile.plotRLines(sortedKeys, [onlyMaxCutoff,seqMcCutoff,mcFdrCutoff], xlab='Number of 
true H1s', ylab='Total MC samples', legend=['BasicMc','SeqMc','McFdr']) #r('dev.off()') #plotStaticFile.closeRFigure() print plotStaticFile.getLink('View main plot') + ' of sumSamples as function of #H1s.', '<br>' numRejectedPlotStaticFile = GalaxyRunSpecificFile(['secondaryPlot.png'],galaxyFn) numRejectedPlotStaticFile.plotRLines(sortedKeys, [onlyMaxNumRejected,seqMcNumRejected,mcFdrNumRejected],xlab='Number of true H1s', ylab='Num rejected tests',legend=['BasicMc','SeqMc','McFdr']) #numRejectedPlotStaticFile.openRFigure() #r.png(filename=plotFn, height=600, width=800, units='px', pointsize=12, res=72) #r.plot(r.unlist(sortedKeys), r.unlist(onlyMaxNumRejected), ylim=r.unlist([0,totalNumTests]), type='l', xlab='Number of true H1s', ylab='Num rejected tests',col='black' ) #r.lines(r.unlist(sortedKeys), r.unlist(seqMcNumRejected), col='red' ) #r.lines(r.unlist(sortedKeys), r.unlist(mcFdrNumRejected), col='green' ) #r.lines(r.unlist(sortedKeys), r.unlist(sortedKeys), col='black', lty='dotted' ) #As this corresponds to perfect estimation.. 
#r.legend('topleft',['BasicMc','SeqMc','McFdr','NumFromH1'],col=['black','red','green','black'],lty=[1,1,1,2]) #r('dev.off()') #numRejectedPlotStaticFile.closeRFigure() print numRejectedPlotStaticFile.getLink('View secondary plot') + ' of #true H1s vs #tests rejected.', '<br>' #Classification errors classificationErrorPlotStaticFile = GalaxyRunSpecificFile(['errors.png'],galaxyFn) classificationErrorPlotStaticFile.openRFigure() yMax = max( max(x) for x in [mcFdrType2Errors,mcFdrType1Errors,seqMcType2Errors,seqMcType1Errors,onlyMaxType2Errors,onlyMaxType1Errors ]) #r.png(filename=plotFn, height=600, width=800, units='px', pointsize=12, res=72) r.plot(r.unlist(sortedKeys), r.unlist(onlyMaxType1Errors), ylim=r.unlist([0,yMax]), type='l', xlab='Number of true H1s', ylab='Type 1/2 errors',col='black' ) r.lines(r.unlist(sortedKeys), r.unlist(onlyMaxType2Errors), col='black', lty='dotted' ) r.lines(r.unlist(sortedKeys), r.unlist(seqMcType1Errors), col='red' ) r.lines(r.unlist(sortedKeys), r.unlist(seqMcType2Errors), col='red', lty='dotted' ) r.lines(r.unlist(sortedKeys), r.unlist(mcFdrType1Errors), col='green' ) r.lines(r.unlist(sortedKeys), r.unlist(mcFdrType2Errors), col='green', lty='dotted' ) rpy1.legend('topleft',['BasicMcType1','SeqMcType1','McFdrType1','BasicMcType2','SeqMcType2','McFdrType2'],col=['black','red','green','black','red','green'],lty=[1,1,1,2,2,2]) #r('dev.off()') classificationErrorPlotStaticFile.closeRFigure() print classificationErrorPlotStaticFile.getLink('View Type 1/2 error plot') + ' as function of number of true H1.', '<br>' #Classification errors onlyMaxAccuracy = [ sum(errors)*1.0/totalNumTests for errors in zip(onlyMaxType1Errors, onlyMaxType2Errors)] seqMcAccuracy = [ sum(errors)*1.0/totalNumTests for errors in zip(seqMcType1Errors, seqMcType2Errors)] mcFdrAccuracy = [ sum(errors)*1.0/totalNumTests for errors in zip(mcFdrType1Errors, mcFdrType2Errors)] accuracyPlotStaticFile = GalaxyRunSpecificFile(['accuracy.png'],galaxyFn) 
accuracyPlotStaticFile.openRFigure() yMax = 0.2 #just set ad hoc here.. #r.png(filename=plotFn, height=600, width=800, units='px', pointsize=12, res=72) r.plot(r.unlist(sortedKeys), r.unlist(onlyMaxAccuracy), ylim=r.unlist([0,yMax]), type='l', xlab='Number of true H1s', ylab='Accuracy',col='black' ) r.lines(r.unlist(sortedKeys), r.unlist(seqMcAccuracy), col='red' ) r.lines(r.unlist(sortedKeys), r.unlist(mcFdrAccuracy), col='green' ) rpy1.legend('topleft',['BasicMc','SeqMc','McFdr','NumFromH1'],col=['black','red','green'],lty=[1,1,1]) #r('dev.off()') accuracyPlotStaticFile.closeRFigure() print accuracyPlotStaticFile.getLink('View accuracy plot') + ' as function of number of true H1.', '<br>' #False positive rates onlyMaxFpr= [ float(fp)/pos if pos!=0 else 0 for fp,pos in zip(onlyMaxType1Errors, onlyMaxNumRejected)] seqMcFpr= [ float(fp)/pos if pos!=0 else 0 for fp,pos in zip(seqMcType1Errors, seqMcNumRejected)] mcFdrFpr= [ float(fp)/pos if pos!=0 else 0 for fp,pos in zip(mcFdrType1Errors, mcFdrNumRejected)] fprPlotStaticFile = GalaxyRunSpecificFile(['fpr.png'],galaxyFn) fprPlotStaticFile.plotRLines(sortedKeys, [onlyMaxFpr, seqMcFpr, mcFdrFpr], legend=['BasicMc','SeqMc','McFdr']) print fprPlotStaticFile.getLink('View FPR plot') + ' as function of number of true H1.', '<br>'
def execute(choices, galaxyFn=None, username=''): '''Is called when execute-button is pushed by web-user. Should print output as HTML to standard out, which will be directed to a results page in Galaxy history. If getOutputFormat is anything else than HTML, the output should be written to the file with path galaxyFn.gtr If needed, StaticFile can be used to get a path where additional files can be put (e.g. generated image files). choices is a list of selections made by web-user in each options box. ''' print 'temporarily overriding tool, running McFdr2 simulation..' from test.sandbox.aux.McFdr2 import analyzeSampleNumAccuracy for numSamples in [100,1000,10000]: print '' print 'numSamples %s: ' % numSamples, for i in range(3): print analyzeSampleNumAccuracy(numSamples), return from gold.application.RSetup import r from numpy import array,minimum pVal,minNumSamples,maxNumSamples,chunkSize,numTests = [float(x) for x in choices[:-1]] print 'pVal:%.2f, minNumSamples:%i, maxNumSamples:%i, chunkSize:%i, numTests:%i' % (pVal,minNumSamples,maxNumSamples,chunkSize,numTests) assert (maxNumSamples-minNumSamples)%chunkSize == 0 assert numTests == 1 #More not yet supported. Should in McFdr be something like the min-max, i.e. the minimum across iterations of the maximum p-value across tests.. 
pValEstimation = choices[-1] assert pValEstimation in ['Davison','ML'] if pValEstimation=='Davison': pFunc = lambda k,n:1.0*(k+1)/(n+1) else: pFunc = lambda k,n:1.0*(k)/n numRepl = 10**4 stdAtMin = [pFunc(k,minNumSamples) for k in r.rbinom(numRepl,minNumSamples,pVal)] stdAtMax = [pFunc(k,maxNumSamples) for k in r.rbinom(numRepl,maxNumSamples,pVal)] mcFdrBestPVals = array([1.0]*numRepl) mcFdrSamples = minNumSamples #array([minNumSamples]*numRepl) mcFdrExtremes = array(r.rbinom(numRepl,minNumSamples,pVal)) while mcFdrSamples<maxNumSamples: tempMcFdrPVals = pFunc(mcFdrExtremes,mcFdrSamples) mcFdrBestPVals = minimum(mcFdrBestPVals,tempMcFdrPVals) mcFdrSamples += chunkSize mcFdrExtremes += array(r.rbinom(numRepl,chunkSize,pVal)) tempMcFdrPVals = pFunc(mcFdrExtremes,mcFdrSamples) mcFdrBestPVals = minimum(mcFdrBestPVals,tempMcFdrPVals) assert mcFdrSamples == maxNumSamples print 'Mean values<br>' print 'AtMin:%.7f, AtMax:%.7f, McFdr:%.7f' % tuple([array(x).mean() for x in [stdAtMin, stdAtMax, mcFdrBestPVals]]) breaks = [pVal*2*x/100.0 for x in range(0,101)] +[1.0] histRes = r.hist(stdAtMin,breaks=breaks,plot=False) xVals = histRes['mids'] yValsStdAtMin = histRes['density'] histRes = r.hist(stdAtMax,breaks=breaks,plot=False) assert xVals == histRes['mids'] yValsStdAtMax = histRes['density'] histRes = r.hist(mcFdrBestPVals,breaks=breaks,plot=False) assert xVals == histRes['mids'] yValsMcFdr = histRes['density'] staticFile = GalaxyRunSpecificFile(['pDistr.png'],galaxyFn) staticFile.openRFigure() staticFile.plotRLines(xVals, [yValsStdAtMin, yValsStdAtMax, yValsMcFdr],alsoOpenAndClose=False,xlab='p-value',ylab='density',xlim=[0,2*pVal]) r.abline(v=pVal,lty='dotted',col='yellow') staticFile.closeRFigure() print staticFile.getLink('View estimated pval distribution')