Ejemplo n.º 1
0
    def plotRLines(self,
                   xVals,
                   yLines,
                   saveRawData=True,
                   alsoOpenAndClose=True,
                   colors=None,
                   legend=None,
                   lty=None,
                   **kwArgs):
        '''
        Draw several lines over a shared set of x-values in one R plot.

        xVals: one list containing x-values
        yLines: list of lists containing y-values, one inner list per line
        saveRawData: if True, also write the plotted values as R vector
            assignments (x, y0, y1, ...) to self.getDiskPath() + '.raw.txt'
        alsoOpenAndClose: if True, call self.openRFigure() before plotting and
            self.closeRFigure() afterwards
        colors: list of colors to use for each line. May be omitted for up to
            five lines, which then get black, red, green, blue and grey
        legend: list of legend text per line (color); no legend is drawn
            when None
        lty: line types for r.legend (defaults to 1 for every line). The lines
            themselves are drawn without an lty argument
        any extra params in kwArgs is sent to r.plot. Use to send for example xlab,ylab

        Raises AssertionError if colors is omitted for more than five lines,
        and ValueError if yLines or one of its lines is empty.
        '''
        from proto.RSetup import r

        defaultColors = ['black', 'red', 'green', 'blue', 'grey']

        # Validate and derive everything before a figure is opened, so that
        # bad input cannot leave an R graphics device open.
        assert len(yLines) <= len(defaultColors) or colors is not None
        if colors is None:
            colors = defaultColors[0:len(yLines)]
        if lty is None:
            lty = [1] * len(yLines)
        yMax = max(max(yVals) for yVals in yLines)

        if alsoOpenAndClose:
            self.openRFigure()
        try:
            # type='n' sets up the axes without drawing anything, so the
            # second argument is only a placeholder; the y-range comes from
            # ylim. NOTE: the y-axis always starts at 0, so negative y-values
            # fall outside the plotted range.
            r.plot(r.unlist(xVals),
                   r.unlist(xVals),
                   ylim=r.unlist([0, yMax]),
                   type='n',
                   **kwArgs)
            for i, yVals in enumerate(yLines):
                r.lines(r.unlist(xVals), r.unlist(yVals), col=colors[i])
            if legend is not None:
                r.legend('topleft', legend, col=colors, lty=lty)

            if saveRawData:
                rawFn = self.getDiskPath() + '.raw.txt'
                # 'with' guarantees the file is closed even if a write fails.
                with open(rawFn, 'w') as f:
                    f.write('x <- c(%s)\n' %
                            ','.join(str(val) for val in xVals))
                    for i, yVals in enumerate(yLines):
                        f.write('y%i <- c(%s)\n' %
                                (i, ','.join(str(val) for val in yVals)))
        finally:
            # Close the figure on every exit path once it has been opened.
            if alsoOpenAndClose:
                self.closeRFigure()
Ejemplo n.º 2
0
 def makeNumSamplesFigure(self):
     '''
     Plot, for the entries of self._sampleCounters taken in sorted order,
     the total number of samples (black line) and the extreme count
     (green line), with a legend in the top-left corner.

     This method neither opens nor closes an R figure itself; presumably
     the caller manages the graphics device.
     '''
     orderedTests = sorted(self._sampleCounters)
     totals = [counter.getTotalNumSamples() for counter in orderedTests]
     extremes = [counter._extremeCount for counter in orderedTests]

     # Only y-values are given, so R uses the index of each test as x.
     r.plot(r.unlist(totals),
            type='l',
            xlab='Tests',
            ylab='Num samples',
            col='black')
     r.lines(r.unlist(extremes), col='green')

     legendLabels = ['numSamples', 'numExtremeSamples']
     legendColors = ['black', 'green']
     rpy1.legend('topleft', legendLabels, col=legendColors, lty=1)
Ejemplo n.º 3
0
    def execute(cls, choices, galaxyFn=None, username=''):
        '''
        Plot the average of each row of the selected history file and print
        an HTML page presenting that plot to standard out.

        choices: selections made by the web user; only choices.history is
            read here, and it is handed to cls._getContentsOfHistoryFile
        galaxyFn: passed to GalaxyRunSpecificFile to locate the run-specific
            'average.png' that the plot is written to
        username: not used by this method

        Every cell of every row must be convertible with float(); an empty
        row raises ZeroDivisionError.
        '''
        rowAverages = []
        for rawRow in cls._getContentsOfHistoryFile(choices.history):
            numbers = [float(cell) for cell in rawRow]
            rowAverages.append(sum(numbers) / len(numbers))

        from proto.hyperbrowser.StaticFile import GalaxyRunSpecificFile
        figure = GalaxyRunSpecificFile(['average.png'], galaxyFn)
        figure.openRFigure()
        from proto.RSetup import r
        # Rows are numbered from 1 on the x-axis.
        rowNumbers = range(1, len(rowAverages) + 1)
        r.plot(rowNumbers, rowAverages, xlab='Rows', ylab='Average value')
        figure.closeRFigure()

        from proto.hyperbrowser.HtmlCore import HtmlCore
        page = HtmlCore()
        page.begin()
        page.bigHeader('Plot of row averages')
        page.paragraph('This is a plot of the average values of each row:')
        page.image(figure.getURL())
        # The URL and the disk path of the image are shown as plain text too.
        page.paragraph(figure.getURL())
        page.paragraph(figure.getDiskPath())
        page.end()
        print(str(page))
Ejemplo n.º 4
0
    def compareCutoffSchemes(maxNumSamples,
                             h,
                             fdrThreshold,
                             totalNumTests,
                             stepSize,
                             numReplications,
                             a,
                             b,
                             galaxyFn=None):
        print '<PRE>'
        print 'Comparing cutoff schemes with parameters: maxNumSamples=%i, h=%i, fdrThreshold=%.2f, totalNumTests=%i, numReplications=%i' % (
            maxNumSamples, h, fdrThreshold, totalNumTests, numReplications)
        print 'stepSize: ', stepSize
        print 'H1 p-values drawn from beta with a=%.3f and b=%.3f' % (a, b)
        print 'Minimum achieveable p-value is %.5f, which gives minimum Bonferroni-corrected p-value of %.5f (compares to a fdr threshold of %.2f)' % (
            1.0 / maxNumSamples,
            (1.0 / maxNumSamples) * totalNumTests, fdrThreshold)

        #estimate time use:
        prevTime = time.time()
        Simulator(maxNumSamples, None, None, a, b,
                  fdrThreshold).numSamplesAsFunctionOfNumH1(1, 1, 1)
        baseMeasure = time.time() - prevTime
        if type(stepSize) == int:
            numSteps = len(range(0, totalNumTests + 1, stepSize))
        elif type(stepSize) == list:
            numSteps = len(stepSize)
        withOnlyMaxNumEstimate = baseMeasure * totalNumTests * numSteps * numReplications
        #print 'Estimated running time: between %i and %i seconds.' % (withOnlyMaxNumEstimate, withOnlyMaxNumEstimate*3)
        print 'Estimated running time: around %i seconds. (%.1f hours)' % (
            withOnlyMaxNumEstimate, withOnlyMaxNumEstimate / 3600.0)

        sortedKeys, onlyMaxCutoff, onlyMaxNumRejected, onlyMaxType1Errors, onlyMaxType2Errors = Simulator(
            maxNumSamples, None, None, a, b, fdrThreshold,
            galaxyFn).numSamplesAsFunctionOfNumH1(totalNumTests, stepSize,
                                                  numReplications)
        sortedKeys, seqMcCutoff, seqMcNumRejected, seqMcType1Errors, seqMcType2Errors = Simulator(
            maxNumSamples, h, None, a, b, fdrThreshold,
            galaxyFn).numSamplesAsFunctionOfNumH1(totalNumTests, stepSize,
                                                  numReplications)
        sortedKeys, mcFdrCutoff, mcFdrNumRejected, mcFdrType1Errors, mcFdrType2Errors = Simulator(
            None, h, fdrThreshold, a, b, fdrThreshold,
            galaxyFn).numSamplesAsFunctionOfNumH1(totalNumTests, stepSize,
                                                  numReplications)
        maxY = max(max(s) for s in [onlyMaxCutoff, seqMcCutoff, mcFdrCutoff])
        #minY = min( min(s) for s in [onlyMaxCutoff, seqMcCutoff, McFdrCutoff])
        minY = 0

        print 'Time spent: ', time.time() - prevTime, ' secs'
        print '</PRE>'

        #plotStaticFile.getDiskPath(True)
        if galaxyFn is not None:
            #print 'Generating aggregate McFdr simulation figures'
            plotStaticFile = GalaxyRunSpecificFile(['mainPlot.png'], galaxyFn)
            if type(stepSize) is int:
                allNumH1s = range(0, totalNumTests + 1, stepSize)
            elif type(stepSize) is list:
                allNumH1s = stepSize
            for numH1 in allNumH1s:
                catalogStaticFile = GalaxyRunSpecificFile(
                    [str(numH1), 'cat.html'], galaxyFn)
                print catalogStaticFile.getLink('Tests with #True H1s=%i' %
                                                numH1), '<br>'

            #plotStaticFile.openRFigure()
            #r.png(filename=plotFn, height=600, width=800, units='px', pointsize=12, res=72)
            #r.plot(r.unlist(sortedKeys), r.unlist(onlyMaxCutoff), ylim=r.unlist([minY,maxY]), type='l', xlab='Number of true H1s', ylab='Total MC samples' , col='black')
            #r.lines(r.unlist(sortedKeys), r.unlist(seqMcCutoff), col='red' )
            #r.lines(r.unlist(sortedKeys), r.unlist(mcFdrCutoff), col='green' )
            #r.legend('topleft',['BasicMc','SeqMc','McFdr'],col=['black','red','green'],lty=1)
            plotStaticFile.plotRLines(
                sortedKeys, [onlyMaxCutoff, seqMcCutoff, mcFdrCutoff],
                xlab='Number of true H1s',
                ylab='Total MC samples',
                legend=['BasicMc', 'SeqMc', 'McFdr'])
            #r('dev.off()')
            #plotStaticFile.closeRFigure()

            print plotStaticFile.getLink(
                'View main plot'
            ) + ' of sumSamples as function of #H1s.', '<br>'

            numRejectedPlotStaticFile = GalaxyRunSpecificFile(
                ['secondaryPlot.png'], galaxyFn)
            numRejectedPlotStaticFile.plotRLines(
                sortedKeys,
                [onlyMaxNumRejected, seqMcNumRejected, mcFdrNumRejected],
                xlab='Number of true H1s',
                ylab='Num rejected tests',
                legend=['BasicMc', 'SeqMc', 'McFdr'])
            #numRejectedPlotStaticFile.openRFigure()
            #r.png(filename=plotFn, height=600, width=800, units='px', pointsize=12, res=72)
            #r.plot(r.unlist(sortedKeys), r.unlist(onlyMaxNumRejected), ylim=r.unlist([0,totalNumTests]), type='l', xlab='Number of true H1s', ylab='Num rejected tests',col='black' )
            #r.lines(r.unlist(sortedKeys), r.unlist(seqMcNumRejected), col='red' )
            #r.lines(r.unlist(sortedKeys), r.unlist(mcFdrNumRejected), col='green' )
            #r.lines(r.unlist(sortedKeys), r.unlist(sortedKeys), col='black', lty='dotted' ) #As this corresponds to perfect estimation..
            #r.legend('topleft',['BasicMc','SeqMc','McFdr','NumFromH1'],col=['black','red','green','black'],lty=[1,1,1,2])
            #r('dev.off()')
            #numRejectedPlotStaticFile.closeRFigure()
            print numRejectedPlotStaticFile.getLink(
                'View secondary plot'
            ) + ' of #true H1s vs #tests rejected.', '<br>'

            #Classification errors
            classificationErrorPlotStaticFile = GalaxyRunSpecificFile(
                ['errors.png'], galaxyFn)
            classificationErrorPlotStaticFile.openRFigure()
            yMax = max(
                max(x) for x in [
                    mcFdrType2Errors, mcFdrType1Errors, seqMcType2Errors,
                    seqMcType1Errors, onlyMaxType2Errors, onlyMaxType1Errors
                ])
            #r.png(filename=plotFn, height=600, width=800, units='px', pointsize=12, res=72)
            r.plot(r.unlist(sortedKeys),
                   r.unlist(onlyMaxType1Errors),
                   ylim=r.unlist([0, yMax]),
                   type='l',
                   xlab='Number of true H1s',
                   ylab='Type 1/2 errors',
                   col='black')
            r.lines(r.unlist(sortedKeys),
                    r.unlist(onlyMaxType2Errors),
                    col='black',
                    lty='dotted')
            r.lines(r.unlist(sortedKeys),
                    r.unlist(seqMcType1Errors),
                    col='red')
            r.lines(r.unlist(sortedKeys),
                    r.unlist(seqMcType2Errors),
                    col='red',
                    lty='dotted')
            r.lines(r.unlist(sortedKeys),
                    r.unlist(mcFdrType1Errors),
                    col='green')
            r.lines(r.unlist(sortedKeys),
                    r.unlist(mcFdrType2Errors),
                    col='green',
                    lty='dotted')
            rpy1.legend('topleft', [
                'BasicMcType1', 'SeqMcType1', 'McFdrType1', 'BasicMcType2',
                'SeqMcType2', 'McFdrType2'
            ],
                        col=['black', 'red', 'green', 'black', 'red', 'green'],
                        lty=[1, 1, 1, 2, 2, 2])
            #r('dev.off()')
            classificationErrorPlotStaticFile.closeRFigure()
            print classificationErrorPlotStaticFile.getLink(
                'View Type 1/2 error plot'
            ) + ' as function of number of true H1.', '<br>'

            #Classification errors
            onlyMaxAccuracy = [
                sum(errors) * 1.0 / totalNumTests
                for errors in zip(onlyMaxType1Errors, onlyMaxType2Errors)
            ]
            seqMcAccuracy = [
                sum(errors) * 1.0 / totalNumTests
                for errors in zip(seqMcType1Errors, seqMcType2Errors)
            ]
            mcFdrAccuracy = [
                sum(errors) * 1.0 / totalNumTests
                for errors in zip(mcFdrType1Errors, mcFdrType2Errors)
            ]

            accuracyPlotStaticFile = GalaxyRunSpecificFile(['accuracy.png'],
                                                           galaxyFn)
            accuracyPlotStaticFile.openRFigure()
            yMax = 0.2  #just set ad hoc here..
            #r.png(filename=plotFn, height=600, width=800, units='px', pointsize=12, res=72)
            r.plot(r.unlist(sortedKeys),
                   r.unlist(onlyMaxAccuracy),
                   ylim=r.unlist([0, yMax]),
                   type='l',
                   xlab='Number of true H1s',
                   ylab='Accuracy',
                   col='black')
            r.lines(r.unlist(sortedKeys), r.unlist(seqMcAccuracy), col='red')
            r.lines(r.unlist(sortedKeys), r.unlist(mcFdrAccuracy), col='green')
            rpy1.legend('topleft', ['BasicMc', 'SeqMc', 'McFdr', 'NumFromH1'],
                        col=['black', 'red', 'green'],
                        lty=[1, 1, 1])
            #r('dev.off()')
            accuracyPlotStaticFile.closeRFigure()
            print accuracyPlotStaticFile.getLink(
                'View accuracy plot'
            ) + ' as function of number of true H1.', '<br>'

            #False positive rates
            onlyMaxFpr = [
                float(fp) / pos if pos != 0 else 0
                for fp, pos in zip(onlyMaxType1Errors, onlyMaxNumRejected)
            ]
            seqMcFpr = [
                float(fp) / pos if pos != 0 else 0
                for fp, pos in zip(seqMcType1Errors, seqMcNumRejected)
            ]
            mcFdrFpr = [
                float(fp) / pos if pos != 0 else 0
                for fp, pos in zip(mcFdrType1Errors, mcFdrNumRejected)
            ]

            fprPlotStaticFile = GalaxyRunSpecificFile(['fpr.png'], galaxyFn)
            fprPlotStaticFile.plotRLines(sortedKeys,
                                         [onlyMaxFpr, seqMcFpr, mcFdrFpr],
                                         legend=['BasicMc', 'SeqMc', 'McFdr'])
            print fprPlotStaticFile.getLink(
                'View FPR plot') + ' as function of number of true H1.', '<br>'