コード例 #1
0
ファイル: testBigboxPerf.py プロジェクト: nkhuyu/ufora
    def dataCreationTest(self, totalMB, workers, threadsPerWorker, testName):
        """Time the creation of a large vector and record it as a perf test.

        Arguments:
            totalMB - size of the vector to build, in megabytes.
            workers - number of workers in the simulation; the 55 * 1024 MB
                memory budget is divided evenly between them.
            threadsPerWorker - thread count handed to each worker.
            testName - name under which the elapsed time is recorded.

        Only the evaluation of the vector expression is timed; bringing the
        simulation up is not. The simulation is torn down even if the
        evaluation raises.
        """
        s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        #we wish we could actually test that we achieve saturation here but we can't yet.
        #the element count is the byte count divided by 8 (presumably 8-byte elements)
        text = """size(Vector.range(%s, {_*_}))""" % (totalMB * 1024 * 1024 / 8)

        #"1+1" is a trivial computation used to bring the simulation up;
        #its result is not needed.
        _, simulation = self.computeUsingSeveralWorkers(
                "1+1",
                s3,
                workers,
                timeout = 120,
                memoryLimitMb = 55 * 1024 / workers,
                threadCount = threadsPerWorker,
                returnSimulation = True,
                useInMemoryCache = False
                )

        try:
            t0 = time.time()
            simulation.compute(text, timeout=120)
            totalTimeToReturnResult = time.time() - t0

            PerformanceTestReporter.recordTest(testName, totalTimeToReturnResult, None)
        finally:
            simulation.teardown()
コード例 #2
0
    def test_splitToRowMajor(self):
        """Measure how long the split-to-row-major script takes on 100000 rows by 50 columns.

        The setup script runs first on a single 4-thread worker; its result is
        passed to the measured script as `chunks`. Only the second computation
        is timed and recorded.
        """
        storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        rowCount = 100000
        columnCount = 50

        setupResult, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
            self.setupScript(rowCount, columnCount),
            storage,
            1,
            timeout=30, memoryLimitMb=8 * 1024, threadCount=4,
            returnSimulation=True, useInMemoryCache=False)

        try:
            self.assertTrue(setupResult.isResult())
            chunks = setupResult.asResult.result

            startedAt = time.time()
            splitResult = simulation.compute(
                self.splitToRowMajorScript(), timeout=360, chunks=chunks)
            elapsed = time.time() - startedAt

            self.assertTrue(splitResult.isResult())

            perfName = "algorithms.text.splitToRowMajor.%srows_%scolumns" % (
                rowCount, columnCount)
            PerformanceTestReporter.recordTest(perfName, elapsed, None)

        finally:
            #always release the simulation, whether or not the checks passed
            simulation.teardown()
コード例 #3
0
    def dataCreationTest(self, totalMB, workers, threadsPerWorker, testName):
        """Time the creation of a large vector and record it as a perf test.

        Arguments:
            totalMB - size of the vector to build, in megabytes.
            workers - number of workers in the simulation; the 55 * 1024 MB
                memory budget is divided evenly between them.
            threadsPerWorker - thread count handed to each worker.
            testName - name under which the elapsed time is recorded.

        Only the evaluation of the vector expression is timed; bringing the
        simulation up is not. The simulation is torn down even if the
        evaluation raises.
        """
        s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        #we wish we could actually test that we achieve saturation here but we can't yet.
        #the element count is the byte count divided by 8 (presumably 8-byte elements)
        text = """size(Vector.range(%s, {_*_}))""" % (totalMB * 1024 * 1024 /
                                                      8)

        #"1+1" is a trivial computation used to bring the simulation up;
        #its result is not needed.
        _, simulation = self.computeUsingSeveralWorkers(
            "1+1",
            s3,
            workers,
            timeout=120,
            memoryLimitMb=55 * 1024 / workers,
            threadCount=threadsPerWorker,
            returnSimulation=True,
            useInMemoryCache=False)

        try:
            t0 = time.time()
            simulation.compute(text, timeout=120)
            totalTimeToReturnResult = time.time() - t0

            PerformanceTestReporter.recordTest(testName,
                                               totalTimeToReturnResult, None)
        finally:
            simulation.teardown()
コード例 #4
0
    def stringCreationAndSumTest(self, totalStrings, workers, threadsPerWorker, testName):
        """Time building a vector of strings and summing their sizes.

        Arguments:
            totalStrings - number of strings to create.
            workers - number of workers in the simulation; the 55 * 1024 MB
                memory budget is divided evenly between them.
            threadsPerWorker - thread count handed to each worker.
            testName - name under which the elapsed time is recorded.

        Only the evaluation of the string expression is timed. The simulation
        is torn down even if the evaluation raises.
        """
        s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        #we wish we could actually test that we achieve saturation here but we can't yet.
        text = """Vector.range(%s, String).sum(size)""" % totalStrings

        #"1+1" is a trivial computation used to bring the simulation up
        _, simulation = \
            self.computeUsingSeveralWorkers(
                "1+1",
                s3,
                workers,
                timeout = 240,
                memoryLimitMb = 55 * 1024 / workers,
                threadCount = threadsPerWorker,
                returnSimulation = True,
                useInMemoryCache = False
                )

        try:
            t0 = time.time()
            simulation.compute(text, timeout=240)
            totalTimeToReturnResult = time.time() - t0

            PerformanceTestReporter.recordTest(testName, totalTimeToReturnResult, None)
        finally:
            simulation.teardown()
コード例 #5
0
    def test_class_sorting_is_fast(self):
        """Check that sorting class instances takes less than 5x the time of sorting integers.

        Both variants are run once untimed as a burn-in. Each is then run again
        inside PerformanceTestReporter.RecordAsPerfTest while wall-clock time is
        also measured locally for the comparison.
        """
        sz = 10000000

        #burn the calculation in.
        self.classSortingTest(sz, useClass=True)
        self.classSortingTest(sz, useClass=False)

        t0 = time.time()

        with PerformanceTestReporter.RecordAsPerfTest("python.datatasks.sort_class_instances"):
            self.classSortingTest(sz, useClass=True)

        t1 = time.time()

        with PerformanceTestReporter.RecordAsPerfTest("python.datatasks.sort_integers"):
            self.classSortingTest(sz, useClass=False)

        t2 = time.time()

        intTime = t2 - t1
        classTime = t1 - t0

        self.assertTrue(classTime < intTime * 5, "Times (ints) %s and (classes) %s were not very close." % (intTime, classTime))

        #a single pre-formatted argument keeps print valid both as a statement and
        #as a function; the two spaces reproduce the statement form's separator.
        print("%s  to sort ints" % intTime)
        print("%s  to sort class instances" % classTime)
コード例 #6
0
ファイル: testTextManipulation.py プロジェクト: vishnur/ufora
    def test_transposeToColumnMajor(self):
        """Measure the row-major to column-major transpose on 100000 rows by 50 columns.

        The setup script runs first on a single 30-thread worker; its result is
        handed to the transpose script as `rowMajor`. Only the transpose is
        timed and recorded.
        """
        storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        rowCount = 100000
        columnCount = 50

        setupResult, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
            self.transposeSetupScript(rowCount, columnCount),
            storage,
            1,
            timeout = 300,
            memoryLimitMb = 45 * 1024,
            threadCount = 30,
            returnSimulation = True,
            useInMemoryCache = False
            )

        try:
            self.assertTrue(setupResult.isResult())
            rowMajorData = setupResult.asResult.result

            startedAt = time.time()
            transposed = simulation.compute(
                self.transposeRowMajorToColumnMajorScript(rowCount, columnCount),
                timeout = 500,
                rowMajor = rowMajorData
                )
            elapsed = time.time() - startedAt

            self.assertTrue(transposed.isResult())

            perfName = "algorithms.text.transposeRowMajorToColumnMajor.%srows_%scolumns" % (
                rowCount, columnCount)
            PerformanceTestReporter.recordTest(perfName, elapsed, None)

        finally:
            #always release the simulation, whether or not the checks passed
            simulation.teardown()
コード例 #7
0
    def stringCreationAndSumTest(self, totalStrings, workers, threadsPerWorker, testName):
        """Time building a vector of strings and summing their sizes.

        Arguments:
            totalStrings - number of strings to create.
            workers - number of workers in the simulation; the 55 * 1024 MB
                memory budget is divided evenly between them.
            threadsPerWorker - thread count handed to each worker.
            testName - name under which the elapsed time is recorded.

        Only the evaluation of the string expression is timed. The simulation
        is torn down even if the evaluation raises.
        """
        s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        #we wish we could actually test that we achieve saturation here but we can't yet.
        text = """Vector.range(%s, String).sum(size)""" % totalStrings

        #"1+1" is a trivial computation used to bring the simulation up
        _, simulation = \
            self.computeUsingSeveralWorkers(
                "1+1",
                s3,
                workers,
                timeout = 240,
                memoryLimitMb = 55 * 1024 / workers,
                threadCount = threadsPerWorker,
                returnSimulation = True,
                useInMemoryCache = False
                )

        try:
            t0 = time.time()
            simulation.compute(text, timeout=240)
            totalTimeToReturnResult = time.time() - t0

            PerformanceTestReporter.recordTest(testName, totalTimeToReturnResult, None)
        finally:
            simulation.teardown()
コード例 #8
0
ファイル: testBigboxPerf.py プロジェクト: bolshoibooze/ufora
    def test_byteToStringAndBackInDifferentPatterns(self):
        """Time `dataAsString` over slices of a large byte vector, flat and nested.

        The setup computation builds the byte vector `ds`, a vector of vectors
        of (low, high) index pairs `dat`, and `dat.sum()`. Two expressions are
        then timed against those values and each recorded under its own name.
        """
        s3 = ActualS3Interface.ActualS3InterfaceFactory()

        setupText = (
            """
            let ds = Vector.range(3000000000, {UInt8(_%100)});

            let dat = Vector.range(100, fun(block) {
                Vector.range(1000000, fun(o) { let base = block * 10000000 + o * 10; (base, base + 10) })
                });

            (ds, dat, dat.sum())
            """
            )

        setupResults, simulation = self.computeUsingSeveralWorkers(
            setupText,
            s3,
            1,
            memoryLimitMb=45 * 1024,
            threadCount=30,
            returnSimulation=True,
            ioTaskThreadOverride=8,
            useInMemoryCache=False,
            timeout=30,
            objectStore=self.createObjectStore(s3)
            )

        #(recorded test name, expression to evaluate), measured in this order
        measurements = (
            ("python.BigBox.DataAsString.FlatVector",
             "size(datSum ~~ { ds[_[0],_[1]].dataAsString }) == size(datSum)"),
            ("python.BigBox.DataAsString.NestedVector",
             "size(dat ~~ {_ ~~ { ds[_[0],_[1]].dataAsString } }) == size(dat)"),
            )

        try:
            ds, dat, datSum = setupResults.asResult.result

            for perfName, expression in measurements:
                startedAt = time.time()
                simulation.compute(
                    expression,
                    timeout=120,
                    ds=ds,
                    dat=dat,
                    datSum=datSum
                    )
                PerformanceTestReporter.recordTest(
                    perfName,
                    time.time() - startedAt,
                    None
                    )
        finally:
            simulation.teardown()
コード例 #9
0
ファイル: testBigboxPerf.py プロジェクト: bolshoibooze/ufora
    def downloadTaxiData(self,
                         filecount,
                         parse=False,
                         workers=1,
                         threadsPerWorker=30,
                         downloaderThreads=8):
        """Load taxi CSV files from S3 and record download or parse performance.

        Arguments:
            filecount - number of files to load; taxi_month_1.csv through
                taxi_month_<filecount>.csv are read from the test data bucket
                and combined with `+`.
            parse - when True, time `parsing.csv` over the loaded data and
                record that time; when False, record the download time divided
                by the number of gigabytes loaded.
            workers - number of workers; the 45 * 1024 MB memory budget is
                divided evenly between them.
            threadsPerWorker - thread count handed to each worker.
            downloaderThreads - passed on as `ioTaskThreadOverride`.

        The simulation is torn down even if a computation or check fails.
        """
        s3 = ActualS3Interface.ActualS3InterfaceFactory()

        bucketName = self.getTestDataBucket()

        #"1+1" is a trivial computation used to bring the simulation up
        _, simulation = self.computeUsingSeveralWorkers(
            "1+1",
            s3,
            workers,
            memoryLimitMb=45 * 1024 / workers,
            threadCount=threadsPerWorker,
            returnSimulation=True,
            ioTaskThreadOverride=downloaderThreads,
            useInMemoryCache=False,
            objectStore=self.createObjectStore(s3)
            )

        try:
            datasetExpressions = [
                'datasets.s3("%s", "taxi_month_%s.csv")' % (bucketName, ix)
                for ix in range(1, filecount+1)
                ]
            dsText = """let ds = """ + "+".join(datasetExpressions) + ";"

            text = dsText + "(ds, ds.sum(), size(ds))"

            downloadTimeStart = time.time()
            result = simulation.compute(text, timeout=240)
            self.assertTrue(result.isResult())
            downloadTimeEnd = time.time()
            #the middle element (ds.sum()) is part of the computation but its value is not needed here
            ds, _, bytecount = result.asResult.result

            if parse:
                parseTimeStart = time.time()
                result = simulation.compute("size(parsing.csv(ds))", timeout=240, ds=ds)
                parseTimeEnd = time.time()

                self.assertTrue(result.isResult())

                PerformanceTestReporter.recordTest(
                    "python.BigBox.LargeS3.ParseTaxidata." + str(filecount),
                    parseTimeEnd - parseTimeStart,
                    None
                    )
            else:
                #divide by a float in one step so that integer division cannot
                #truncate the byte count before it is converted to gigabytes
                gigabytes = bytecount.pyval / (1024.0 * 1024.0 * 1024.0)
                PerformanceTestReporter.recordTest(
                    "python.BigBox.LargeS3.TaxiSecondsPerGB." + str(filecount),
                    (downloadTimeEnd - downloadTimeStart) / gigabytes,
                    None
                    )
        finally:
            simulation.teardown()
コード例 #10
0
    def stringToInt64ParsingTest(self, threads, testName):
        """Time repeated String -> Int64 conversion across `threads` parallel loops.

        Arguments:
            threads - number of independent loops; substituted for
                __thread_count__ in the script.
            testName - name under which the elapsed time is recorded.

        Only the evaluation of the script is timed. Every simulation started
        here is torn down.
        """
        s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        #we wish we could actually test that we achieve saturation here but we can't yet.
        text = """
            let doALoop = fun(x) {
                //pass 's' through a vector so that the compiler can't tell what it is
                let s = ["2013"][0];

                let res = 0
                for ix in sequence(x) {
                    if (ix == 0)
                        s = s + String(ix)

                    res = res + Int64(s) + ix
                    }
                res
                };

            Vector.range(__thread_count__) ~~ {doALoop(20000000 + _)}
            """.replace("__thread_count__", str(threads))

        def startSimulation():
            #"1+1" is a trivial computation used to bring the simulation up
            return self.computeUsingSeveralWorkers(
                "1+1",
                s3,
                1,
                timeout = 240,
                memoryLimitMb = 55 * 1024,
                threadCount = 30,
                returnSimulation = True,
                useInMemoryCache = False
                )

        #a first simulation is started before the measured one (presumably as a
        #warm-up - TODO confirm). It must be torn down explicitly: rebinding the
        #name alone would leave it running for the rest of the test.
        _, warmupSimulation = startSimulation()
        warmupSimulation.teardown()

        _, simulation = startSimulation()

        try:
            t0 = time.time()
            simulation.compute(text, timeout=240)
            totalTimeToReturnResult = time.time() - t0

            PerformanceTestReporter.recordTest(testName,
                                               totalTimeToReturnResult, None)
        finally:
            simulation.teardown()
コード例 #11
0
ファイル: testBigboxMemoryPerf.py プロジェクト: vishnur/ufora
 def testMemoryUpdate(self):
     """Record a memory-update measurement for every core count / allocation size pair."""
     duration = self.measurementTime
     for coreCount in self.coreList:
         for sizeMB in self.allocSizeList:
             perfName = "python.BigBox.MemoryUpdate.SecondsPerGB.%sCore_%sMB" % (
                 coreCount, sizeMB)
             #the allocation size is handed over in bytes
             measured = self.measureMemoryUpdatePerformance(
                 duration, 1024 * 1024 * sizeMB, coreCount)
             PerformanceTestReporter.recordTest(perfName, measured, None)
コード例 #12
0
 def testMemoryUpdate(self):
     """Record a memory-update measurement for every core count / allocation size pair."""
     duration = self.measurementTime
     for coreCount in self.coreList:
         for sizeMB in self.allocSizeList:
             perfName = "python.BigBox.MemoryUpdate.SecondsPerGB.%sCore_%sMB" % (
                 coreCount, sizeMB)
             #the allocation size is handed over in bytes
             measured = self.measureMemoryUpdatePerformance(
                 duration, 1024 * 1024 * sizeMB, coreCount)
             PerformanceTestReporter.recordTest(perfName, measured, None)
コード例 #13
0
 def testMmapAllocation(self):
     """Record an mmap-allocation measurement for every core count / allocation size pair."""
     duration = self.measurementTime
     for coreCount in self.coreList:
         for sizeMB in self.allocSizeList:
             perfName = "python.BigBox.MmapAlloc.SecondsPerGB.%sCore_%sMB" % (
                 coreCount, sizeMB)
             #the allocation size is handed over in bytes
             measured = self.measureMmapPerformance(
                 duration, 1024 * 1024 * sizeMB, coreCount, False)
             PerformanceTestReporter.recordTest(perfName, measured, None)
コード例 #14
0
ファイル: testGbmRegression.py プロジェクト: vishnur/ufora
    def gbmRegressionFittingTest(self, nRows, nColumns, depth, nThreads,
                                 maxBoosts):
        """Record elapsed fitting times for each boost count from 1 to maxBoosts - 1.

        Data is generated on a single worker with `nThreads` threads and an
        initial fitter is built from it. Each loop iteration then computes the
        predictions and advances the fitter by one step, recording the elapsed
        time after each of the two computations.
        """
        #NOTE(review): this name is rebound at the top of every loop iteration
        #below and is not read in between.
        testName = self.getTestName(nRows, nColumns, depth, maxBoosts,
                                    nThreads)

        storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        generated, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
            self.dataGenerationScript(nRows, nColumns),
            storage,
            1,
            timeout=360, memoryLimitMb=30 * 1024, threadCount=nThreads,
            returnSimulation=True, useInMemoryCache=False)
        try:
            self.assertTrue(generated.isResult())

            predictors, response = generated.asResult.result

            initialFit = simulation.compute(
                self.regressionScript(depth, 1),
                timeout=360,
                dfResponse=response,
                dfPredictors=predictors)
            model = initialFit.asResult.result

            #the clock is started once and never reset, so every recorded value
            #is the time elapsed since just before the first iteration
            startedAt = time.time()

            for nBoosts in range(1, maxBoosts):
                testName = self.getTestName(nRows, nColumns, depth, nBoosts,
                                            nThreads)

                predicted = simulation.compute(
                    "fitter.predictionsAndPseudoresiduals()",
                    timeout=360,
                    fitter=model).asResult.result
                elapsed = time.time() - startedAt
                PerformanceTestReporter.recordTest(
                    testName + "_predict", elapsed, None)

                model = simulation.compute(
                    "fitter.nextGivenPredictions(predictions)",
                    timeout=360,
                    fitter=model,
                    predictions=predicted).asResult.result
                elapsed = time.time() - startedAt
                PerformanceTestReporter.recordTest(testName, elapsed, None)

        finally:
            simulation.teardown()
コード例 #15
0
    def stringToInt64ParsingTest(self, threads, testName):
        """Time repeated String -> Int64 conversion across `threads` parallel loops.

        Arguments:
            threads - number of independent loops; substituted for
                __thread_count__ in the script.
            testName - name under which the elapsed time is recorded.

        Only the evaluation of the script is timed. Every simulation started
        here is torn down.
        """
        s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        #we wish we could actually test that we achieve saturation here but we can't yet.
        text = """
            let doALoop = fun(x) {
                //pass 's' through a vector so that the compiler can't tell what it is
                let s = ["2013"][0];

                let res = 0
                for ix in sequence(x) {
                    if (ix == 0)
                        s = s + String(ix)

                    res = res + Int64(s) + ix
                    }
                res
                };

            Vector.range(__thread_count__) ~~ {doALoop(20000000 + _)}
            """.replace("__thread_count__", str(threads))

        def startSimulation():
            #"1+1" is a trivial computation used to bring the simulation up
            return self.computeUsingSeveralWorkers(
                "1+1",
                s3,
                1,
                timeout = 240,
                memoryLimitMb = 55 * 1024,
                threadCount = 30,
                returnSimulation = True,
                useInMemoryCache = False
                )

        #a first simulation is started before the measured one (presumably as a
        #warm-up - TODO confirm). It must be torn down explicitly: rebinding the
        #name alone would leave it running for the rest of the test.
        _, warmupSimulation = startSimulation()
        warmupSimulation.teardown()

        _, simulation = startSimulation()

        try:
            t0 = time.time()
            simulation.compute(text, timeout=240)
            totalTimeToReturnResult = time.time() - t0

            PerformanceTestReporter.recordTest(testName, totalTimeToReturnResult, None)
        finally:
            simulation.teardown()
コード例 #16
0
    def test_knows_is_reporting(self):
        """isCurrentlyTesting() must follow whether the data-location variable is set."""
        locationVariable = PerformanceTestReporter.TEST_DATA_LOCATION_ENVIRONMENT_VARIABLE

        #(value given to SetEnv for the variable, assertion the reporter must satisfy)
        scenarios = (
            (None, self.assertFalse),
            ("./testResults.json", self.assertTrue),
            )

        for location, check in scenarios:
            with SetEnv(locationVariable, location):
                check(PerformanceTestReporter.isCurrentlyTesting())
コード例 #17
0
ファイル: testBigboxMemoryPerf.py プロジェクト: vishnur/ufora
 def testMmapAllocation(self):
     """Record an mmap-allocation measurement for every core count / allocation size pair."""
     duration = self.measurementTime
     for coreCount in self.coreList:
         for sizeMB in self.allocSizeList:
             perfName = "python.BigBox.MmapAlloc.SecondsPerGB.%sCore_%sMB" % (
                 coreCount, sizeMB)
             #the allocation size is handed over in bytes
             measured = self.measureMmapPerformance(
                 duration, 1024 * 1024 * sizeMB, coreCount, False)
             PerformanceTestReporter.recordTest(perfName, measured, None)
コード例 #18
0
    def test_cant_report_nonsensical_timing(self):
        """Recording a non-numeric timing must raise.

        The reporter is pointed at a data file inside a fresh temporary
        directory, which is removed again once the check has run.
        """
        #imported locally so the block does not depend on a file-level import
        import shutil

        tempDir = tempfile.mkdtemp()
        try:
            tempFile = os.path.join(tempDir, "data.json")

            with SetEnv(
                    PerformanceTestReporter.TEST_DATA_LOCATION_ENVIRONMENT_VARIABLE,
                    tempFile
                    ):
                with self.assertRaises(Exception):
                    PerformanceTestReporter.recordTest("test1","not a float",None)
        finally:
            #mkdtemp leaves cleanup to the caller; don't let a cleanup problem
            #mask the outcome of the test itself
            shutil.rmtree(tempDir, ignore_errors=True)
コード例 #19
0
    def test_effectiveParallelism(self):
        """Record elapsed time and effective core count for a two-worker computation.

        The same script is run twice: once as an unmeasured burn-in and once
        timed. Effective parallelism is the interpreter plus compiler time
        reported in the stats, divided by the wall-clock time.
        """
        s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        script = """
                let v = Vector.range(5000000, { (1,_) } );

                let f = fun(ix) {
                    let res = 0
                    for x in sequence( (ix - 2000) >>> 0, ix )
                        res = res + size(v[x])
                    res
                    }

                Vector.range(size(v),  f).sum()

                """

        def runAndGetStats():
            #element 1 of the returned pair is the stats object requested via wantsStats
            return self.computeUsingSeveralWorkers(
                script,
                s3,
                2,
                wantsStats=True,
                timeout=240,
                memoryLimitMb=500)[1]

        #do a burn-in run
        runAndGetStats()

        startedAt = time.time()
        stats = runAndGetStats()
        wallClock = time.time() - startedAt

        busyTime = stats.timeSpentInInterpreter + stats.timeSpentInCompiler
        effectiveCores = busyTime / wallClock

        PerformanceTestReporter.recordTest(
            "python.cumulus.EffectiveParallelism.elapsed", wallClock, None)

        PerformanceTestReporter.recordTest(
            "python.cumulus.EffectiveParallelism.effectiveCores",
            effectiveCores,
            {},
            units='count')
コード例 #20
0
ファイル: testGbmRegression.py プロジェクト: Sandy4321/ufora
    def gbmRegressionFittingTest(self, nRows, nColumns, depth, nThreads, maxBoosts):
        """Record elapsed fitting times for each boost count from 1 to maxBoosts - 1.

        Data is generated on a single worker with `nThreads` threads and an
        initial fitter is built from it. Each loop iteration then computes the
        predictions and advances the fitter by one step, recording the elapsed
        time after each of the two computations.
        """
        #NOTE(review): this name is rebound at the top of every loop iteration
        #below and is not read in between.
        testName = self.getTestName(nRows, nColumns, depth, maxBoosts, nThreads)

        storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        generated, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
            self.dataGenerationScript(nRows, nColumns),
            storage,
            1,
            timeout = 360,
            memoryLimitMb = 30 * 1024,
            threadCount = nThreads,
            returnSimulation = True,
            useInMemoryCache = False
            )
        try:
            self.assertTrue(generated.isResult())

            predictors, response = generated.asResult.result

            initialFit = simulation.compute(
                self.regressionScript(depth, 1),
                timeout = 360,
                dfResponse = response,
                dfPredictors = predictors
                )
            model = initialFit.asResult.result

            #the clock is started once and never reset, so every recorded value
            #is the time elapsed since just before the first iteration
            startedAt = time.time()

            for nBoosts in range(1, maxBoosts):
                testName = self.getTestName(nRows, nColumns, depth, nBoosts, nThreads)

                predicted = simulation.compute(
                    "fitter.predictionsAndPseudoresiduals()",
                    timeout = 360,
                    fitter = model
                    ).asResult.result
                elapsed = time.time() - startedAt
                PerformanceTestReporter.recordTest(testName + "_predict", elapsed, None)

                model = simulation.compute(
                    "fitter.nextGivenPredictions(predictions)",
                    timeout = 360,
                    fitter = model,
                    predictions = predicted
                    ).asResult.result
                elapsed = time.time() - startedAt
                PerformanceTestReporter.recordTest(testName, elapsed, None)

        finally:
            simulation.teardown()
    def test_CalculationRicochet(self):
        """Time two passes of 1000 random lookups into a vector spread over four workers.

        The vector is built once; each pass runs the same script with a
        different random seed and is recorded under its own test name.
        """
        s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        text = """
            let f = fun(ct, seed = 1) {
                let x = 0

                let res = []

                let it = iterator(math.random.UniformReal(0, size(v), seed))

                for ix in sequence(ct) {
                    let x = Int64(pull it)
                    res = res :: (x / Float64(size(v)), v[x])
                    }

                return res
                }

            v[2]
            f(__count__,__seed__)
            """

        vResult, sim = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
            "Vector.range(125000000, math.log)",
            s3,
            4,
            timeout=120, memoryLimitMb=400, threadCount=1,
            useInMemoryCache=True, returnSimulation=True)

        #(seed substituted into the script, recorded test name), in run order
        passes = (
            ("1", "python.InMemoryCumulus.Ricochet1000.Pass1"),
            ("2", "python.InMemoryCumulus.Ricochet1000.Pass2"),
            )

        try:
            v = vResult.asResult.result

            for seed, perfName in passes:
                startedAt = time.time()
                #the placeholder substitution happens inside the timed region
                sim.compute(
                    text.replace("__seed__", seed).replace("__count__", "1000"),
                    timeout=120,
                    v=v)
                PerformanceTestReporter.recordTest(
                    perfName, time.time() - startedAt, None)
        finally:
            sim.teardown()
コード例 #22
0
    def largeDatasetJoinTest(self, mbOfData, columns, threads, machineCount, ratio = .5):
        """Time an outer join between the two halves of a generated dataset.

        Arguments:
            mbOfData - amount of data to generate, in megabytes.
            columns - number of columns to generate.
            threads - thread count handed to each machine.
            machineCount - number of workers in the simulation.
            ratio - data size as a fraction of total memory; each machine gets
                mbOfData / ratio / machineCount megabytes.

        Only the join itself is timed; data generation is not. The simulation
        is torn down even if a computation or check fails.
        """
        s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
                        self.dataGenerationScript(mbOfData, columns),
                        s3,
                        machineCount,
                        timeout = 360,
                        memoryLimitMb = mbOfData / ratio / machineCount,
                        #channelThroughputMBPerSecond = 100.0,
                        threadCount = threads,
                        returnSimulation = True,
                        useInMemoryCache = False,
                        disableEventHandler = True
                        )

        try:
            self.assertTrue(result.isResult())

            data = result.asResult.result

            joinScript = """
                    let leftDF = dataframe.DataFrame(data[,size(data)/2])
                    let rightDF = dataframe.DataFrame(data[size(data)/2,])

                    size(leftDF.join(rightDF, on: "C0", how: `outer, chunkSize: 1000000, areSorted:true))
                    """

            t0 = time.time()
            result = simulation.compute(
                joinScript,
                timeout=1080,
                data=data
                )
            totalTimeToReturnResult = time.time() - t0

            logging.info("Total time to join: %s", totalTimeToReturnResult)

            self.assertTrue(result.isResult(), result)

            PerformanceTestReporter.recordTest(
                "algorithms.Join.inMemory_%sMB_%scols_%sthreads_%smachines" %
                    (mbOfData, columns,threads,machineCount),
                totalTimeToReturnResult,
                None
                )
        finally:
            #drop the references this function actually holds to computed values
            #before tearing down (presumably so the simulation can release them -
            #TODO confirm)
            data = None
            result = None
            simulation.teardown()
コード例 #23
0
ファイル: testBigboxDiskPerf.py プロジェクト: Sandy4321/ufora
    def test_disk_scans(self):
        """Time creating `gigabytes` large vectors with a disk-backed cache, then summing them.

        Two values are recorded: the time to create all vectors, and the time
        to create them plus sum each one.
        """
        s3 = ActualS3Interface.ActualS3InterfaceFactory()
        objectStore = S3ObjectStore.S3ObjectStore(
            s3,
            Setup.config().userDataS3Bucket,
            prefix="test_object_cache/"
            )

        _, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
            "1+1",
            s3,
            1,
            memoryLimitMb=1 * 1024,
            threadCount=30,
            returnSimulation=True,
            ioTaskThreadOverride=8,
            objectStore=objectStore,
            useInMemoryCache=False  #use an actual disk cache for this
            )

        try:
            gigabytes = 8

            writeStart = time.time()

            #each vector has a slightly different length
            vectors = [
                simulation.compute("Vector.range(125000000 + %s)" % ix, timeout=120).asResult.result
                for ix in range(gigabytes)
                ]

            writeEnd = time.time()

            sums = [
                simulation.compute("v.sum()", timeout = 120, v=vec).asResult.result.pyval
                for vec in vectors
                ]

            self.assertTrue(len(sums) == gigabytes)

            #NOTE(review): this name says 10GB although `gigabytes` is 8 - confirm
            #whether the label is intentional before renaming the metric.
            PerformanceTestReporter.recordTest(
                "python.BigBox.Disk.Write.10GB", writeEnd - writeStart, None)

            PerformanceTestReporter.recordTest(
                "python.BigBox.Disk.WriteAndScan.%sGB" % gigabytes,
                time.time() - writeStart,
                None)
        finally:
            simulation.teardown()
コード例 #24
0
    def gbmRegressionFittingTest(self,
                                 nRows,
                                 nColumns,
                                 depth,
                                 nThreads,
                                 nBoosts,
                                 copies,
                                 report=True):
        """Time `copies` model fits evaluated in a single computation.

        Data is generated on a single worker with `nThreads` threads, a
        builder is created from the regression script, and one computation
        calls `builder.fit` once per copy. The elapsed time is recorded only
        when `report` is true.
        """
        storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        generated, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
            self.dataGenerationScript(nRows, nColumns),
            storage,
            1,
            timeout=360, memoryLimitMb=30 * 1024, threadCount=nThreads,
            returnSimulation=True, useInMemoryCache=False)
        try:
            self.assertTrue(generated.isResult())

            predictors, response = generated.asResult.result

            builderResult = simulation.compute(
                self.regressionScript(depth, nBoosts),
                timeout=360,
                dfResponse=response,
                dfPredictors=predictors)
            modelBuilder = builderResult.asResult.result

            startedAt = time.time()

            perfName = self.getTestName(nRows, nColumns, depth, nBoosts,
                                        nThreads, copies)

            fitComputation = simulation.compute(
                "Vector.range(%s).apply(fun(x) { builder.fit(dfPredictors[,-x-1], dfResponse[,-x-1]) })"
                % copies,
                timeout=360,
                builder=modelBuilder,
                dfPredictors=predictors,
                dfResponse=response,
            )
            #reading the result is kept inside the timed region
            fitComputation.asResult.result
            elapsed = time.time() - startedAt

            if report:
                PerformanceTestReporter.recordTest(perfName, elapsed, None)

        finally:
            simulation.teardown()
コード例 #25
0
    def test_effectiveParallelism(self):
        """Record elapsed time and effective core count for a two-worker computation.

        The same script is run twice: once as an unmeasured burn-in and once
        timed. Effective parallelism is the interpreter plus compiler time
        reported in the stats, divided by the wall-clock time.
        """
        s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        script = """
                let v = Vector.range(5000000, { (1,_) } );

                let f = fun(ix) {
                    let res = 0
                    for x in sequence( (ix - 2000) >>> 0, ix )
                        res = res + size(v[x])
                    res
                    }

                Vector.range(size(v),  f).sum()

                """

        def runAndGetStats():
            #element 1 of the returned pair is the stats object requested via wantsStats
            return self.computeUsingSeveralWorkers(
                script, s3, 2, wantsStats = True, timeout=240, memoryLimitMb=500
                )[1]

        #do a burn-in run
        runAndGetStats()

        startedAt = time.time()
        stats = runAndGetStats()
        wallClock = time.time() - startedAt

        busyTime = stats.timeSpentInInterpreter + stats.timeSpentInCompiler
        effectiveCores = busyTime / wallClock

        PerformanceTestReporter.recordTest(
            "python.cumulus.EffectiveParallelism.elapsed",
            wallClock,
            None
            )

        PerformanceTestReporter.recordTest(
            "python.cumulus.EffectiveParallelism.effectiveCores",
            effectiveCores,
            {},
            units='count'
            )
コード例 #26
0
ファイル: testExampleThoughput.py プロジェクト: vishnur/ufora
    def test_some_throughput(self):
        """Measure throughput of a FORA loop reading a two-element mixed vector."""
        # %s scales the iteration count by the n supplied to the callback
        loopTemplate = """let v = [0, 0.0]; let res = 0;
                         for ix in sequence(%s * 100000000) { 
                             res = res + v[0] + v[1];
                             }
                         res"""

        def evaluateLoop(n):
            FORA.eval(loopTemplate % n)

        PerformanceTestReporter.testThroughput(
            "fora_lang.LangTestPerf.vector.heterogeneousVectorAccessThroughput_100mm",
            evaluateLoop,
            maxNToSearch=10,
            timeoutInSec=5.0)
コード例 #27
0
    def largeDatasetJoinTest(self,
                             mbOfData,
                             columns,
                             threads,
                             machineCount,
                             ratio=.5):
        """Time an outer join between two halves of a generated dataset.

        The data is produced by ``self.dataGenerationScript(mbOfData, columns)``
        on ``machineCount`` workers whose memory limit is
        ``mbOfData / ratio / machineCount``. Only the join itself is timed; the
        elapsed time is logged and recorded under
        ``algorithms.Join.inMemory_<mb>MB_<cols>cols_<threads>threads_<machines>machines``.
        """
        s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
            self.dataGenerationScript(mbOfData, columns),
            s3,
            machineCount,
            timeout=360,
            memoryLimitMb=mbOfData / ratio / machineCount,
            threadCount=threads,
            returnSimulation=True,
            useInMemoryCache=False,
            disableEventHandler=True)

        try:
            self.assertTrue(result.isResult())

            data = result.asResult.result

            joinScript = """
                    let leftDF = dataframe.DataFrame(data[,size(data)/2])
                    let rightDF = dataframe.DataFrame(data[size(data)/2,])

                    size(leftDF.join(rightDF, on: "C0", how: `outer, chunkSize: 1000000, areSorted:true))
                    """

            t0 = time.time()
            result = simulation.compute(joinScript, timeout=1080, data=data)
            totalTimeToReturnResult = time.time() - t0

            logging.info("Total time to join: %s", totalTimeToReturnResult)

            self.assertTrue(result.isResult(), result)

            PerformanceTestReporter.recordTest(
                "algorithms.Join.inMemory_%sMB_%scols_%sthreads_%smachines" %
                (mbOfData, columns, threads, machineCount),
                totalTimeToReturnResult, None)
        finally:
            # Release the values this method actually obtained from the
            # simulation before tearing it down (presumably so nothing keeps
            # them alive during teardown -- TODO confirm). Plain assignment is
            # safe even when the try block failed before binding `data`.
            data = None
            result = None
            simulation.teardown()
コード例 #28
0
    def test_some_throughput(self):
        """Run the heterogeneous-vector access loop through ``testThroughput``."""
        throughputName = \
            "fora_lang.LangTestPerf.vector.heterogeneousVectorAccessThroughput_100mm"

        def runScaledLoop(n):
            # n multiplies the base iteration count inside the FORA script
            script = """let v = [0, 0.0]; let res = 0;
                         for ix in sequence(%s * 100000000) { 
                             res = res + v[0] + v[1];
                             }
                         res""" % n
            FORA.eval(script)

        PerformanceTestReporter.testThroughput(
            throughputName,
            runScaledLoop,
            maxNToSearch=10,
            timeoutInSec=5.0)
コード例 #29
0
    def test_throughputThrowsIfNonePassed(self):
        """``testThroughput`` must raise AssertionError when every run times out.

        Results are redirected to a scratch ``data.json`` through the
        reporter's data-location environment variable; the scratch directory
        is removed afterwards.
        """
        import shutil

        tempDir = tempfile.mkdtemp()
        try:
            tempFile = os.path.join(tempDir, "data.json")

            with SetEnv(
                    PerformanceTestReporter.TEST_DATA_LOCATION_ENVIRONMENT_VARIABLE,
                    tempFile
                    ):
                def testFunOfN(n):
                    raise PerformanceTestReporter.TimedOutException("timed out!!")

                with self.assertRaises(AssertionError):
                    PerformanceTestReporter.testThroughput(
                        "test1", testFunOfN = testFunOfN)
        finally:
            # mkdtemp() leaves deletion to the caller
            shutil.rmtree(tempDir, ignore_errors=True)
コード例 #30
0
ファイル: numpyThroughputTest.py プロジェクト: ufora/ufora
    def vector_dot_product(self, dimension):
        """Measure throughput of ``np.dot`` on two ``np.arange(dimension)`` vectors.

        Both the operands and the repeated dot products are evaluated inside
        ``self.ufora.remotely`` blocks.
        """
        with self.ufora.remotely:
            lhs = np.arange(dimension)
            rhs = np.arange(dimension)

        def repeatDot(n):
            with self.ufora.remotely:
                for _ in xrange(n):
                    np.dot(lhs, rhs)

        throughputName = "pyfora.numpy.vector_dot_product_%d" % dimension
        PerformanceTestReporter.testThroughput(
            throughputName, repeatDot, maxNToSearch=20, timeoutInSec=20.0)
コード例 #31
0
ファイル: numpyThroughputTest.py プロジェクト: ufora/ufora
    def array_binary_operation(self, dimension, op, test_name):
        """Measure throughput of ``op`` applied to two ``np.arange(dimension)`` arrays.

        The result is reported as ``pyfora.numpy.<test_name>_<dimension>``.
        """
        with self.ufora.remotely:
            lhs = np.arange(dimension)
            rhs = np.arange(dimension)

        def repeatOp(n):
            with self.ufora.remotely:
                for _ in xrange(n):
                    op(lhs, rhs)

        throughputName = "pyfora.numpy.%s_%d" % (test_name, dimension)
        PerformanceTestReporter.testThroughput(
            throughputName, repeatOp, maxNToSearch=20, timeoutInSec=20.0)
コード例 #32
0
ファイル: bigLM_test.py プロジェクト: vishnur/ufora
    def largeDatasetBigLMTest(self, mbOfData, columns, threads, testName):
        """Time data generation and a linear-regression fit on one worker.

        When ``testName`` is not None the generation time is recorded under
        ``testName + "_create"`` and the regression time under ``testName``.
        """
        s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()
        shouldReport = testName is not None

        creationStart = time.time()

        result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
            self.dataGenerationScript(mbOfData, columns),
            s3,
            1,
            timeout=360,
            memoryLimitMb=50 * 1024,
            threadCount=threads,
            returnSimulation=True,
            useInMemoryCache=False)

        if shouldReport:
            creationSeconds = time.time() - creationStart
            PerformanceTestReporter.recordTest(
                testName + "_create", creationSeconds, None)

        try:
            self.assertTrue(result.isResult())

            dfResponse, dfPredictors = result.asResult.result

            regressionScript = """
                let model = math.regression.LinearRegression(dfPredictors, dfResponse, fitIntercept: false);
                let coefficients = model.coefficients();
                coefficients[0]
                """

            regressionStart = time.time()
            result = simulation.compute(
                regressionScript,
                timeout=1080,
                dfResponse=dfResponse,
                dfPredictors=dfPredictors)
            totalTimeToReturnResult = time.time() - regressionStart

            self.assertTrue(result.isResult())

            if shouldReport:
                PerformanceTestReporter.recordTest(
                    testName, totalTimeToReturnResult, None)
        finally:
            # clear local references to computed values before teardown
            dfResponse = None
            dfPredictors = None
            result = None
            simulation.teardown()
コード例 #33
0
ファイル: bigLM_test.py プロジェクト: vishnur/ufora
    def largeDatasetBigLMTest(self, mbOfData, columns, threads, testName):
        """Generate a dataset, fit a linear regression on it and record timings.

        Two timings are recorded, and only when ``testName`` is not None:
        ``testName + "_create"`` for the data generation and ``testName`` for
        the regression script.
        """
        s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        generationStart = time.time()

        generationScript = self.dataGenerationScript(mbOfData, columns)
        result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
            generationScript,
            s3,
            1,
            timeout=360,
            memoryLimitMb=50 * 1024,
            threadCount=threads,
            returnSimulation=True,
            useInMemoryCache=False)

        if testName is not None:
            PerformanceTestReporter.recordTest(
                testName + "_create",
                time.time() - generationStart,
                None)

        try:
            self.assertTrue(result.isResult())

            dfResponse, dfPredictors = result.asResult.result

            regressionScript = """
                let model = math.regression.LinearRegression(dfPredictors, dfResponse, fitIntercept: false);
                let coefficients = model.coefficients();
                coefficients[0]
                """

            fitStart = time.time()
            result = simulation.compute(regressionScript,
                                        timeout=1080,
                                        dfResponse=dfResponse,
                                        dfPredictors=dfPredictors)
            totalTimeToReturnResult = time.time() - fitStart

            self.assertTrue(result.isResult())

            if testName is not None:
                PerformanceTestReporter.recordTest(
                    testName,
                    totalTimeToReturnResult,
                    None)
        finally:
            # local references are cleared before the simulation is torn down
            dfResponse = None
            dfPredictors = None
            result = None
            simulation.teardown()
コード例 #34
0
    def array_binary_operation(self, dimension, op, test_name):
        """Report throughput of a binary ``op`` over two ``np.arange(dimension)`` arrays.

        The throughput test is named ``pyfora.numpy.<test_name>_<dimension>``.
        """
        with self.ufora.remotely:
            first = np.arange(dimension)
            second = np.arange(dimension)

        def applyRepeatedly(n):
            with self.ufora.remotely:
                for _ in xrange(n):
                    op(first, second)

        PerformanceTestReporter.testThroughput(
            "pyfora.numpy.%s_%d" % (test_name, dimension),
            applyRepeatedly,
            maxNToSearch=20,
            timeoutInSec=20.0)
コード例 #35
0
    def vector_dot_product(self, dimension):
        """Report throughput of ``np.dot`` over two ``np.arange(dimension)`` vectors.

        The throughput test is named ``pyfora.numpy.vector_dot_product_<dimension>``.
        """
        with self.ufora.remotely:
            first = np.arange(dimension)
            second = np.arange(dimension)

        def dotRepeatedly(n):
            with self.ufora.remotely:
                for _ in xrange(n):
                    np.dot(first, second)

        PerformanceTestReporter.testThroughput(
            "pyfora.numpy.vector_dot_product_%d" % dimension,
            dotRepeatedly,
            maxNToSearch=20,
            timeoutInSec=20.0)
コード例 #36
0
    def roundtripConvert(self, pyObject, testName):
        """Round-trip ``pyObject`` via ``self._roundtripConvert`` and record its timings.

        Each entry of the returned timings mapping is printed and recorded as
        ``testName + "." + key``, in sorted key order. Any exception raised
        along the way is printed and turned into a test failure that carries
        the traceback.
        """
        import traceback

        try:
            _, timings = self._roundtripConvert(pyObject)

            for k in sorted(timings):
                # one pre-formatted argument prints identically whether
                # print is a statement or a function
                print("%s %s" % (k, timings[k]))
                PerformanceTestReporter.recordTest(
                    testName=testName+"."+k,
                    elapsedTime=timings[k],
                    metadata=None
                    )

        except Exception:
            # Exception, not a bare except: KeyboardInterrupt and SystemExit
            # must keep propagating instead of being reported as a failure
            traceback.print_exc()
            self.fail(traceback.format_exc())
コード例 #37
0
ファイル: testBigboxDiskPerf.py プロジェクト: Sandy4321/ufora
    def diskThroughputTest(self, gb):
        """Time storing and then loading ``gb * 20`` fifty-megabyte pages on disk.

        Pages go into a fresh uuid-named directory under ``CUMULUS_DATA_DIR``,
        or under a new temporary directory when that variable is unset. Write
        and read times are recorded as ``python.BigBox.Disk.Write<gb>GB`` and
        ``python.BigBox.Disk.Read<gb>GB``. Everything this method created on
        disk is removed afterwards.
        """
        configuredRoot = os.getenv("CUMULUS_DATA_DIR")
        # Only a root we created ourselves may be deleted wholesale
        tempRoot = tempfile.mkdtemp() if configuredRoot is None else None
        rootDir = configuredRoot if tempRoot is None else tempRoot
        dataDir = os.path.join(rootDir, str(uuid.uuid4()))

        pageBytes = 50 * 1024 * 1024

        def pageIdFor(ix):
            return ForaNative.PageId(
                HashNative.Hash.sha1(str(ix)), pageBytes, pageBytes)

        try:
            diskCache = CumulusNative.DiskOfflineCache(
                callbackScheduler,
                dataDir,
                100 * 1024 * 1024 * 1024,
                100000
                )

            fiftyMegabytes = ForaNative.encodeStringInSerializedObject(" " * 1024 * 1024 * 50)

            logging.info("Writing to %s", dataDir)

            t0 = time.time()
            for ix in range(gb * 20):
                diskCache.store(pageIdFor(ix), fiftyMegabytes)

            PerformanceTestReporter.recordTest(
                "python.BigBox.Disk.Write%sGB" % gb,
                time.time() - t0,
                None
                )

            t0 = time.time()
            for ix in range(gb * 20):
                diskCache.loadIfExists(pageIdFor(ix))

            PerformanceTestReporter.recordTest(
                "python.BigBox.Disk.Read%sGB" % gb,
                time.time() - t0,
                None
                )

        finally:
            # Removing only dataDir would leak the mkdtemp() parent, so take
            # out the whole temporary root when we own it.
            cleanupTarget = dataDir if tempRoot is None else tempRoot
            if os.path.exists(cleanupTarget):
                shutil.rmtree(cleanupTarget)
コード例 #38
0
    def regressionTreePredictionTest(self,
                                     mbOfData,
                                     columns,
                                     testName,
                                     treeDepth,
                                     threads,
                                     minSamplesSplit=50):
        """Time ``predict`` of a fitted regression tree over generated data.

        The tree is fitted with ``self.regressionScript(treeDepth,
        minSamplesSplit - 1)``. Only the prediction is timed, and its duration
        is recorded under ``testName``.
        """
        s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
            self.dataGenerationScript(mbOfData, columns),
            s3,
            1,
            timeout=360,
            memoryLimitMb=45 * 1024,
            threadCount=threads,
            returnSimulation=True,
            useInMemoryCache=False)
        try:
            self.assertTrue(result.isResult())

            dfResponse, dfPredictors = result.asResult.result

            fitScript = self.regressionScript(treeDepth, minSamplesSplit - 1)
            fitTree = simulation.compute(fitScript,
                                         timeout=120,
                                         dfResponse=dfResponse,
                                         dfPredictors=dfPredictors)

            # the leading value is the "dirty flag", always 1 here
            predictionTemplate = ";(%s; fitRegressionTree.predict(dfPredictors));"

            predictionStart = time.time()
            result = simulation.compute(
                predictionTemplate % 1,
                timeout=120,
                dfPredictors=dfPredictors,
                fitRegressionTree=fitTree.asResult.result)
            totalTimeToReturnResult = time.time() - predictionStart

            self.assertTrue(result.isResult())

            PerformanceTestReporter.recordTest(
                testName, totalTimeToReturnResult, None)

        finally:
            simulation.teardown()
コード例 #39
0
    def test_disk_scans(self):
        """Time building eight ``Vector.range`` vectors and summing each of them.

        Runs on one worker with ``useInMemoryCache=False`` and an S3-backed
        object store. The time to build the vectors and the total time
        including the sums are recorded as two separate tests.
        """
        s3 = ActualS3Interface.ActualS3InterfaceFactory()
        objectStore = S3ObjectStore.S3ObjectStore(
            s3, Setup.config().userDataS3Bucket, prefix="test_object_cache/")

        _, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
            "1+1",
            s3,
            1,
            memoryLimitMb=1 * 1024,
            threadCount=30,
            returnSimulation=True,
            ioTaskThreadOverride=8,
            objectStore=objectStore,
            useInMemoryCache=False  #use an actual disk cache for this
        )

        try:
            gigabytes = 8

            startTime = time.time()

            resultVectors = [
                simulation.compute("Vector.range(125000000 + %s)" % ix,
                                   timeout=120).asResult.result
                for ix in range(gigabytes)
                ]

            vectorsBuiltTime = time.time()

            intResults = [
                simulation.compute("v.sum()", timeout=120, v=vec).asResult.result.pyval
                for vec in resultVectors
                ]

            self.assertTrue(len(intResults) == gigabytes)

            # NOTE(review): the label says 10GB although `gigabytes` is 8;
            # the name is kept as-is so existing recorded results still match.
            PerformanceTestReporter.recordTest("python.BigBox.Disk.Write.10GB",
                                               vectorsBuiltTime - startTime, None)

            PerformanceTestReporter.recordTest(
                "python.BigBox.Disk.WriteAndScan.%sGB" % gigabytes,
                time.time() - startTime, None)
        finally:
            simulation.teardown()
コード例 #40
0
    def test_reporting_to_file(self):
        """Recorded tests must be readable back from the configured data file.

        Two results (one with a time, one with ``None``) are recorded while
        the reporter's data-location environment variable points at a scratch
        ``data.json``. The file is then loaded and compared. The scratch
        directory is removed afterwards.
        """
        import shutil

        tempDir = tempfile.mkdtemp()
        try:
            tempFile = os.path.join(tempDir, "data.json")

            with SetEnv(
                    PerformanceTestReporter.TEST_DATA_LOCATION_ENVIRONMENT_VARIABLE,
                    tempFile
                    ):
                PerformanceTestReporter.recordTest("test1.result", 10.0, {"some":"metadata"})
                PerformanceTestReporter.recordTest("test1.result", None, {"some":"metadata"})

            testData = PerformanceTestReporter.loadTestsFromFile(tempFile)

            self.assertEqual(testData,
                [{"name":"test1.result", "time":10.0, "metadata": {"some":"metadata"}},
                 {"name":"test1.result", "time":None, "metadata": {"some":"metadata"}}
                 ])
        finally:
            # mkdtemp() leaves deletion to the caller
            shutil.rmtree(tempDir, ignore_errors=True)
    def test_CalculationRicochet(self):
        """Time two random-access passes over a large vector on four workers.

        The vector is built once; the same script is then run with seed 1 and
        seed 2 (count 1000 each) and each pass is recorded separately.
        """
        s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        text = """
            let f = fun(ct, seed = 1) {
                let x = 0

                let res = []

                let it = iterator(math.random.UniformReal(0, size(v), seed))

                for ix in sequence(ct) {
                    let x = Int64(pull it)
                    res = res :: (x / Float64(size(v)), v[x])
                    }

                return res
                }

            v[2]
            f(__count__,__seed__)
            """

        vResult, sim = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
            "Vector.range(125000000, math.log)",
            s3,
            4,
            timeout=120,
            memoryLimitMb=400,
            threadCount=1,
            useInMemoryCache=True,
            returnSimulation=True)

        # (seed substituted into the script, name the pass is recorded under)
        passes = (
            ("1", "python.InMemoryCumulus.Ricochet1000.Pass1"),
            ("2", "python.InMemoryCumulus.Ricochet1000.Pass2"),
            )

        try:
            sharedVector = vResult.asResult.result

            for seed, reportName in passes:
                passStart = time.time()
                sim.compute(
                    text.replace("__seed__", seed).replace("__count__", "1000"),
                    timeout=120,
                    v=sharedVector)
                PerformanceTestReporter.recordTest(
                    reportName, time.time() - passStart, None)
        finally:
            sim.teardown()
コード例 #42
0
    def gbmRegressionFittingTest(self, nRows, nColumns, depth, nThreads, nBoosts, copies, report=True):
        """Time ``copies`` fits of a regression builder over generated data.

        The builder comes from ``self.regressionScript(depth, nBoosts)``. The
        timed computation maps ``builder.fit`` over ``Vector.range(copies)``.
        When ``report`` is true the elapsed time is recorded under the name
        returned by ``self.getTestName``.
        """
        s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
            self.dataGenerationScript(nRows, nColumns),
            s3,
            1,
            timeout=360,
            memoryLimitMb=30 * 1024,
            threadCount=nThreads,
            returnSimulation=True,
            useInMemoryCache=False)
        try:
            self.assertTrue(result.isResult())

            dfPredictors, dfResponse = result.asResult.result

            builderComputation = simulation.compute(
                self.regressionScript(depth, nBoosts),
                timeout=360,
                dfResponse=dfResponse,
                dfPredictors=dfPredictors)
            builder = builderComputation.asResult.result

            fitStart = time.time()

            testName = self.getTestName(
                nRows, nColumns, depth, nBoosts, nThreads, copies)

            fitScript = (
                "Vector.range(%s).apply(fun(x) { builder.fit(dfPredictors[,-x-1], dfResponse[,-x-1]) })"
                % copies)
            fitComputation = simulation.compute(
                fitScript,
                timeout=360,
                builder=builder,
                dfPredictors=dfPredictors,
                dfResponse=dfResponse)
            # unwrapping is part of the timed section, as a failed
            # computation surfaces here
            result = fitComputation.asResult.result
            totalTimeToReturnResult = time.time() - fitStart

            if report:
                PerformanceTestReporter.recordTest(
                    testName, totalTimeToReturnResult, None)

        finally:
            simulation.teardown()
コード例 #43
0
ファイル: numpyThroughputTest.py プロジェクト: ufora/ufora
    def matrix_dot_product(self, dimension):
        """Measure throughput of ``np.dot`` on two ``dimension`` x ``dimension`` matrices.

        Each matrix is ``np.arange(dimension**2)`` reshaped to a square; the
        test is named ``pyfora.numpy.matrix_dot_product_<d>x<d>``.
        """
        with self.ufora.remotely:
            lhs = np.arange(dimension**2).reshape(
                (int(dimension), int(dimension)))
            rhs = np.arange(dimension**2).reshape(
                (int(dimension), int(dimension)))

        def repeatDot(n):
            with self.ufora.remotely:
                for _ in xrange(n):
                    np.dot(lhs, rhs)

        throughputName = \
            "pyfora.numpy.matrix_dot_product_%dx%d" % (dimension, dimension)
        PerformanceTestReporter.testThroughput(
            throughputName, repeatDot, maxNToSearch=20, timeoutInSec=20.0)
コード例 #44
0
    def matrix_dot_product(self, dimension):
        """Report throughput of ``np.dot`` over two square ``dimension``-sized matrices.

        Both operands are built as ``np.arange(dimension**2)`` reshaped to
        ``(int(dimension), int(dimension))``.
        """
        with self.ufora.remotely:
            first = np.arange(dimension**2).reshape(
                (int(dimension), int(dimension)))
            second = np.arange(dimension**2).reshape(
                (int(dimension), int(dimension)))

        def dotRepeatedly(n):
            with self.ufora.remotely:
                for _ in xrange(n):
                    np.dot(first, second)

        PerformanceTestReporter.testThroughput(
            "pyfora.numpy.matrix_dot_product_%dx%d" % (dimension, dimension),
            dotRepeatedly,
            maxNToSearch=20,
            timeoutInSec=20.0)
コード例 #45
0
    def regressionTreePredictionTest(self, mbOfData, columns, testName,
                                     treeDepth, threads, minSamplesSplit=50):
        """Fit a regression tree on generated data and time its ``predict`` call.

        Fitting uses ``self.regressionScript(treeDepth, minSamplesSplit - 1)``
        and is not timed; the prediction time is recorded under ``testName``.
        """
        s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        generationScript = self.dataGenerationScript(mbOfData, columns)
        result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
            generationScript,
            s3,
            1,
            timeout=360,
            memoryLimitMb=45 * 1024,
            threadCount=threads,
            returnSimulation=True,
            useInMemoryCache=False)
        try:
            self.assertTrue(result.isResult())

            dfResponse, dfPredictors = result.asResult.result

            fitTree = simulation.compute(
                self.regressionScript(treeDepth, minSamplesSplit - 1),
                timeout=120,
                dfResponse=dfResponse,
                dfPredictors=dfPredictors)

            # %s takes the "dirty flag" value, which is always 1 here
            predictionTemplate = ";(%s; fitRegressionTree.predict(dfPredictors));"

            startTime = time.time()
            result = simulation.compute(
                predictionTemplate % 1,
                timeout=120,
                dfPredictors=dfPredictors,
                fitRegressionTree=fitTree.asResult.result)
            totalTimeToReturnResult = time.time() - startTime

            self.assertTrue(result.isResult())

            PerformanceTestReporter.recordTest(
                testName,
                totalTimeToReturnResult,
                None)

        finally:
            simulation.teardown()
コード例 #46
0
    def test_throughputDoesNotFailOnTimeoutIfSomePassed(self):
        """``testThroughput`` must record one result if some runs pass before a timeout.

        The callback succeeds for ``n < 10`` and raises ``TimedOutException``
        otherwise. Exactly one entry is expected in the scratch data file,
        whose directory is removed afterwards.
        """
        import shutil

        tempDir = tempfile.mkdtemp()
        try:
            tempFile = os.path.join(tempDir, "data.json")

            with SetEnv(
                    PerformanceTestReporter.TEST_DATA_LOCATION_ENVIRONMENT_VARIABLE,
                    tempFile
                    ):
                def testFunOfN(n):
                    if n >= 10:
                        raise PerformanceTestReporter.TimedOutException("timed out!!")

                PerformanceTestReporter.testThroughput(
                    "test1", testFunOfN = testFunOfN)

            testData = PerformanceTestReporter.loadTestsFromFile(tempFile)

            self.assertEqual(len(testData), 1)
        finally:
            # mkdtemp() leaves deletion to the caller
            shutil.rmtree(tempDir, ignore_errors=True)
コード例 #47
0
ファイル: PerfTestBase.py プロジェクト: ufora/ufora
    def validatePerfForExpression(self, testName, foraExpr, callResult=False):
        """Validate timings of ``foraExpr`` by running ``ValidatePerf`` in a subprocess.

        The command line is the current interpreter, the ``ValidatePerf``
        module file and ``foraExpr``, followed by ``"callResult"`` when
        ``callResult`` is true.
        """
        subprocessArgs = [sys.executable, ValidatePerf.__file__, foraExpr]
        if callResult:
            subprocessArgs += ["callResult"]

        # NOTE(review): framesAbove presumably counts stack frames from this
        # call, so the lookup should stay directly in this method -- confirm.
        callSite = PerformanceTestReporter.getCurrentStackframeFileAndLine(
            framesAbove=2)

        self.validateTimingsForSubprocessCall(
            testName, subprocessArgs, callSite)
コード例 #48
0
    def stringToDatetimeParsingTest(self, threads, testName):
        """Time ``threads`` parallel loops that repeatedly parse a DateTime string.

        A single-worker simulation is started with a trivial computation, then
        the parsing script is run on it. Only that second computation is timed
        and recorded under ``testName``.
        """
        s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        #we wish we could actually test that we achieve saturation here but we can't yet.
        text = """
            let s = ["2013-01-01 15:18:10"][0];

            let doALoop = fun(x) {
                let res = 0
                for ix in sequence(x) {
                    res = res + DateTime(s).year
                    }
                res
                };

            Vector.range(__thread_count__) ~~ {doALoop(1000000 + _)}
            """.replace("__thread_count__", str(threads))

        # The memory limit used to be divided by an undefined name `workers`;
        # this test always runs exactly one worker.
        workerCount = 1

        _, simulation = \
            self.computeUsingSeveralWorkers(
                "1+1",
                s3,
                workerCount,
                timeout = 240,
                memoryLimitMb = 55 * 1024 // workerCount,
                threadCount = 30,
                returnSimulation = True,
                useInMemoryCache = False
                )

        try:
            t0 = time.time()
            simulation.compute(text, timeout=240)
            totalTimeToReturnResult = time.time() - t0

            PerformanceTestReporter.recordTest(testName,
                                               totalTimeToReturnResult, None)
        finally:
            simulation.teardown()
コード例 #49
0
ファイル: PerfTestBase.py プロジェクト: Sandy4321/ufora
    def validatePerfForExpression(self, testName, foraExpr, callResult = False):
        """Run ``ValidatePerf`` on ``foraExpr`` in a subprocess and validate its timing.

        ``"callResult"`` is appended to the subprocess arguments when
        ``callResult`` is true. The metadata passed on comes from
        ``PerformanceTestReporter.getCurrentStackframeFileAndLine``.
        """
        extraArgs = ["callResult"] if callResult else []
        subprocessArgs = \
            [sys.executable, ValidatePerf.__file__, foraExpr] + extraArgs

        # NOTE(review): framesAbove presumably counts stack frames from this
        # call, so the lookup should stay directly in this method -- confirm.
        stackframeMeta = \
            PerformanceTestReporter.getCurrentStackframeFileAndLine(framesAbove=2)

        self.validateTimingsForSubprocessCall(
            testName,
            subprocessArgs,
            stackframeMeta)
コード例 #50
0
    def stringToDatetimeParsingTest(self, threads, testName):
        """Record how long ``threads`` parallel DateTime-parsing loops take.

        The simulation is created with a trivial ``"1+1"`` computation on one
        worker; the parsing script is then computed on it, timed and recorded
        under ``testName``.
        """
        s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        #we wish we could actually test that we achieve saturation here but we can't yet.
        text = """
            let s = ["2013-01-01 15:18:10"][0];

            let doALoop = fun(x) {
                let res = 0
                for ix in sequence(x) {
                    res = res + DateTime(s).year
                    }
                res
                };

            Vector.range(__thread_count__) ~~ {doALoop(1000000 + _)}
            """.replace("__thread_count__", str(threads))

        # `workers` was never defined in this method; the worker count is
        # fixed at one, so name it and derive the memory limit from it.
        workerCount = 1

        _, simulation = \
            self.computeUsingSeveralWorkers(
                "1+1",
                s3,
                workerCount,
                timeout = 240,
                memoryLimitMb = 55 * 1024 // workerCount,
                threadCount = 30,
                returnSimulation = True,
                useInMemoryCache = False
                )

        try:
            t0 = time.time()
            simulation.compute(text, timeout=240)
            totalTimeToReturnResult = time.time() - t0

            PerformanceTestReporter.recordTest(testName, totalTimeToReturnResult, None)
        finally:
            simulation.teardown()
コード例 #51
0
ファイル: PerfTestBase.py プロジェクト: Sandy4321/ufora
    def validateTimingsForSubprocessCall(
                self,
                testName,
                subprocessArgs,
                meta,
                timeout = 600.0
                ):
        """Run a timing subprocess and record the time it reports.

        A non-zero exit code adds a ``"failure"`` entry to ``meta``, records a
        ``None`` time (only while the reporter is testing) and then fails the
        assertion with the subprocess's error output. Otherwise ``out[0]`` is
        parsed as a float and recorded under ``"fora_lang." + testName``.
        """
        resultCode, out, err = SubprocessRunner.callAndReturnResultAndOutput(
            subprocessArgs, timeout=timeout)

        if resultCode != 0:
            meta.update({"failure": "subprocess call returned error"})

            # NOTE(review): failures are recorded under the bare testName,
            # successes under "fora_lang." + testName -- confirm intent.
            if PerformanceTestReporter.isCurrentlyTesting():
                PerformanceTestReporter.recordTest(testName, None, meta)

        assert resultCode == 0, err

        reportedTime = out[0]
        logging.info("Actual time was %s for %s", reportedTime, subprocessArgs)

        # NOTE(review): measuredTiming is never read afterwards; it is kept
        # because evaluating it still touches self.baseTiming.
        measuredTiming = float(reportedTime) / self.baseTiming

        if PerformanceTestReporter.isCurrentlyTesting():
            PerformanceTestReporter.recordTest(
                "fora_lang." + testName, float(reportedTime), meta)
コード例 #52
0
    def diskThroughputTest(self, gb):
        """Record how long it takes to store and reload ``gb * 20`` 50 MB pages.

        The disk cache lives in a fresh uuid-named directory under
        ``CUMULUS_DATA_DIR`` or, when that is unset, under a new temporary
        directory. Timings are recorded as ``python.BigBox.Disk.Write<gb>GB``
        and ``python.BigBox.Disk.Read<gb>GB``. All directories created here
        are removed afterwards.
        """
        configuredRoot = os.getenv("CUMULUS_DATA_DIR")
        # Remember whether the root is ours, so cleanup can remove it too
        ownedRoot = tempfile.mkdtemp() if configuredRoot is None else None
        parentDir = configuredRoot if ownedRoot is None else ownedRoot
        dataDir = os.path.join(parentDir, str(uuid.uuid4()))

        pageSize = 50 * 1024 * 1024
        pageCount = gb * 20

        def makePageId(ix):
            return ForaNative.PageId(HashNative.Hash.sha1(str(ix)),
                                     pageSize, pageSize)

        try:
            diskCache = CumulusNative.DiskOfflineCache(callbackScheduler, dataDir,
                                                       100 * 1024 * 1024 * 1024,
                                                       100000)

            fiftyMegabytes = ForaNative.encodeStringInSerializedObject(" " * 1024 *
                                                                       1024 * 50)

            logging.info("Writing to %s", dataDir)

            t0 = time.time()
            for ix in range(pageCount):
                diskCache.store(makePageId(ix), fiftyMegabytes)

            PerformanceTestReporter.recordTest(
                "python.BigBox.Disk.Write%sGB" % gb,
                time.time() - t0, None)

            t0 = time.time()
            for ix in range(pageCount):
                diskCache.loadIfExists(makePageId(ix))

            PerformanceTestReporter.recordTest(
                "python.BigBox.Disk.Read%sGB" % gb,
                time.time() - t0, None)

        finally:
            # Deleting only dataDir would leave the mkdtemp() parent behind;
            # remove the whole root when this method created it.
            cleanupTarget = dataDir if ownedRoot is None else ownedRoot
            if os.path.exists(cleanupTarget):
                shutil.rmtree(cleanupTarget)
コード例 #53
0
    def loopScalabilityTestTest(self, threads, testName):
        """Time ``threads`` parallel arithmetic loops on a single worker.

        The simulation is created with a trivial ``"1+1"`` computation; the
        loop script is then computed on it and its duration is recorded under
        ``testName``.
        """
        s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

        scriptTemplate = """
            let doALoop = fun(x) {
                let res = 0
                for ix in sequence(x) {
                    res = res + ix + 1
                    }
                res
                };

            Vector.range(__thread_count__) ~~ {doALoop(1000000000 + _)}
            """
        text = scriptTemplate.replace("__thread_count__", str(threads))

        _, simulation = self.computeUsingSeveralWorkers(
            "1+1",
            s3,
            1,
            timeout=240,
            memoryLimitMb=55 * 1024,
            threadCount=30,
            returnSimulation=True,
            useInMemoryCache=False)

        try:
            startTime = time.time()
            result = simulation.compute(text, timeout=240)
            totalTimeToReturnResult = time.time() - startTime

            PerformanceTestReporter.recordTest(
                testName, totalTimeToReturnResult, None)
        finally:
            simulation.teardown()