def basic_gpu_works_helper(self, function, onGPU=True):
    """Check that a looped call of a backtick-named function evaluates.

    The generated script defines f(ct), which steps x upward from 1.0 while
    x < ct and accumulates `<function>(x), then applies f to every element
    of Vector.range(1024*4, {_+1000000}).

    function: name (a string) spliced into the script after a backtick.
    onGPU: if true, f is applied through `CUDAVectorApply; otherwise it is
        applied with the `~~` operator.

    Asserts that the one-worker computation returns a non-None value whose
    isResult() is true.
    """
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()
    vectorExpr = "Vector.range(1024*4, {_+1000000})"

    script = "".join((
        """ let f = fun(ct) { let res = 0.0 let x = 1.0 while (x < ct) { x = x + 1.0 res = res + `""",
        function,
        """(x) } res }""",
    ))
    if onGPU:
        script = script + """`CUDAVectorApply(f,""" + vectorExpr + """)"""
    else:
        script = script + vectorExpr + """ ~~ f"""

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        script,
        storage,
        1,
        timeout=120,
        threadCount=4
    )

    self.assertIsNotNone(outcome)
    self.assertTrue(outcome.isResult(), outcome)
def test_takeFromLargeObjects(self):
    # Runs a distributed #Take over a paged vector of large strings on one
    # worker (page size override: 1 MB) and checks that every page of the
    # resulting vector is smaller than 2 MB.
    # NOTE(review): the script literal contains a `//` comment; confirm that
    # the line breaks inside the literal are intact in the checked-in file.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    text = """ let N = 100; //each string is 1 MB let takeFrom = [" " * 100 * 100 * 10 * 10 + " " * ix for ix in sequence(N)].paged; let indices = Vector.range(N,fun(x) { x }).paged; cached`(#ExternalIoTask(#DistributedDataOperation(#Take(indices, takeFrom)))) """

    # Start the simulation before entering the try block. If startup raises
    # there is nothing to tear down, and the finally clause must not touch an
    # unbound `simulation` (that would replace the real error with an
    # UnboundLocalError).
    result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        text,
        s3,
        1,
        timeout=TIMEOUT,
        memoryLimitMb=1000,
        returnSimulation=True,
        pageSizeOverride=1024 * 1024
    )

    try:
        self.assertTrue(result is not None)
        self.assertTrue(result.isResult(), result)

        for page in result.asResult.result.getVectorPageIds(simulation.getWorkerVdm(0)):
            # bytecount is converted to megabytes before comparing.
            self.assertLess(page.bytecount / 1024.0 / 1024.0, 2.0)
    finally:
        simulation.teardown()
def test_sortHeterogeneous(self):
    # Sorts a vector that interleaves each index with its Float64 value via
    # the distributed #Sort task; the script itself throws when the output
    # size differs from the input and otherwise evaluates its own ordering
    # and type checks. The test passes only when the script yields True.
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    script = """ let values = [] let ct = 1000000 for ix in sequence(ct) values = values :: ix :: Float64(ix) let sortedVals = cached`(#ExternalIoTask(#DistributedDataOperation(#Sort(values.paged)))) let sortedAndHomogenous = fun(v) { for ix in sequence(size(v)-1) if (v[ix] >= v[ix+1] or `TypeJOV(v[ix]) is not `TypeJOV(v[ix+1])) throw (ix, v[ix], v[ix+1]) return true; } if (size(sortedVals) != size(values)) throw "expected " + String(size(values)) + ", not " + String(size(sortedVals)) sortedAndHomogenous(sortedVals[,ct]) and sortedAndHomogenous(sortedVals[ct,]) """

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        script,
        storage,
        1,
        timeout=TIMEOUT,
        memoryLimitMb=1000
    )

    self.assertTrue(outcome is not None)
    self.assertTrue(outcome.isResult(), outcome)
    self.assertTrue(outcome.asResult.result.pyval == True, outcome)
def basicTaskPathwayTest(self, sz, machines=1, memory=1000):
    """Compare the distributed #Sort task against sorting.sort.

    sz: value substituted for __size__ in the script (the vector length N).
    machines: worker count passed to the simulation.
    memory: memoryLimitMb passed to the simulation.

    The script returns a descriptive string on a size or element mismatch
    and `true` otherwise; the test requires the result to equal True.

    NOTE(review): the script literal contains a `//` comment; confirm that
    the line breaks inside the literal are intact in the checked-in file.
    """
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    template = """ let N = __size__; //let values = Vector.range(N,fun(x) { ((x * 503) % N, x) }).paged; let values = Vector.range(N).paged; let s1 = cached`(#ExternalIoTask(#DistributedDataOperation(#Sort(values)))) let s2 = sorting.sort(values) if (size(s1) != size(s2)) return 'wrong size: %s != %s'.format(size(s1), size(s2)) for ix in sequence(size(s1)) if (s1[ix] != s2[ix]) return 'not equal: index=%s. %s != %s'.format(ix, s1[ix], s2[ix]) return true """
    script = template.replace("__size__", str(sz))

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        script,
        storage,
        machines,
        timeout=TIMEOUT,
        memoryLimitMb=memory
    )

    self.assertTrue(outcome is not None)
    self.assertTrue(outcome.isResult(), outcome)
    self.assertTrue(outcome.asResult.result.pyval == True, outcome)
def test_vector_string_apply(self):
    #verify that the compiler doesn't crap out during many runs.
    # Converts a 10,000,000-element range to strings, appends "a" to each,
    # and sums the sizes, using four workers.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    result = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        """ let v = Vector.range(10000000) let v2 = v.apply(String) let v3 = v2.apply({_ + "a"}) v3.sum(size) """,
        s3,
        4,
        timeout=240
    )

    # Check the outcome explicitly, as the sibling tests do: without these
    # assertions a missing or exceptional result would pass silently.
    self.assertIsNotNone(result)
    self.assertTrue(result.isResult(), result)
def test_splitToRowMajor(self):
    # Builds the setup data with self.setupScript, then times a single
    # evaluation of self.splitToRowMajorScript against it and records the
    # elapsed wall-clock time with PerformanceTestReporter.
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    nRows = 100000
    nColumns = 50

    setupOutcome, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.setupScript(nRows, nColumns),
        storage,
        1,
        timeout=30,
        memoryLimitMb=8 * 1024,
        threadCount=4,
        returnSimulation=True,
        useInMemoryCache=False
    )

    try:
        self.assertTrue(setupOutcome.isResult())
        chunks = setupOutcome.asResult.result

        startedAt = time.time()
        splitOutcome = simulation.compute(
            self.splitToRowMajorScript(),
            timeout=360,
            chunks=chunks
        )
        elapsed = time.time() - startedAt

        self.assertTrue(splitOutcome.isResult())

        PerformanceTestReporter.recordTest(
            "algorithms.text.splitToRowMajor.%srows_%scolumns" % (nRows, nColumns),
            elapsed,
            None
        )
    finally:
        # Always shut the simulation down, even when an assertion fails.
        simulation.teardown()
def classSortingTest(self, sz, useClass = True, machines=1, memory=1000):
    """Run the distributed #Sort task over a vector built with a class or Int64.

    sz: value substituted for __size__ (the vector length N).
    useClass: selects '1' or '0' for __use_class__, which makes the script
        pick `class { member x; }` or `Int64` as the element constructor.
    machines: worker count passed to the simulation.
    memory: memoryLimitMb passed to the simulation.

    The script returns whether the sorted vector has N elements; the test
    requires that value to equal True.
    """
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    template = """ let N = __size__; let C = if (__use_class__) { class { member x; } } else { Int64 } let values = Vector.range(N, C).paged; let s1 = cached`(#ExternalIoTask(#DistributedDataOperation(#Sort(values)))) return size(s1) == N """
    if useClass:
        classFlag = '1'
    else:
        classFlag = '0'
    script = template.replace("__size__", str(sz)).replace("__use_class__", classFlag)

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        script,
        storage,
        machines,
        timeout=TIMEOUT,
        memoryLimitMb=memory
    )

    self.assertTrue(outcome is not None)
    self.assertTrue(outcome.isResult(), outcome)
    self.assertTrue(outcome.asResult.result.pyval == True, outcome)
def multiboxDataTasksSort(self, ct, workers=2, memoryLimit=100, pageSizeOverrideMB=1):
    """Sort a vector of ((x*x) % 503, x) tuples with the distributed #Sort task.

    ct: value substituted for __ct__ (the vector length N).
    workers: worker count passed to the simulation.
    memoryLimit: memoryLimitMb passed to the simulation.
    pageSizeOverrideMB: page size in megabytes; converted to bytes before
        being passed as pageSizeOverride.

    The script ends with sorting.isSorted(result); the test requires that
    value to equal True.
    """
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    template = """ let N = __ct__; let aPrime = 503 let toSort = Vector.range(N, { ((_ * _) % aPrime, _) }).paged; let result = cached`(#ExternalIoTask(#DistributedDataOperation(#Sort(toSort)))) sorting.isSorted(result) """
    script = template.replace("__ct__", str(ct))

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        script,
        storage,
        workers,
        timeout=TIMEOUT,
        memoryLimitMb=memoryLimit,
        pageSizeOverride=pageSizeOverrideMB*1024*1024
    )

    self.assertTrue(outcome is not None)
    self.assertTrue(outcome.isResult(), outcome)
    self.assertTrue(outcome.asResult.result.pyval == True, outcome)
def test_transposeToColumnMajor(self):
    # Builds row-major data with self.transposeSetupScript, then times a
    # single evaluation of self.transposeRowMajorToColumnMajorScript against
    # it and records the elapsed wall-clock time.
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    nRows = 100000
    nColumns = 50

    setupOutcome, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.transposeSetupScript(nRows, nColumns),
        storage,
        1,
        timeout=300,
        memoryLimitMb=45 * 1024,
        threadCount=30,
        returnSimulation=True,
        useInMemoryCache=False
    )

    try:
        self.assertTrue(setupOutcome.isResult())
        rowMajor = setupOutcome.asResult.result

        startedAt = time.time()
        transposeOutcome = simulation.compute(
            self.transposeRowMajorToColumnMajorScript(nRows, nColumns),
            timeout=500,
            rowMajor=rowMajor
        )
        elapsed = time.time() - startedAt

        self.assertTrue(transposeOutcome.isResult())

        PerformanceTestReporter.recordTest(
            "algorithms.text.transposeRowMajorToColumnMajor.%srows_%scolumns" % (nRows, nColumns),
            elapsed,
            None
        )
    finally:
        # Always shut the simulation down, even when an assertion fails.
        simulation.teardown()
def basic_gpu_works_helper(self, function, onGPU=True):
    """Evaluate a script that repeatedly calls `<function> and expect a result.

    The script defines f(ct), a loop that advances x from 1.0 while x < ct
    and sums `<function>(x). f is then applied to each element of
    Vector.range(1024*4, {_+1000000}).

    function: name (a string) placed after a backtick in the script.
    onGPU: true selects `CUDAVectorApply(f, <vector>); false selects
        <vector> ~~ f.

    Asserts the computation (one worker, four threads) returns a non-None
    value whose isResult() is true.
    """
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()
    vectorExpr = "Vector.range(1024*4, {_+1000000})"

    loopSource = (
        """ let f = fun(ct) { let res = 0.0 let x = 1.0 while (x < ct) { x = x + 1.0 res = res + `"""
        + function
        + """(x) } res }"""
    )

    if onGPU:
        applySource = """`CUDAVectorApply(f,""" + vectorExpr + """)"""
    else:
        applySource = vectorExpr + """ ~~ f"""

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        loopSource + applySource,
        storage,
        1,
        timeout=120,
        threadCount=4
    )

    self.assertIsNotNone(outcome)
    self.assertTrue(outcome.isResult(), outcome)
def test_sortVecOfVec(self):
    # Sorts 500000 tuples of (ix % 100, Vector.range(40)) with the
    # distributed #Sort task. The script reports whether the output has the
    # same size as the input and its first tuple fields never decrease.
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    script = """ let values = [] let ct = 500000 values = [(ix % 100, Vector.range(40)) for ix in sequence(ct)] let res = cached`(#ExternalIoTask(#DistributedDataOperation(#Sort(values.paged)))); let firstAreSorted = true; for ix in sequence(size(res)-1) if (res[ix][0] > res[ix+1][0]) firstAreSorted = false; size(res) == size(values) and firstAreSorted """

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        script,
        storage,
        1,
        timeout=TIMEOUT,
        memoryLimitMb=3000
    )

    self.assertTrue(outcome is not None)
    self.assertTrue(outcome.isResult(), outcome)
    self.assertTrue(outcome.asResult.result.pyval == True, outcome)
def test_sortVecOfVec(self):
    # Runs the distributed #Sort task over 500000 tuples whose first field is
    # ix % 100 and whose second field is Vector.range(40). The script yields
    # true when the sizes match and the first fields are in non-decreasing
    # order.
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    script = """ let values = [] let ct = 500000 values = [(ix % 100, Vector.range(40)) for ix in sequence(ct)] let res = cached`(#ExternalIoTask(#DistributedDataOperation(#Sort(values.paged)))); let firstAreSorted = true; for ix in sequence(size(res)-1) if (res[ix][0] > res[ix+1][0]) firstAreSorted = false; size(res) == size(values) and firstAreSorted """

    sortOutcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        script,
        storage,
        1,
        timeout=TIMEOUT,
        memoryLimitMb=3000
    )

    self.assertTrue(sortOutcome is not None)
    self.assertTrue(sortOutcome.isResult(), sortOutcome)
    self.assertTrue(sortOutcome.asResult.result.pyval == True, sortOutcome)
def test_multiboxDataTasksTake_1(self):
    # Runs the distributed #Take task on two workers and compares its output
    # with the same lookup written as `indices ~~ { takeFrom[_[1]] }`; the
    # script evaluates to the result of that comparison.
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    script = """ let N = 10000000; let isPrime = fun(p) { let x = 2 while (x*x <= p) { if (p%x == 0) return 0 x = x + 1 } return x } let takeFrom = Vector.range(N, isPrime).paged; let indices = Vector.range(N,fun(x) { (0, (x * 503) % N ) }).paged; cached`(#ExternalIoTask(#DistributedDataOperation(#Take(indices, takeFrom)))) == indices ~~ { takeFrom[_[1]] } """

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        script,
        storage,
        2,
        timeout=TIMEOUT,
        memoryLimitMb=1000
    )

    self.assertTrue(outcome is not None)
    self.assertTrue(outcome.isResult(), outcome)
    self.assertTrue(outcome.asResult.result.pyval == True, outcome)
def test_vector_string_apply(self):
    #verify that the compiler doesn't crap out during many runs.
    # Maps String over a 10,000,000-element range, appends "a" to every
    # string, and sums the sizes, using four workers.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    script = """ let v = Vector.range(10000000) let v2 = v.apply(String) let v3 = v2.apply({_ + "a"}) v3.sum(size) """

    result = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        script,
        s3,
        4,
        timeout=240
    )

    # Assert on the outcome, matching the sibling tests: if the result is
    # dropped, a missing or exceptional result cannot fail this test.
    self.assertIsNotNone(result)
    self.assertTrue(result.isResult(), result)
def gbmRegressionFittingTest(self, nRows, nColumns, depth, nThreads, maxBoosts):
    """Time successive boosting steps and record a metric per step.

    Generates data with self.dataGenerationScript, builds an initial fitter
    with self.regressionScript(depth, 1), then for each nBoosts in
    range(1, maxBoosts) computes predictions and the next fitter. Two
    timings are recorded per iteration, both measured from the same start
    time (so they are cumulative): "<name>_predict" after the predictions
    and "<name>" after the next fitter.
    """
    testName = self.getTestName(nRows, nColumns, depth, maxBoosts, nThreads)
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    dataOutcome, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.dataGenerationScript(nRows, nColumns),
        storage,
        1,
        timeout=360,
        memoryLimitMb=30 * 1024,
        threadCount=nThreads,
        returnSimulation=True,
        useInMemoryCache=False
    )

    try:
        self.assertTrue(dataOutcome.isResult())
        dfPredictors, dfResponse = dataOutcome.asResult.result

        fitterOutcome = simulation.compute(
            self.regressionScript(depth, 1),
            timeout=360,
            dfResponse=dfResponse,
            dfPredictors=dfPredictors
        )
        fitter = fitterOutcome.asResult.result

        startedAt = time.time()

        for nBoosts in range(1, maxBoosts):
            testName = self.getTestName(nRows, nColumns, depth, nBoosts, nThreads)

            predictionOutcome = simulation.compute(
                "fitter.predictionsAndPseudoresiduals()",
                timeout=360,
                fitter=fitter
            )
            predictions = predictionOutcome.asResult.result
            elapsed = time.time() - startedAt
            PerformanceTestReporter.recordTest(testName + "_predict", elapsed, None)

            nextFitterOutcome = simulation.compute(
                "fitter.nextGivenPredictions(predictions)",
                timeout=360,
                fitter=fitter,
                predictions=predictions
            )
            fitter = nextFitterOutcome.asResult.result
            elapsed = time.time() - startedAt
            PerformanceTestReporter.recordTest(testName, elapsed, None)
    finally:
        simulation.teardown()
def gbmRegressionFittingTest(self, nRows, nColumns, depth, nThreads, maxBoosts):
    """Measure cumulative time for each boosting step up to maxBoosts - 1.

    Data comes from self.dataGenerationScript and the first fitter from
    self.regressionScript(depth, 1). For every nBoosts in
    range(1, maxBoosts) the predictions and the next fitter are computed in
    turn; after each of the two computations the time elapsed since the
    loop began is recorded, under "<name>_predict" and "<name>".
    """
    testName = self.getTestName(nRows, nColumns, depth, maxBoosts, nThreads)
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    generated, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.dataGenerationScript(nRows, nColumns),
        storage,
        1,
        timeout=360,
        memoryLimitMb=30 * 1024,
        threadCount=nThreads,
        returnSimulation=True,
        useInMemoryCache=False
    )

    try:
        self.assertTrue(generated.isResult())
        dfPredictors, dfResponse = generated.asResult.result

        fitter = simulation.compute(
            self.regressionScript(depth, 1),
            timeout=360,
            dfResponse=dfResponse,
            dfPredictors=dfPredictors
        ).asResult.result

        loopStart = time.time()

        for nBoosts in range(1, maxBoosts):
            testName = self.getTestName(nRows, nColumns, depth, nBoosts, nThreads)

            predictions = simulation.compute(
                "fitter.predictionsAndPseudoresiduals()",
                timeout=360,
                fitter=fitter
            ).asResult.result
            sinceStart = time.time() - loopStart
            PerformanceTestReporter.recordTest(testName + "_predict", sinceStart, None)

            fitter = simulation.compute(
                "fitter.nextGivenPredictions(predictions)",
                timeout=360,
                fitter=fitter,
                predictions=predictions
            ).asResult.result
            sinceStart = time.time() - loopStart
            PerformanceTestReporter.recordTest(testName, sinceStart, None)
    finally:
        simulation.teardown()
def test_CalculationRicochet(self):
    # Creates Vector.range(125000000, math.log) on four workers, then times
    # two passes of a script that reads 1000 randomly chosen elements of that
    # vector (seeds 1 and 2) and records one metric per pass.
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    template = """ let f = fun(ct, seed = 1) { let x = 0 let res = [] let it = iterator(math.random.UniformReal(0, size(v), seed)) for ix in sequence(ct) { let x = Int64(pull it) res = res :: (x / Float64(size(v)), v[x]) } return res } v[2] f(__count__,__seed__) """

    vectorOutcome, sim = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        "Vector.range(125000000, math.log)",
        storage,
        4,
        timeout=120,
        memoryLimitMb=400,
        threadCount=1,
        useInMemoryCache=True,
        returnSimulation=True
    )

    passes = (
        ("1", "python.InMemoryCumulus.Ricochet1000.Pass1"),
        ("2", "python.InMemoryCumulus.Ricochet1000.Pass2"),
    )

    try:
        v = vectorOutcome.asResult.result

        for seed, metricName in passes:
            startedAt = time.time()
            sim.compute(
                template.replace("__seed__", seed).replace("__count__", "1000"),
                timeout=120,
                v=v
            )
            PerformanceTestReporter.recordTest(metricName, time.time() - startedAt, None)
    finally:
        sim.teardown()
def takeTest(indexExpr):
    """Evaluate takeText with __indices__ replaced and require a True result.

    Relies on `self`, `s3`, and `takeText` from the enclosing scope.
    """
    script = takeText.replace("__indices__", indexExpr)

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        script,
        s3,
        1,
        timeout=TIMEOUT,
        memoryLimitMb=1000
    )

    self.assertTrue(outcome is not None)
    self.assertTrue(outcome.isResult(), outcome)
    self.assertTrue(outcome.asResult.result.pyval == True, outcome)
def test_disk_scans(self):
    # Creates `gigabytes` vectors of roughly 125000000 elements each on a
    # simulation backed by an S3 object store with the in-memory cache
    # disabled, sums every vector, and records the write time and the
    # write-plus-scan time.
    # NOTE(review): the write metric name says 10GB while `gigabytes` is 8;
    # confirm before renaming, since the name identifies the recorded series.
    s3 = ActualS3Interface.ActualS3InterfaceFactory()
    objectStore = S3ObjectStore.S3ObjectStore(
        s3,
        Setup.config().userDataS3Bucket,
        prefix="test_object_cache/"
    )

    _, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        "1+1",
        s3,
        1,
        memoryLimitMb=1 * 1024,
        threadCount=30,
        returnSimulation=True,
        ioTaskThreadOverride=8,
        objectStore=objectStore,
        useInMemoryCache=False  #use an actual disk cache for this
    )

    try:
        gigabytes = 8

        writeStart = time.time()

        resultVectors = [
            simulation.compute("Vector.range(125000000 + %s)" % ix, timeout=120).asResult.result
            for ix in range(gigabytes)
        ]

        writeEnd = time.time()

        intResults = [
            simulation.compute("v.sum()", timeout=120, v=vec).asResult.result.pyval
            for vec in resultVectors
        ]

        self.assertTrue(len(intResults) == gigabytes)

        PerformanceTestReporter.recordTest(
            "python.BigBox.Disk.Write.10GB",
            writeEnd - writeStart,
            None
        )
        PerformanceTestReporter.recordTest(
            "python.BigBox.Disk.WriteAndScan.%sGB" % gigabytes,
            time.time() - writeStart,
            None
        )
    finally:
        simulation.teardown()
def gbmRegressionFittingTest(self, nRows, nColumns, depth, nThreads, nBoosts, copies, report=True):
    """Time `copies` fits of one builder and optionally record the result.

    Data comes from self.dataGenerationScript and the builder from
    self.regressionScript(depth, nBoosts). A single script then calls
    builder.fit once per element of Vector.range(copies), each time on
    dfPredictors[,-x-1] and dfResponse[,-x-1].

    report: when true, the elapsed time is recorded under the name returned
        by self.getTestName.
    """
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    generated, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.dataGenerationScript(nRows, nColumns),
        storage,
        1,
        timeout=360,
        memoryLimitMb=30 * 1024,
        threadCount=nThreads,
        returnSimulation=True,
        useInMemoryCache=False
    )

    try:
        self.assertTrue(generated.isResult())
        dfPredictors, dfResponse = generated.asResult.result

        builderOutcome = simulation.compute(
            self.regressionScript(depth, nBoosts),
            timeout=360,
            dfResponse=dfResponse,
            dfPredictors=dfPredictors
        )
        builder = builderOutcome.asResult.result

        startedAt = time.time()
        testName = self.getTestName(nRows, nColumns, depth, nBoosts, nThreads, copies)

        fitOutcome = simulation.compute(
            "Vector.range(%s).apply(fun(x) { builder.fit(dfPredictors[,-x-1], dfResponse[,-x-1]) })" % copies,
            timeout=360,
            builder=builder,
            dfPredictors=dfPredictors,
            dfResponse=dfResponse,
        )
        # The value itself is unused; the attribute access is kept from the
        # original call chain.
        fitOutcome.asResult.result

        elapsed = time.time() - startedAt

        if report:
            PerformanceTestReporter.recordTest(testName, elapsed, None)
    finally:
        simulation.teardown()
def takeTest(indexExpr):
    """Substitute indexExpr for __indices__ in takeText and expect True.

    Uses `self`, `s3`, and `takeText` captured from the enclosing scope.
    """
    takeOutcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        takeText.replace("__indices__", indexExpr),
        s3,
        1,
        timeout=TIMEOUT,
        memoryLimitMb=1000
    )

    self.assertTrue(takeOutcome is not None)
    self.assertTrue(takeOutcome.isResult(), takeOutcome)

    value = takeOutcome.asResult.result.pyval
    self.assertTrue(value == True, takeOutcome)
def largeDatasetJoinTest(self, mbOfData, columns, threads, machineCount, ratio = .5):
    """Time an outer join of the two halves of a generated dataset.

    mbOfData, columns: passed to self.dataGenerationScript; also used in the
        recorded metric name.
    threads: threadCount for the simulation.
    machineCount: number of workers.
    ratio: the memory limit is mbOfData / ratio / machineCount.

    The join script splits `data` at size(data)/2 into two DataFrames and
    joins them on "C0". The elapsed time of that computation is logged and
    recorded with PerformanceTestReporter.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.dataGenerationScript(mbOfData, columns),
        s3,
        machineCount,
        timeout=360,
        memoryLimitMb=mbOfData / ratio / machineCount,
        #channelThroughputMBPerSecond = 100.0,
        threadCount=threads,
        returnSimulation=True,
        useInMemoryCache=False,
        disableEventHandler=True
    )

    try:
        self.assertTrue(result.isResult())

        data = result.asResult.result

        joinScript = """ let leftDF = dataframe.DataFrame(data[,size(data)/2]) let rightDF = dataframe.DataFrame(data[size(data)/2,]) size(leftDF.join(rightDF, on: "C0", how: `outer, chunkSize: 1000000, areSorted:true)) """

        t0 = time.time()
        result = simulation.compute(
            joinScript,
            timeout=1080,
            data=data
        )
        totalTimeToReturnResult = time.time() - t0

        logging.info("Total time to join: %s", totalTimeToReturnResult)

        self.assertTrue(result.isResult(), result)

        PerformanceTestReporter.recordTest(
            "algorithms.Join.inMemory_%sMB_%scols_%sthreads_%smachines" %
                (mbOfData, columns, threads, machineCount),
            totalTimeToReturnResult,
            None
        )
    finally:
        # Drop the locals that actually hold computed values before teardown
        # (presumably so no value handles outlive the simulation -- confirm).
        # This replaces assignments to two names this method never bound.
        data = None
        result = None
        simulation.teardown()
def runOnGPU(self, funcExpr, vecExpr, captureExpr=""):
    """Apply funcExpr to vecExpr through #GpuApply and return the outcome.

    funcExpr: text substituted for __funcExpr__ in the script.
    vecExpr: text substituted for __vecExpr__ in the script.
    captureExpr: text placed in front of the script; the substitutions are
        not applied to it.

    Asserts that the outcome is not None and is not an exception, then
    returns it.
    """
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    template = """ let f = __funcExpr__; let vec = __vecExpr__; cached`(#GpuApply(f, vec)); """
    body = template.replace("__funcExpr__", funcExpr).replace("__vecExpr__", vecExpr)
    script = captureExpr + body

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        script,
        storage,
        1,
        timeout=120,
        threadCount=4,
        memoryLimitMb=1000
    )

    self.assertIsNotNone(outcome)
    self.assertFalse(outcome.isException(), "Failed with %s" % outcome)
    return outcome
def test_bigLmOnDataframe(self):
    # Evaluates the `importanceSampling` script on one worker with four
    # threads and the in-memory cache disabled, and expects a result.
    # NOTE(review): the test name mentions bigLm but the script evaluated is
    # `importanceSampling` -- confirm this is intended.
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        importanceSampling,
        storage,
        1,
        memoryLimitMb=4000,
        threadCount=4,
        timeout=240,
        useInMemoryCache=False
    )

    self.assertTrue(outcome.isResult(), outcome)
def test_importanceSampling(self):
    # Evaluates the `importanceSampling` script on four workers with the
    # in-memory cache disabled and expects a result.
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        importanceSampling,
        storage,
        4,
        memoryLimitMb=1000,
        timeout=240,
        useInMemoryCache=False
    )

    self.assertTrue(outcome.isResult())
def largeDatasetJoinTest(self, mbOfData, columns, threads, machineCount, ratio=.5):
    """Generate a dataset, join its two halves, and record the join time.

    mbOfData, columns: passed to self.dataGenerationScript and included in
        the recorded metric name.
    threads: threadCount for the simulation.
    machineCount: number of workers.
    ratio: divisor in the memory limit, mbOfData / ratio / machineCount.

    The join script builds DataFrames from data[,size(data)/2] and
    data[size(data)/2,] and outer-joins them on "C0". Only the join
    computation is timed.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.dataGenerationScript(mbOfData, columns),
        s3,
        machineCount,
        timeout=360,
        memoryLimitMb=mbOfData / ratio / machineCount,
        #channelThroughputMBPerSecond = 100.0,
        threadCount=threads,
        returnSimulation=True,
        useInMemoryCache=False,
        disableEventHandler=True
    )

    try:
        self.assertTrue(result.isResult())

        data = result.asResult.result

        joinScript = """ let leftDF = dataframe.DataFrame(data[,size(data)/2]) let rightDF = dataframe.DataFrame(data[size(data)/2,]) size(leftDF.join(rightDF, on: "C0", how: `outer, chunkSize: 1000000, areSorted:true)) """

        t0 = time.time()
        result = simulation.compute(joinScript, timeout=1080, data=data)
        totalTimeToReturnResult = time.time() - t0

        logging.info("Total time to join: %s", totalTimeToReturnResult)

        self.assertTrue(result.isResult(), result)

        PerformanceTestReporter.recordTest(
            "algorithms.Join.inMemory_%sMB_%scols_%sthreads_%smachines" %
                (mbOfData, columns, threads, machineCount),
            totalTimeToReturnResult,
            None
        )
    finally:
        # Release the locals that hold computed values before teardown
        # (presumably so no value handles outlive the simulation -- confirm).
        # The earlier cleanup cleared two names this method never assigned
        # and left `data` alive.
        data = None
        result = None
        simulation.teardown()
def test_bigLmOnDataframe(self):
    # Runs the `importanceSampling` script on a single worker (four threads,
    # in-memory cache off) and requires the outcome to be a result.
    # NOTE(review): despite the bigLm name, the script evaluated here is
    # `importanceSampling` -- confirm this is intended.
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    simulationOptions = dict(
        memoryLimitMb=4000,
        threadCount=4,
        timeout=240,
        useInMemoryCache=False
    )
    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        importanceSampling,
        storage,
        1,
        **simulationOptions
    )

    self.assertTrue(outcome.isResult(), outcome)
def check_precision_of_function_on_GPU(self, function, input):
    """Compare `<function>(input) via `CUDAVectorApply with math.<function>.

    function: name used both after a backtick in the script and as the
        attribute looked up on Python's math module.
    input: value embedded in the script with str() and passed to the math
        function.

    Asserts that the two values differ by less than 1e-10.
    """
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    script = "".join((
        """ let f = fun(x) { `""",
        function,
        """(x) } `CUDAVectorApply(f, [""",
        str(input),
        """])[0] """,
    ))

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        script,
        storage,
        1,
        timeout=120,
        threadCount=4
    )
    self.assertIsNotNone(outcome)
    self.assertTrue(outcome.isResult(), outcome)

    gpuValue = outcome.asResult.result.pyval
    pythonValue = getattr(math, function)(input)

    difference = abs(gpuValue - pythonValue)
    self.assertTrue(difference < 1e-10)
def check_precision_of_function_on_GPU(self, function, input):
    """Compare `<function>(input) via #GpuApply with math.<function>.

    function: name used both after a backtick in the script and as the
        attribute looked up on Python's math module.
    input: value embedded in the script with str() and passed to the math
        function.

    Asserts that the two values differ by less than 1e-10.
    """
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    script = "".join((
        """ let f = fun(x) { `""",
        function,
        """(x) } cached`(#GpuApply(f, [""",
        str(input),
        """]))[0] """,
    ))

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        script,
        storage,
        1,
        timeout=120,
        threadCount=4
    )
    self.assertIsNotNone(outcome)
    self.assertTrue(outcome.isResult(), outcome)

    gpuValue = outcome.asResult.result.pyval
    pythonValue = getattr(math, function)(input)

    difference = abs(gpuValue - pythonValue)
    self.assertTrue(difference < 1e-10)
def largeDatasetBigLMTest(self, mbOfData, columns, threads, testName):
    """Time a linear regression over a generated dataset.

    mbOfData, columns: passed to self.dataGenerationScript.
    threads: threadCount for the simulation.
    testName: metric name; when None nothing is recorded. Data generation
        time is recorded under testName + "_create" and the regression time
        under testName.
    """
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    startedAt = time.time()

    result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.dataGenerationScript(mbOfData, columns),
        storage,
        1,
        timeout=360,
        memoryLimitMb=50 * 1024,
        threadCount=threads,
        returnSimulation=True,
        useInMemoryCache=False
    )

    shouldRecord = testName is not None

    if shouldRecord:
        PerformanceTestReporter.recordTest(testName + "_create", time.time() - startedAt, None)

    try:
        self.assertTrue(result.isResult())

        dfResponse, dfPredictors = result.asResult.result

        regressionScript = """ let model = math.regression.LinearRegression(dfPredictors, dfResponse, fitIntercept: false); let coefficients = model.coefficients(); coefficients[0] """

        startedAt = time.time()
        result = simulation.compute(
            regressionScript,
            timeout=1080,
            dfResponse=dfResponse,
            dfPredictors=dfPredictors
        )
        elapsed = time.time() - startedAt

        self.assertTrue(result.isResult())

        if shouldRecord:
            PerformanceTestReporter.recordTest(testName, elapsed, None)
    finally:
        # Clear the locals holding computed values before teardown.
        dfResponse = None
        dfPredictors = None
        result = None
        simulation.teardown()
def largeDatasetBigLMTest(self, mbOfData, columns, threads, testName):
    """Generate a dataset and time math.regression.LinearRegression on it.

    mbOfData, columns: passed to self.dataGenerationScript.
    threads: threadCount for the simulation.
    testName: metric name, or None to skip recording. The creation time is
        recorded as testName + "_create"; the regression time as testName.
    """
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    creationStart = time.time()

    result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.dataGenerationScript(mbOfData, columns),
        storage,
        1,
        timeout=360,
        memoryLimitMb=50 * 1024,
        threadCount=threads,
        returnSimulation=True,
        useInMemoryCache=False
    )

    if testName is not None:
        creationTime = time.time() - creationStart
        PerformanceTestReporter.recordTest(testName + "_create", creationTime, None)

    try:
        self.assertTrue(result.isResult())

        dfResponse, dfPredictors = result.asResult.result

        regressionScript = """ let model = math.regression.LinearRegression(dfPredictors, dfResponse, fitIntercept: false); let coefficients = model.coefficients(); coefficients[0] """

        regressionStart = time.time()
        result = simulation.compute(
            regressionScript,
            timeout=1080,
            dfResponse=dfResponse,
            dfPredictors=dfPredictors
        )
        totalTimeToReturnResult = time.time() - regressionStart

        self.assertTrue(result.isResult())

        if testName is not None:
            PerformanceTestReporter.recordTest(testName, totalTimeToReturnResult, None)
    finally:
        # Clear the locals holding computed values before teardown.
        dfResponse = None
        dfPredictors = None
        result = None
        simulation.teardown()
def test_vector_transpose(self):
    # Starts a two-worker simulation (channel throughput set to 50.0 MB/s)
    # and evaluates a script that transposes 5000 vectors of 300 elements
    # each; the test only requires that the evaluation produces a result.
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    _, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        "1+1",
        storage,
        2,
        memoryLimitMb=500,
        timeout=10,
        returnSimulation=True,
        channelThroughputMBPerSecond=50.0
    )

    transposeScript = """ let arrangedContiguously = fun (vecs) { let res = vecs.sum().paged; let tr = [] let low = 0 for v in vecs { tr = tr :: res[low,low+size(v)] low = low + size(v) } tr }; let transpose = fun(vecOfIndexable) { let vecs = arrangedContiguously(vecOfIndexable) let n = size(vecs[0]); [[vecs[jx][ix] for jx in sequence(size(vecs))] for ix in sequence(n)] }; let vectors = Vector.range(5000, {Vector.range(300)}) transpose(vectors) """

    try:
        outcome = simulation.compute(transposeScript, timeout=30.0)
        self.assertTrue(outcome.isResult())
    finally:
        simulation.teardown()
def regressionTreePredictionTest(self, mbOfData, columns, testName, treeDepth, threads, minSamplesSplit=50):
    """Fit a regression tree, then time one prediction pass over the data.

    mbOfData, columns: passed to self.dataGenerationScript.
    testName: name under which the prediction time is recorded.
    treeDepth: first argument to self.regressionScript.
    threads: threadCount for the simulation.
    minSamplesSplit: self.regressionScript receives minSamplesSplit - 1.
    """
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    generated, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.dataGenerationScript(mbOfData, columns),
        storage,
        1,
        timeout=360,
        memoryLimitMb=45 * 1024,
        threadCount=threads,
        returnSimulation=True,
        useInMemoryCache=False
    )

    try:
        self.assertTrue(generated.isResult())

        dfResponse, dfPredictors = generated.asResult.result

        fitTree = simulation.compute(
            self.regressionScript(treeDepth, minSamplesSplit - 1),
            timeout=120,
            dfResponse=dfResponse,
            dfPredictors=dfPredictors
        )

        startedAt = time.time()
        predictionOutcome = simulation.compute(
            # The leading literal (1) is a "dirty flag" embedded in the script.
            ";(%s; fitRegressionTree.predict(dfPredictors));" % 1,
            timeout=120,
            dfPredictors=dfPredictors,
            fitRegressionTree=fitTree.asResult.result
        )
        elapsed = time.time() - startedAt

        self.assertTrue(predictionOutcome.isResult())

        PerformanceTestReporter.recordTest(testName, elapsed, None)
    finally:
        simulation.teardown()
def gbmRegressionFittingTest(self, nRows, nColumns, depth, nThreads, nBoosts, copies, report=True):
    """Fit one builder `copies` times in a single script and time it.

    The data is produced by self.dataGenerationScript and the builder by
    self.regressionScript(depth, nBoosts). The timed script applies
    builder.fit over Vector.range(copies), using dfPredictors[,-x-1] and
    dfResponse[,-x-1] for each x.

    report: when true, the elapsed time is recorded under the name from
        self.getTestName.
    """
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    dataOutcome, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.dataGenerationScript(nRows, nColumns),
        storage,
        1,
        timeout=360,
        memoryLimitMb=30 * 1024,
        threadCount=nThreads,
        returnSimulation=True,
        useInMemoryCache=False
    )

    try:
        self.assertTrue(dataOutcome.isResult())

        dfPredictors, dfResponse = dataOutcome.asResult.result

        builder = simulation.compute(
            self.regressionScript(depth, nBoosts),
            timeout=360,
            dfResponse=dfResponse,
            dfPredictors=dfPredictors
        ).asResult.result

        fitStart = time.time()
        testName = self.getTestName(nRows, nColumns, depth, nBoosts, nThreads, copies)

        fitScript = "Vector.range(%s).apply(fun(x) { builder.fit(dfPredictors[,-x-1], dfResponse[,-x-1]) })" % copies
        fitted = simulation.compute(
            fitScript,
            timeout=360,
            builder=builder,
            dfPredictors=dfPredictors,
            dfResponse=dfResponse,
        ).asResult.result

        fitDuration = time.time() - fitStart

        if report:
            PerformanceTestReporter.recordTest(testName, fitDuration, None)
    finally:
        simulation.teardown()
def test_CalculationRicochet(self):
    # Builds Vector.range(125000000, math.log) across four single-threaded
    # workers, then runs a script that reads 1000 randomly indexed elements
    # of it twice (seed 1, then seed 2), recording the time of each pass.
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    template = """ let f = fun(ct, seed = 1) { let x = 0 let res = [] let it = iterator(math.random.UniformReal(0, size(v), seed)) for ix in sequence(ct) { let x = Int64(pull it) res = res :: (x / Float64(size(v)), v[x]) } return res } v[2] f(__count__,__seed__) """

    vectorOutcome, sim = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        "Vector.range(125000000, math.log)",
        storage,
        4,
        timeout=120,
        memoryLimitMb=400,
        threadCount=1,
        useInMemoryCache=True,
        returnSimulation=True
    )

    try:
        v = vectorOutcome.asResult.result

        seedsAndMetrics = [
            ("1", "python.InMemoryCumulus.Ricochet1000.Pass1"),
            ("2", "python.InMemoryCumulus.Ricochet1000.Pass2"),
        ]
        for seedText, metricName in seedsAndMetrics:
            passStart = time.time()
            passScript = template.replace("__seed__", seedText).replace("__count__", "1000")
            sim.compute(passScript, timeout=120, v=v)
            PerformanceTestReporter.recordTest(metricName, time.time() - passStart, None)
    finally:
        sim.teardown()
def test_disk_scans(self):
    # Uses a simulation backed by an S3 object store, with the in-memory
    # cache disabled, to create `gigabytes` large vectors and then sum each
    # one. Records the time to create them and the time to create plus scan.
    # NOTE(review): the write metric name says 10GB while `gigabytes` is 8;
    # confirm before renaming, since the name identifies the recorded series.
    s3 = ActualS3Interface.ActualS3InterfaceFactory()
    objectStore = S3ObjectStore.S3ObjectStore(
        s3,
        Setup.config().userDataS3Bucket,
        prefix="test_object_cache/"
    )

    _, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        "1+1",
        s3,
        1,
        memoryLimitMb=1 * 1024,
        threadCount=30,
        returnSimulation=True,
        ioTaskThreadOverride=8,
        objectStore=objectStore,
        useInMemoryCache=False  #use an actual disk cache for this
    )

    try:
        gigabytes = 8

        t0 = time.time()

        resultVectors = []
        for ix in range(gigabytes):
            created = simulation.compute("Vector.range(125000000 + %s)" % ix, timeout=120)
            resultVectors.append(created.asResult.result)

        t1 = time.time()

        intResults = []
        for vec in resultVectors:
            summed = simulation.compute("v.sum()", timeout=120, v=vec)
            intResults.append(summed.asResult.result.pyval)

        self.assertTrue(len(intResults) == gigabytes)

        writeDuration = t1 - t0
        PerformanceTestReporter.recordTest("python.BigBox.Disk.Write.10GB", writeDuration, None)

        PerformanceTestReporter.recordTest(
            "python.BigBox.Disk.WriteAndScan.%sGB" % gigabytes,
            time.time() - t0,
            None
        )
    finally:
        simulation.teardown()
def compareCudaToCPUnoCheck(self, funcExpr, vecExpr, captureExpr=""):
    """Run a #GpuApply-versus-CPU comparison script and return the outcome.

    funcExpr: text substituted for __funcExpr__ in the script.
    vecExpr: text substituted for __vecExpr__ in the script.
    captureExpr: text placed in front of the script; the substitutions are
        not applied to it.

    The script yields true when both evaluations agree and throws a
    "<cuda> != <cpu>" string otherwise. Only non-None-ness is asserted here;
    the caller decides what to check on the returned outcome.
    """
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    template = """ let f = __funcExpr__; let vec = __vecExpr__; let cuda = cached`(#GpuApply(f, vec)); let cpu = [f(x) for x in vec] if (cuda == cpu) true else throw String(cuda) + " != " + String(cpu) """
    comparison = template.replace("__funcExpr__", funcExpr).replace("__vecExpr__", vecExpr)
    script = captureExpr + comparison

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        script,
        storage,
        1,
        timeout=120,
        threadCount=4
    )

    self.assertIsNotNone(outcome)
    return outcome
def compareCudaToCPU(self, funcExpr, vecExpr):
    """Check that `CUDAVectorApply and a direct call agree for one value.

    funcExpr: text substituted for __funcExpr__ (the function f).
    vecExpr: text substituted for __vecExpr__ (the single input i).

    The script yields true when `CUDAVectorApply(f, [i])[0] equals f(i) and
    throws a "<cuda> != <cpu>" string otherwise. Asserts that the outcome is
    a result.
    """
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    template = """ let f = __funcExpr__; let i = __vecExpr__; let cuda = `CUDAVectorApply(f, [i])[0]; let cpu = f(i) if (cuda == cpu) true else throw String(cuda) + " != " + String(cpu) """
    script = template.replace("__funcExpr__", funcExpr).replace("__vecExpr__", vecExpr)

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        script,
        storage,
        1,
        timeout=120,
        threadCount=4
    )

    self.assertIsNotNone(outcome)
    self.assertTrue(
        outcome.isResult(),
        "Failed with %s on %s: %s" % (funcExpr, vecExpr, outcome)
    )
def regressionTreePredictionTest(self, mbOfData, columns, testName, treeDepth, threads, minSamplesSplit=50):
    """Time fitRegressionTree.predict over a generated dataset.

    mbOfData, columns: passed to self.dataGenerationScript.
    testName: name under which the prediction time is recorded.
    treeDepth: first argument to self.regressionScript.
    threads: threadCount for the simulation.
    minSamplesSplit: self.regressionScript receives minSamplesSplit - 1.

    Fitting the tree is not part of the timed section.
    """
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    dataOutcome, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.dataGenerationScript(mbOfData, columns),
        storage,
        1,
        timeout=360,
        memoryLimitMb=45 * 1024,
        threadCount=threads,
        returnSimulation=True,
        useInMemoryCache=False
    )

    try:
        self.assertTrue(dataOutcome.isResult())

        dfResponse, dfPredictors = dataOutcome.asResult.result

        fitScript = self.regressionScript(treeDepth, minSamplesSplit - 1)
        fitTree = simulation.compute(
            fitScript,
            timeout=120,
            dfResponse=dfResponse,
            dfPredictors=dfPredictors
        )

        predictStart = time.time()
        predicted = simulation.compute(
            # The leading literal (1) is a "dirty flag" embedded in the script.
            ";(%s; fitRegressionTree.predict(dfPredictors));" % 1,
            timeout=120,
            dfPredictors=dfPredictors,
            fitRegressionTree=fitTree.asResult.result
        )
        totalTimeToReturnResult = time.time() - predictStart

        self.assertTrue(predicted.isResult())

        PerformanceTestReporter.recordTest(testName, totalTimeToReturnResult, None)
    finally:
        simulation.teardown()
def test_takeFromLargeObjectsAsymmetric(self):
    # Runs a distributed #Take that selects every thousandth entry of a
    # paged vector on one worker (page size override: 1 MB). The script
    # compares the output with `indices ~~ {takeFrom[_]}`; the test then
    # checks that every page of the result is smaller than 5 MB.
    # NOTE(review): the script literal contains a `//` comment; confirm that
    # the line breaks inside the literal are intact in the checked-in file.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    text = """ let N = 20; //every thousandth string is 1 MB. Just take those. let takeFrom = [ if (ix % 1000 == 0) (" " * 100 * 100 * 10 * 10 + " " * (ix / 1000)) else "" for ix in sequence(N * 1000)].paged; let indices = Vector.range(N,fun(x) { x * 1000 }).paged; let result = cached`(#ExternalIoTask(#DistributedDataOperation(#Take(indices, takeFrom)))) let targetResult = indices ~~ {takeFrom[_]}; assertions.assertEqual(size(result), size(targetResult)) assertions.assertEqual(result, targetResult) result """

    # Start the simulation before entering the try block. If startup raises
    # there is nothing to tear down, and the finally clause must not touch an
    # unbound `simulation` (that would replace the real error with an
    # UnboundLocalError).
    result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        text,
        s3,
        1,
        timeout=TIMEOUT,
        memoryLimitMb=1000,
        returnSimulation=True,
        pageSizeOverride=1024 * 1024
    )

    try:
        self.assertTrue(result is not None)
        self.assertTrue(result.isResult(), result)

        for page in result.asResult.result.getVectorPageIds(simulation.getWorkerVdm(0)):
            # bytecount is converted to megabytes before comparing.
            self.assertLess(page.bytecount / 1024.0 / 1024.0, 5.0)
    finally:
        simulation.teardown()
def test_multiboxDataTasksTake_2(self):
    # Runs a distributed #Take over ten million elements on eight workers
    # limited to 200 MB each, then checks that the result's first element is
    # an int and that no worker's mmapped high-water mark reached 265 MB.
    storage = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    script = """ let N = 10 * 1000000; let takeFrom = Vector.range(N) let indices = Vector.range(N,fun(x) { (0, (x * 503) % N ) }); cached`(#ExternalIoTask(#DistributedDataOperation(#Take(indices, takeFrom))))[0] """

    outcome, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        script,
        storage,
        8,
        timeout=TIMEOUT,
        memoryLimitMb=200,
        returnSimulation=True
    )

    logging.info("Simulation completed")

    maxHighWatermark = 0

    try:
        for workerIndex in range(8):
            vdm = simulation.getWorkerVdm(workerIndex)
            memoryManager = vdm.getMemoryManager()

            logging.info("Total bytes: %s", memoryManager.getTotalBytesMmappedHighWaterMark())
            maxHighWatermark = max(
                maxHighWatermark,
                memoryManager.getTotalBytesMmappedHighWaterMark()
            )

            # Drop the per-worker handles before moving on.
            vdm = None
            memoryManager = None

        self.assertTrue(outcome is not None)
        self.assertTrue(outcome.isResult(), outcome)
        self.assertTrue(isinstance(outcome.asResult.result.pyval, int), outcome)
    finally:
        simulation.teardown()

    self.assertTrue(maxHighWatermark < 265 * 1024 * 1024)
def test_multiboxDataTasksTake_2(self):
    """Check per-worker mmapped memory after a large distributed #Take.

    Eight workers (memoryLimitMb=200) evaluate a #Take over 10 * 1000000
    elements. The test requires an int-valued result and requires the
    largest mmapped-bytes high-water mark observed across the workers'
    memory managers to be below 265 * 1024 * 1024 bytes.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    # Used for both the simulation size and the per-worker inspection loop,
    # so the two cannot drift apart.
    workerCount = 8

    text = """ let N = 10 * 1000000; let takeFrom = Vector.range(N) let indices = Vector.range(N,fun(x) { (0, (x * 503) % N ) }); cached`(#ExternalIoTask(#DistributedDataOperation(#Take(indices, takeFrom))))[0] """

    result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        text,
        s3,
        workerCount,
        timeout=TIMEOUT,
        memoryLimitMb=200,
        returnSimulation=True
        )

    logging.info("Simulation completed")

    maxHighWatermark = 0
    try:
        for workerIx in range(workerCount):
            memoryManager = simulation.getWorkerVdm(workerIx).getMemoryManager()

            # One read per worker: the value logged is the value compared.
            workerHighWatermark = memoryManager.getTotalBytesMmappedHighWaterMark()
            logging.info("Total bytes: %s", workerHighWatermark)
            if workerHighWatermark > maxHighWatermark:
                maxHighWatermark = workerHighWatermark

            # Release the handle before the next iteration / teardown.
            memoryManager = None

        self.assertIsNotNone(result)
        self.assertTrue(result.isResult(), result)
        self.assertIsInstance(result.asResult.result.pyval, int, result)
    finally:
        simulation.teardown()

    # On failure assertLess prints both operands instead of "False is not true".
    self.assertLess(maxHighWatermark, 265 * 1024 * 1024)
def compareCudaToCPU(self, funcExpr, vecExpr):
    """Assert that applying a function via `CUDAVectorApply matches calling it directly.

    funcExpr and vecExpr are Fora source fragments substituted into a script
    template. The script throws when the two values differ, so the test
    only needs to check that the computation produced a result.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    template = """ let f = __funcExpr__; let i = __vecExpr__; let cuda = `CUDAVectorApply(f, [i])[0]; let cpu = f(i) if (cuda == cpu) true else throw String(cuda) + " != " + String(cpu) """

    # Fill in the two placeholders one at a time, function first.
    script = template.replace("__funcExpr__", funcExpr)
    script = script.replace("__vecExpr__", vecExpr)

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        script,
        s3,
        1,
        timeout=120,
        threadCount=4
        )

    self.assertIsNotNone(outcome)
    self.assertTrue(
        outcome.isResult(),
        "Failed with %s on %s: %s" % (funcExpr, vecExpr, outcome)
        )
def roundtripExecute(self, pyObject, *args):
    """Send pyObject and args through the binary stream, evaluate, and convert back.

    Steps, in order:
      1. walk pyObject and each extra argument into a BinaryObjectRegistry;
      2. read the serialized stream back into implvals using self.vdm;
      3. evaluate a (pyObject, `Call, *args) tuple on a one-worker simulation
         (presumably this invokes pyObject on the arguments -- confirm);
      4. serialize the result value and rehydrate it as a Python object.

    Returns the rehydrated Python object. Fails the test if the simulation
    does not produce a result.
    """
    mappings = PureImplementationMappings.PureImplementationMappings()
    registry = BinaryObjectRegistry.BinaryObjectRegistry()
    objectWalker = PyObjectWalker.PyObjectWalker(mappings, registry)

    # Walk the callable first, then the arguments, remembering each id.
    objectIds = []
    for toWalk in [pyObject] + list(args):
        objectIds.append(objectWalker.walkPyObject(toWalk))

    registry.defineEndOfStream()
    serializedObjects = registry.str()

    reader = PythonBinaryStreamToImplval.constructConverter(
        Converter.canonicalPurePythonModule(),
        self.vdm
        )
    reader.read(serializedObjects)

    implVals = [reader.getObjectById(objectId) for objectId in objectIds]

    # Tuple layout: (callable, `Call, arg0, arg1, ...)
    callPrefix = (implVals[0], ForaNative.makeSymbol("Call"))
    callTuple = callPrefix + tuple(implVals[1:])

    result = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        ForaNative.ImplValContainer(callTuple),
        InMemoryS3Interface.InMemoryS3InterfaceFactory(),
        1
        )

    self.assertTrue(result.isResult(), result)

    resultValue = result.asResult.result

    writer = PythonBinaryStreamFromImplval.constructConverter(
        Converter.canonicalPurePythonModule(),
        self.vdm
        )
    root_id, encoded = writer.write(resultValue)

    rehydrator = PythonObjectRehydrator(
        mappings,
        allowUserCodeModuleLevelLookups=False
        )

    return rehydrator.convertEncodedStringToPythonObject(encoded, root_id)
def largeDatasetBigLMTest(self, mbOfData, columns, threads, machineCount, ratio=.4):
    """Time two linear regressions over a generated dataset and record the total.

    Arguments:
        mbOfData, columns -- passed to self.dataGenerationScript to build the data.
        threads -- threadCount given to the simulation.
        machineCount -- number of simulated workers.
        ratio -- divides mbOfData (together with machineCount) to obtain the
            memoryLimitMb handed to the simulation.

    The recorded time spans from just before the first regression is
    submitted until the second regression returns.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.dataGenerationScript(mbOfData, columns),
        s3,
        machineCount,
        timeout=360,
        memoryLimitMb=mbOfData / ratio / machineCount,
        channelThroughputMBPerSecond=100.0,
        threadCount=threads,
        returnSimulation=True,
        useInMemoryCache=False
        )

    try:
        self.assertTrue(result.isResult(), result)

        dfResponse, dfPredictors = result.asResult.result

        regressionScript = """ let model = math.regression.LinearRegression(dfPredictors, dfResponse,coefficientsOnly:true, splitLimit: 50000); let coefficients = model.coefficients(); coefficients[0] """

        # The clock starts here; data generation above is not timed.
        t0 = time.time()
        result = simulation.compute(
            regressionScript,
            timeout=1080,
            dfResponse=dfResponse,
            dfPredictors=dfPredictors
            )
        self.assertTrue(result.isResult(), result)

        # Parenthesised form prints the same text under the Python 2 print
        # statement and the Python 3 print function.
        print("Done with the first regression")

        regressionScript2 = """ let newCol = dfPredictors.rowApply(fun(row) { math.sin(row[0] ) }) let newCol2 = dfPredictors.rowApply(fun(row) { math.sin(row[0] + 1) }) let model2 = math.regression.LinearRegression(dfPredictors.addColumn(newCol).addColumn(newCol2), dfResponse, coefficientsOnly:true, splitLimit: 50000) model2.coefficients()[0] """

        result2 = simulation.compute(
            regressionScript2,
            timeout=1080,
            dfResponse=dfResponse,
            dfPredictors=dfPredictors
            )

        # Measured from t0, so this covers both regressions.
        totalTimeToReturnResult = time.time() - t0

        self.assertTrue(result2.isResult(), result2)

        PerformanceTestReporter.recordTest(
            "algorithms.linearRegression.inMemory_%sMB_%scols_%sthreads_%smachines" %
                (mbOfData, columns, threads, machineCount),
            totalTimeToReturnResult,
            None
            )
    finally:
        # Drop every local reference to simulation-produced values before
        # teardown (presumably so teardown can release them -- confirm).
        dfResponse = None
        dfPredictors = None
        result = None
        result2 = None
        simulation.teardown()
def dataframeSumTest(self, mbOfData, colCount, threadCount, recordResults=True):
    """Time a sum over randomly chosen columns of rows of a generated dataframe.

    Arguments:
        mbOfData, colCount -- passed to self.dataGenerationScript to build the data.
        threadCount -- threadCount for the simulation; also scales the number
            of rows summed (1000000 per thread).
        recordResults -- when true, report a normalized "seconds per 10 million
            accesses per thread" figure to PerformanceTestReporter.

    Returns the wall-clock duration, in seconds, of the summation itself.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    randomColumnsToPick = 10
    totalRowsToSum = 1000000

    result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.dataGenerationScript(mbOfData, colCount),
        s3,
        count=1,
        timeout=360,
        memoryLimitMb=10000,
        threadCount=threadCount,
        returnSimulation=True,
        useInMemoryCache=False,
        channelThroughputMBPerSecond=None
        )

    try:
        self.assertTrue(result.isResult(), result)

        data = result.asResult.result

        executionScript = (""" let randomRowwiseSumFun = fun (row, randomColumnsToPick, baseSeed){ let rng = iterator(math.random.MultiplyWithCarry(baseSeed + row.rowIndex())); let tr = nothing; let ix = 0; let rowSize = size(row) while (ix < randomColumnsToPick) { let nextIx = (pull rng) % rowSize; tr = tr + row[nextIx] ix = ix + 1 } tr } let randomColumnsToPick = __subsetSize__; let baseSeed = 5; sum(0, __rows_to_sum__, fun(ix) { randomRowwiseSumFun(data[ix % size(data)], randomColumnsToPick, baseSeed) }) """
            .replace("__subsetSize__", str(randomColumnsToPick))
            .replace("__rows_to_sum__", str(totalRowsToSum * threadCount))
            )

        # Only the summation is timed; data generation above is excluded.
        t0 = time.time()
        result = simulation.compute(
            executionScript,
            timeout=1080,
            data=data
            )
        computeDuration = time.time() - t0

        # Validate before reporting so that a failed computation never
        # contributes a timing to the performance record.
        self.assertTrue(result.isResult(), result)

        if recordResults:
            totalValuesAccessed = totalRowsToSum * randomColumnsToPick * threadCount
            totalValuesPerSecondPerThread = totalValuesAccessed * 2 / computeDuration / threadCount
            secondsToDo10MillionPerThread = 10 * 1000000 / totalValuesPerSecondPerThread

            PerformanceTestReporter.recordTest(
                "python.BigBox.RandomColumnAccess.access10mm_%smb_%scols_%sthreads" % (
                    mbOfData, colCount, threadCount
                    ),
                secondsToDo10MillionPerThread,
                None
                )

        return computeDuration
    finally:
        # Release the references this function actually holds before
        # teardown (presumably so teardown can release them -- confirm).
        data = None
        result = None
        simulation.teardown()
def computeUsingSeveralWorkers(self, *args, **kwds):
    """Forward every argument to InMemoryCumulusSimulation.computeUsingSeveralWorkers.

    Returns whatever the underlying call returns.
    """
    runSimulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers
    return runSimulation(*args, **kwds)
def largeDatasetBigLMTest(self, mbOfData, columns, threads, machineCount, ratio=.5):
    """Run two linear regressions on generated data and record the combined time.

    Arguments:
        mbOfData, columns -- forwarded to self.dataGenerationScript.
        threads -- threadCount for the simulation.
        machineCount -- number of simulated workers.
        ratio -- the simulation's memoryLimitMb is computed as
            mbOfData / ratio / machineCount.

    The value reported to PerformanceTestReporter is the elapsed time from
    submitting the first regression until the second one returns.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.dataGenerationScript(mbOfData, columns),
        s3,
        machineCount,
        timeout=360,
        memoryLimitMb=mbOfData / ratio / machineCount,
        channelThroughputMBPerSecond=100.0,
        threadCount=threads,
        returnSimulation=True,
        useInMemoryCache=False
        )

    try:
        self.assertTrue(result.isResult(), result)

        dfResponse, dfPredictors = result.asResult.result

        regressionScript = """ let model = math.regression.LinearRegression(dfPredictors, dfResponse,coefficientsOnly:true); let coefficients = model.coefficients(); coefficients[0] """

        # Timing begins with the first regression; dataset generation is
        # deliberately left out of the measurement.
        t0 = time.time()
        result = simulation.compute(
            regressionScript,
            timeout=1080,
            dfResponse=dfResponse,
            dfPredictors=dfPredictors
            )
        self.assertTrue(result.isResult(), result)

        # Works as both a Python 2 print statement and a Python 3 call.
        print("Done with the first regression")

        regressionScript2 = """ let newCol = dfPredictors.rowApply(fun(row) { math.sin(row[0] ) }) let newCol2 = dfPredictors.rowApply(fun(row) { math.sin(row[0] + 1) }) let model2 = math.regression.LinearRegression(dfPredictors.addColumn(newCol).addColumn(newCol2), dfResponse, coefficientsOnly:true) model2.coefficients()[0] """

        result2 = simulation.compute(
            regressionScript2,
            timeout=1080,
            dfResponse=dfResponse,
            dfPredictors=dfPredictors
            )

        # Still relative to t0: both regressions are included.
        totalTimeToReturnResult = time.time() - t0

        self.assertTrue(result2.isResult(), result2)

        PerformanceTestReporter.recordTest(
            "algorithms.linearRegression.inMemory_%sMB_%scols_%sthreads_%smachines" %
                (mbOfData, columns, threads, machineCount),
            totalTimeToReturnResult,
            None
            )
    finally:
        # Clear all locals that refer to simulation-produced values before
        # teardown (presumably so teardown can release them -- confirm).
        dfResponse = None
        dfPredictors = None
        result = None
        result2 = None
        simulation.teardown()
def computeUsingSeveralWorkers(self, *args, **kwds):
    """Delegate to InMemoryCumulusSimulation.computeUsingSeveralWorkers unchanged.

    Positional and keyword arguments are passed through as given, and the
    delegate's return value is handed back to the caller.
    """
    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(*args, **kwds)
    return outcome
def dataframeSumTest(self, mbOfData, colCount, threadCount, recordResults=True):
    """Measure how long it takes to sum random column picks across dataframe rows.

    Arguments:
        mbOfData, colCount -- forwarded to self.dataGenerationScript.
        threadCount -- simulation threadCount; the script sums
            1000000 * threadCount rows, picking 10 columns from each.
        recordResults -- when true, a normalized per-thread timing is sent to
            PerformanceTestReporter.

    Returns the elapsed seconds of the summation computation.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    randomColumnsToPick = 10
    totalRowsToSum = 1000000

    result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.dataGenerationScript(mbOfData, colCount),
        s3,
        count=1,
        timeout=360,
        memoryLimitMb=10000,
        threadCount=threadCount,
        returnSimulation=True,
        useInMemoryCache=False,
        channelThroughputMBPerSecond=None
        )

    try:
        self.assertTrue(result.isResult(), result)

        data = result.asResult.result

        scriptTemplate = """ let randomRowwiseSumFun = fun (row, randomColumnsToPick, baseSeed){ let rng = iterator(math.random.MultiplyWithCarry(baseSeed + row.rowIndex())); let tr = nothing; let ix = 0; let rowSize = size(row) while (ix < randomColumnsToPick) { let nextIx = (pull rng) % rowSize; tr = tr + row[nextIx] ix = ix + 1 } tr } let randomColumnsToPick = __subsetSize__; let baseSeed = 5; sum(0, __rows_to_sum__, fun(ix) { randomRowwiseSumFun(data[ix % size(data)], randomColumnsToPick, baseSeed) }) """
        executionScript = scriptTemplate.replace(
            "__subsetSize__", str(randomColumnsToPick)
            ).replace(
            "__rows_to_sum__", str(totalRowsToSum * threadCount)
            )

        # The timer brackets only the summation, not data generation.
        t0 = time.time()
        result = simulation.compute(executionScript, timeout=1080, data=data)
        computeDuration = time.time() - t0

        # Check the outcome first: a failed computation must not end up in
        # the performance record.
        self.assertTrue(result.isResult(), result)

        if recordResults:
            totalValuesAccessed = totalRowsToSum * randomColumnsToPick * threadCount
            totalValuesPerSecondPerThread = totalValuesAccessed * 2 / computeDuration / threadCount
            secondsToDo10MillionPerThread = 10 * 1000000 / totalValuesPerSecondPerThread

            PerformanceTestReporter.recordTest(
                "python.BigBox.RandomColumnAccess.access10mm_%smb_%scols_%sthreads" %
                    (mbOfData, colCount, threadCount),
                secondsToDo10MillionPerThread,
                None
                )

        return computeDuration
    finally:
        # Clear the names this function really binds (`data`, `result`)
        # before teardown (presumably so teardown can release them -- confirm).
        data = None
        result = None
        simulation.teardown()