def weirdStringSort(self, sz, machines=1, memory=1000):
    """Sort a paged vector of `sz` variable-length strings distributedly and
    compare it element-by-element against the in-process reference sort."""
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    foraText = """
        let N = __size__;
        let values = Vector.range(N, fun(ix) { " " * ix }).paged;
        let s1 = cached`(#ExternalIoTask(#DistributedDataOperation(#Sort(values))))
        let s2 = sorting.sort(values)

        if (size(s1) != size(s2))
            return 'wrong size: %s != %s'.format(size(s1), size(s2))

        for ix in sequence(size(s1))
            if (s1[ix] != s2[ix])
                return 'not equal: index=%s. %s != %s'.format(ix, s1[ix], s2[ix])

        return true
        """.replace("__size__", str(sz))

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        foraText, s3, machines, timeout=TIMEOUT, memoryLimitMb=memory)

    self.assertTrue(outcome is not None)
    self.assertTrue(outcome.isResult(), outcome)
    self.assertTrue(outcome.asResult.result.pyval == True, outcome)
def test_sortManySimilarValues(self):
    """Distributed-sort one million alternating 0/1 values; verify (in FORA)
    that the output is sorted, type-homogeneous, and size-preserving."""
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    foraText = """
        let values = []
        let ct = 1000000
        for ix in sequence(ct)
            values = values :: (ix%2)

        let sortedVals = cached`(#ExternalIoTask(#DistributedDataOperation(#Sort(values.paged))))

        let sortedAndHomogenous = fun(v) {
            for ix in sequence(size(v)-1)
                if (v[ix] > v[ix+1] or `TypeJOV(v[ix]) is not `TypeJOV(v[ix+1]))
                    throw (ix, v[ix], v[ix+1], `TypeJOV(v[ix]), `TypeJOV(v[ix+1]))
            return true;
            }

        if (size(sortedVals) != size(values))
            throw "expected " + String(size(values)) + ", not " + String(size(sortedVals))

        sortedAndHomogenous(sortedVals)
        """

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        foraText, s3, 1, timeout=TIMEOUT, memoryLimitMb=1000)

    self.assertTrue(outcome is not None)
    self.assertTrue(outcome.isResult(), outcome)
    self.assertTrue(outcome.asResult.result.pyval == True, outcome)
def test_sortVecOfVec(self):
    """Distributed-sort 500k (key, vector) tuples; check the keys come back
    in nondecreasing order and the result size matches the input."""
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    foraText = """
        let values = []
        let ct = 500000
        values = [(ix % 100, Vector.range(40)) for ix in sequence(ct)]

        let res = cached`(#ExternalIoTask(#DistributedDataOperation(#Sort(values.paged))));

        let firstAreSorted = true;
        for ix in sequence(size(res)-1)
            if (res[ix][0] > res[ix+1][0])
                firstAreSorted = false;

        size(res) == size(values) and firstAreSorted
        """

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        foraText, s3, 1, timeout=TIMEOUT, memoryLimitMb=3000)

    self.assertTrue(outcome is not None)
    self.assertTrue(outcome.isResult(), outcome)
    self.assertTrue(outcome.asResult.result.pyval == True, outcome)
def test_takeFromLargeObjects(self):
    """#Take over a vector of ~1MB strings: every page of the result must
    stay under 2MB despite the large individual elements.

    Fix: `simulation` was assigned inside the try block, so any failure in
    computeUsingSeveralWorkers raised an unrelated NameError from the
    finally clause instead of the real error.  The simulation handle is
    now created before entering the try.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    text = """
        let N = 100;

        //each string is 1 MB
        let takeFrom = [" " * 100 * 100 * 10 * 10 + " " * ix for ix in sequence(N)].paged;

        let indices = Vector.range(N,fun(x) { x }).paged;

        cached`(#ExternalIoTask(#DistributedDataOperation(#Take(indices, takeFrom))))
        """

    result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        text, s3, 1,
        timeout=TIMEOUT,
        memoryLimitMb=1000,
        returnSimulation=True,
        pageSizeOverride=1024 * 1024)
    try:
        self.assertTrue(result is not None)
        self.assertTrue(result.isResult(), result)

        # no page of the result should be much bigger than the 1MB override
        for page in result.asResult.result.getVectorPageIds(simulation.getWorkerVdm(0)):
            self.assertLess(page.bytecount / 1024.0 / 1024.0, 2.0)
    finally:
        simulation.teardown()
def test_multiboxDataTasksTake_1(self):
    """Distributed #Take across two workers must agree with doing the same
    lookups elementwise in FORA."""
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    foraText = """
        let N = 10000000;

        let isPrime = fun(p) {
            let x = 2
            while (x*x <= p) {
                if (p%x == 0)
                    return 0
                x = x + 1
                }
            return x
            }

        let takeFrom = Vector.range(N, isPrime).paged;
        let indices = Vector.range(N,fun(x) { (0, (x * 503) % N ) }).paged;

        cached`(#ExternalIoTask(#DistributedDataOperation(#Take(indices, takeFrom)))) == indices ~~ { takeFrom[_[1]] }
        """

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        foraText, s3, 2, timeout=TIMEOUT, memoryLimitMb=1000)

    self.assertTrue(outcome is not None)
    self.assertTrue(outcome.isResult(), outcome)
    self.assertTrue(outcome.asResult.result.pyval == True, outcome)
def multiboxDataTasksSort(self, ct, workers=2, memoryLimit=100, pageSizeOverrideMB=1):
    """Distributed-sort `ct` pseudo-random pairs across `workers` machines
    and assert (via FORA's sorting.isSorted) that the result is ordered."""
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    foraText = """
        let N = __ct__;
        let aPrime = 503
        let toSort = Vector.range(N, { ((_ * _) % aPrime, _) }).paged;

        let result = cached`(#ExternalIoTask(#DistributedDataOperation(#Sort(toSort))))

        sorting.isSorted(result)
        """.replace("__ct__", str(ct))

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        foraText, s3, workers,
        timeout=TIMEOUT,
        memoryLimitMb=memoryLimit,
        pageSizeOverride=pageSizeOverrideMB * 1024 * 1024)

    self.assertTrue(outcome is not None)
    self.assertTrue(outcome.isResult(), outcome)
    self.assertTrue(outcome.asResult.result.pyval == True, outcome)
def disable_createVectorAndReferenceInMultipleComputations(self):
    """Disabled test: create ten large paged vectors in one computation,
    reference them from several dependent computations, and check that the
    total active page footprint stays small (i.e. the vectors are shared
    rather than duplicated per consumer).

    NOTE(review): the 'disable_' prefix keeps the unittest runner from
    picking this up -- presumably flaky or slow; confirm before renaming.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    # the "1+1" computation only bootstraps a 2-worker simulation handle
    result, simulation = self.computeUsingSeveralWorkers(
        "1+1", s3, 2, memoryLimitMb=1000, returnSimulation=True, useInMemoryCache=False)
    try:
        # ten paged Float32 vectors of 40M elements each
        vecComputation = simulation.createComputation("""
            let count = 1000 * 1000 * 40
            let fpow = fun(p) { fun(x) { Float32( (x / 1000000000.0) ) } };
            Vector.range(10) ~~ fun(p) { Vector.range(count, fpow(p)).paged }
            """)

        #we want to verify that all of these computations use the same copy of the
        #bigvec that we create in the 'vecComputation' instance
        predComp = simulation.createComputation(
            "dataframe.DataFrame(vecs[1,])", vecs=vecComputation)
        regComp = simulation.createComputation(
            "dataframe.DataFrame(vecs[,1])", vecs=vecComputation)
        predCompStr = simulation.createComputation("String(pred)", pred=predComp)
        regCompStr = simulation.createComputation("String(reg)", reg=regComp)
        vecSumComp = simulation.createComputation("vecs ~~ {_.sum()}", vecs=vecComputation)

        simulation.submitComputation(predCompStr)
        simulation.submitComputation(regCompStr)
        simulation.submitComputation(vecSumComp)

        # wait for all three submissions to finish; the results themselves
        # are not inspected -- only the page footprint below matters
        r1 = simulation.waitForAnyResult(timeout=60.0)
        r2 = simulation.waitForAnyResult(timeout=60.0)
        r3 = simulation.waitForAnyResult(timeout=60.0)

        #verify that simulation didn't write to disk
        sprt = simulation.getWorker(0).getSystemwidePageRefcountTracker()
        totalGb = sum([x.bytecount for x in sprt.getAllActivePages()
                       ]) / 1024.0 / 1024.0 / 1024.0
        logging.critical("%s", sprt.getViewOfSystem())
        self.assertTrue(totalGb < 2.0, totalGb)
    finally:
        simulation.teardown()
def stringToInt64ParsingTest(self, threads, testName):
    """Time Int64 parsing of a string inside `threads` parallel FORA loops
    and record the elapsed wall-clock time under `testName`.

    Fixes: the original spun up the burn-in simulation twice and never tore
    down the first one (leaking its workers for the rest of the process),
    and assigned t0 twice without ever reading the first value.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    #we wish we could actually test that we achieve saturation here but we can't yet.
    text = """
        let doALoop = fun(x) {
            //pass 's' through a vector so that the compiler can't tell what it is
            let s = ["2013"][0];
            let res = 0
            for ix in sequence(x) {
                if (ix == 0)
                    s = s + String(ix)
                res = res + Int64(s) + ix
                }
            res
            };

        Vector.range(__thread_count__) ~~ {doALoop(20000000 + _)}
        """.replace("__thread_count__", str(threads))

    # burn-in pass: boot and discard one simulation so startup costs don't
    # pollute the timing below; tear it down so its workers are released
    _, simulation = self.computeUsingSeveralWorkers(
        "1+1", s3, 1,
        timeout=240,
        memoryLimitMb=55 * 1024,
        threadCount=30,
        returnSimulation=True,
        useInMemoryCache=False)
    simulation.teardown()

    _, simulation = self.computeUsingSeveralWorkers(
        "1+1", s3, 1,
        timeout=240,
        memoryLimitMb=55 * 1024,
        threadCount=30,
        returnSimulation=True,
        useInMemoryCache=False)
    try:
        t0 = time.time()
        result = simulation.compute(text, timeout=240)
        totalTimeToReturnResult = time.time() - t0

        PerformanceTestReporter.recordTest(testName, totalTimeToReturnResult, None)
    finally:
        simulation.teardown()
def test_sortVec2(self):
    """sort() over a 50k-element vector containing many duplicate keys
    must produce a sorted result."""
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    foraText = """
        let v = Vector.range(50000, fun(ix) { ix / 10 } );
        sorting.isSorted(sort(v))
        """
    outcome = self.computeUsingSeveralWorkers(foraText, s3, 4)

    self.assertEqual(outcome.asResult.result.pyval, True)
def gbmRegressionFittingTest(self, nRows, nColumns, depth, nThreads, maxBoosts):
    """Benchmark iterative GBM regression fitting round by round.

    Generates an nRows x nColumns dataset, builds a depth-`depth` fitter
    with a single boost, then for each boosting round times
    predictionsAndPseudoresiduals() and nextGivenPredictions(), recording
    both under PerformanceTestReporter.
    """
    testName = self.getTestName(nRows, nColumns, depth, maxBoosts, nThreads)
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.dataGenerationScript(nRows, nColumns),
        s3, 1,
        timeout=360,
        memoryLimitMb=30 * 1024,
        threadCount=nThreads,
        returnSimulation=True,
        useInMemoryCache=False)
    try:
        self.assertTrue(result.isResult())
        dfPredictors, dfResponse = result.asResult.result

        fitter = simulation.compute(
            self.regressionScript(depth, 1),
            timeout=360,
            dfResponse=dfResponse,
            dfPredictors=dfPredictors).asResult.result

        # t0 is set once: recorded times are cumulative from the start of
        # boosting, not per-round deltas
        t0 = time.time()
        for nBoosts in range(1, maxBoosts):
            # testName is rebound per round so each boost count is reported
            # as a separate datapoint
            testName = self.getTestName(nRows, nColumns, depth, nBoosts, nThreads)

            predictions = simulation.compute(
                "fitter.predictionsAndPseudoresiduals()",
                timeout=360,
                fitter=fitter).asResult.result
            totalTimeToReturnResult = time.time() - t0
            PerformanceTestReporter.recordTest(
                testName + "_predict", totalTimeToReturnResult, None)

            fitter = simulation.compute(
                "fitter.nextGivenPredictions(predictions)",
                timeout=360,
                fitter=fitter,
                predictions=predictions).asResult.result
            totalTimeToReturnResult = time.time() - t0
            PerformanceTestReporter.recordTest(testName, totalTimeToReturnResult, None)
    finally:
        simulation.teardown()
def test_effectiveParallelism(self):
    """Measure effective parallelism: the ratio of total interpreter+compiler
    time across threads to elapsed wall-clock time for a 2-worker run.

    Fix: the FORA program was duplicated inline for the burn-in and the
    timed pass; it is now a single local so the two passes cannot drift
    out of sync.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    text = """
        let v = Vector.range(5000000, { (1,_) } );

        let f = fun(ix) {
            let res = 0
            for x in sequence( (ix - 2000) >>> 0, ix )
                res = res + size(v[x])
            res
            }

        Vector.range(size(v), f).sum()
        """

    #do a burn-in run
    self.computeUsingSeveralWorkers(
        text, s3, 2, wantsStats=True, timeout=240, memoryLimitMb=500)[1]

    t0 = time.time()
    stats = self.computeUsingSeveralWorkers(
        text, s3, 2, wantsStats=True, timeout=240, memoryLimitMb=500)[1]
    timeElapsed = time.time() - t0

    totalTime = stats.timeSpentInInterpreter + stats.timeSpentInCompiler
    effParallelism = totalTime / timeElapsed

    PerformanceTestReporter.recordTest(
        "python.cumulus.EffectiveParallelism.elapsed", timeElapsed, None)
    PerformanceTestReporter.recordTest(
        "python.cumulus.EffectiveParallelism.effectiveCores",
        effParallelism, {}, units='count')
def test_CalculationRicochet(self):
    """Two passes of 1000 random lookups into a large paged vector; record
    the wall-clock time of each pass."""
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    template = """
        let f = fun(ct, seed = 1) {
            let x = 0
            let res = []
            let it = iterator(math.random.UniformReal(0, size(v), seed))
            for ix in sequence(ct) {
                let x = Int64(pull it)
                res = res :: (x / Float64(size(v)), v[x])
                }
            return res
            }
        v[2]
        f(__count__,__seed__)
        """

    vResult, sim = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        "Vector.range(125000000, math.log)",
        s3, 4,
        timeout=120,
        memoryLimitMb=400,
        threadCount=1,
        useInMemoryCache=True,
        returnSimulation=True)
    try:
        v = vResult.asResult.result

        # same workload, different seed, per pass
        for passIx, seed in ((1, "1"), (2, "2")):
            startTime = time.time()
            sim.compute(
                template.replace("__seed__", seed).replace("__count__", "1000"),
                timeout=120,
                v=v)
            PerformanceTestReporter.recordTest(
                "python.InMemoryCumulus.Ricochet1000.Pass%s" % passIx,
                time.time() - startTime,
                None)
    finally:
        sim.teardown()
def test_map_with_common(self):
    """Sum over 30M elements where every access indexes a shared paged
    vector; must succeed under a tight 100MB memory limit."""
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    foraText = """
        let v1 = Vector.range(1000000).paged;
        let v2 = Vector.range(30000000)

        v2.sum(fun(i) { v1[(i * 100) % size(v1)] })
        """
    outcome = self.computeUsingSeveralWorkers(
        foraText, s3, 8, timeout=240, memoryLimitMb=100)

    self.assertTrue(outcome.isResult())
def gbmRegressionFittingTest(self, nRows, nColumns, depth, nThreads, nBoosts, copies, report=True):
    """Benchmark fitting `copies` GBM models in a single FORA apply.

    Generates an nRows x nColumns dataset, builds a regression builder with
    `nBoosts` boosts at tree depth `depth`, then times one computation that
    fits `copies` models (each over a slightly different column slice --
    presumably leave-one-out via the negative index; confirm against the
    FORA slicing semantics).  When `report` is true the elapsed time is
    recorded under PerformanceTestReporter.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.dataGenerationScript(nRows, nColumns),
        s3, 1,
        timeout=360,
        memoryLimitMb=30 * 1024,
        threadCount=nThreads,
        returnSimulation=True,
        useInMemoryCache=False)
    try:
        self.assertTrue(result.isResult())
        dfPredictors, dfResponse = result.asResult.result

        builder = simulation.compute(
            self.regressionScript(depth, nBoosts),
            timeout=360,
            dfResponse=dfResponse,
            dfPredictors=dfPredictors).asResult.result

        t0 = time.time()
        testName = self.getTestName(nRows, nColumns, depth, nBoosts, nThreads, copies)

        result = simulation.compute(
            "Vector.range(%s).apply(fun(x) { builder.fit(dfPredictors[,-x-1], dfResponse[,-x-1]) })" % copies,
            timeout=360,
            builder=builder,
            dfPredictors=dfPredictors,
            dfResponse=dfResponse,
            ).asResult.result
        totalTimeToReturnResult = time.time() - t0

        if report:
            PerformanceTestReporter.recordTest(testName, totalTimeToReturnResult, None)
    finally:
        simulation.teardown()
def loadCheckpointFromFreshSimulationTest(self, calculationText, timestampsPerPassList, clientCount=1, timestep=1.0):
    """Run `calculationText` across a series of freshly-booted simulations
    that share one shared-state view, sampling checkpoint ages throughout.

    For each entry in `timestampsPerPassList`: boot a 4-worker simulation
    (reusing the previous pass's sharedStateViewFactory so checkpoints
    carry over), submit the computation on every client, take that many
    timed samples, then trigger a full checkpoint, wait for it, and sample
    once more.

    Returns a list with one entry per pass; each entry is the list of
    checkpoint-elapsed-time samples gathered during that pass.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    statuses = []
    viewFactory = None  # None on the first pass: the simulation creates its own view

    for timestampsThisPass in timestampsPerPassList:
        simulation = InMemoryCumulusSimulation.InMemoryCumulusSimulation(
            4, #worker count
            clientCount,
            memoryPerWorkerMB=100,
            threadsPerWorker=2,
            s3Service=s3,
            sharedStateViewFactory=viewFactory)

        # carry the shared-state view into the next pass so the next fresh
        # simulation can load the checkpoints written by this one
        viewFactory = simulation.sharedStateViewFactory

        statusesThisPass = []
        try:
            self.assertTrue(simulation.waitForGlobalScheduler(timeout=2.0))
            simulation.getGlobalScheduler().setCheckpointStatusInterval(0.1)

            for ix in range(clientCount):
                simulation.submitComputationOnClient(ix, calculationText)

            # periodic samples while the computation runs
            for subPass in range(timestampsThisPass):
                time.sleep(timestep)
                statusesThisPass.append(
                    self.timeElapsedOfMostRecentCheckpoints(simulation))

            # force a full checkpoint and take one final sample
            simulation.getGlobalScheduler().triggerFullCheckpointsOnOutstandingComputations()
            self.waitForFullCheckpoint(simulation)
            statusesThisPass.append(
                self.timeElapsedOfMostRecentCheckpoints(simulation))
        finally:
            # dump the state of all 4 workers before tearing down
            for ix in range(4):
                simulation.getWorker(ix).dumpStateToLog()
            simulation.teardown()

        statuses.append(statusesThisPass)

    return statuses
def test_CreateManySmallVectors(self):
    """Build a million-element vector via repeated concatenation and check
    the sum equals the closed-form 0+1+...+999999."""
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    foraText = """
        let v = [];
        for ix in sequence(1000000) {
            v = v + [ix]
            };
        v.sum()
        """
    outcome = self.computeUsingSeveralWorkers(foraText, s3, 4)

    self.assertTrue(outcome.isResult())
    self.assertEqual(outcome.asResult.result.pyval, 499999500000)
def test_takeLookupSemantics(self):
    """Distributed #Take must match direct per-index lookup semantics,
    including out-of-range and malformed indices (which yield nothing)."""
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    takeText = """
        let directTake = fun(v, i) {
            i ~~ fun
                ((filters.IsInteger(...) ix1,filters.IsInteger(...) ix2)) {
                    try { [v][ix1][ix2] } catch(...) { nothing }
                    }
                (ix) {
                    try { v[ix] } catch (...) { nothing }
                    }
            };

        let takeFrom = [1,2,3,4].paged;
        let indices = __indices__.paged;

        let result = cached`(#ExternalIoTask(#DistributedDataOperation(#Take(indices, takeFrom))));
        let targetResult = directTake(takeFrom, indices)

        assertions.assertEqual(size(result), size(targetResult))
        for ix in sequence(size(result))
            if (result[ix] is not targetResult[ix])
                return "Expected %s to yield %s, but got %s".format(
                    indices[ix], targetResult[ix], result[ix]
                    );

        return true;
        """

    def runTakeCase(indexExpr):
        outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
            takeText.replace("__indices__", indexExpr),
            s3, 1,
            timeout=TIMEOUT,
            memoryLimitMb=1000)
        self.assertTrue(outcome is not None)
        self.assertTrue(outcome.isResult(), outcome)
        self.assertTrue(outcome.asResult.result.pyval == True, outcome)

    # plain, negative, out-of-range, tuple-form, and malformed index sets
    for indexExpr in [
            "[0,1,2,3]",
            "[0,-1,2,3]",
            "[0,1,2,30]",
            "[(0,0),(0,1),(0,2),(0,3)]",
            "[(0,0),(0,1),(0,2),(0,30)]",
            "[(0,0),(0,1),(0,2),(3,0)]",
            "[(0u8,0u16),(0u32,1u64),(0s32,2s8),(0s16,3s64)]",
            "[0,-1,(), (0,0), (0,0.0), nothing, (1,0), (0u8,6u16), (-1,2)]",
            ]:
        runTakeCase(indexExpr)
def runOnGPU(self, funcExpr, vecExpr, captureExpr=""):
    """Apply `funcExpr` to `vecExpr` via #GpuApply (with optional capture
    preamble) and return the non-exception computation result."""
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    program = captureExpr + """
        let f = __funcExpr__;
        let vec = __vecExpr__;
        cached`(#GpuApply(f, vec));
        """.replace("__funcExpr__", funcExpr).replace("__vecExpr__", vecExpr)

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        program, s3, 1, timeout=120, threadCount=4, memoryLimitMb=1000)

    self.assertIsNotNone(outcome)
    self.assertFalse(outcome.isException(), "Failed with %s" % outcome)
    return outcome
def test_DataFanout(self):
    """Eight single-element paged vectors each reduced over 10**8.5
    iterations in parallel; exercises fan-out of small pages to workers."""
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    #we wish we could actually test that we achieve saturation here but we can't yet.
    foraText = """
        let v = [[x].paged for x in sequence(8)]
        let q = v ~~ {
            let r = 0;
            for ix in sequence(10**8.5)
                r = r + _[0];
            r
            }
        q.sum()
        """
    self.assertIsNotNone(
        self.computeUsingSeveralWorkers(foraText, s3, 4, timeout=120))
def test_gcOfPagedVectors(self):
    """One hundred successive paged Vector.range sums; earlier pages must
    be collectable or the simulation would exhaust its memory."""
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    foraText = """
        let res = 0
        for ix in sequence(100) {
            res = res + Vector.range(1000000+ix).paged.sum()
            }
        res
        """
    self.computeUsingSeveralWorkers(foraText, s3, 4, timeout=240)
def test_bigLmOnDataframe(self):
    """Run the module-level 'importanceSampling' program on one 4-thread
    worker and require a non-exception result."""
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        importanceSampling,
        s3, 1,
        memoryLimitMb=4000,
        threadCount=4,
        timeout=240,
        useInMemoryCache=False)

    self.assertTrue(outcome.isResult(), outcome)
def largeDatasetJoinTest(self, mbOfData, columns, threads, machineCount, ratio=.5):
    """Benchmark an outer join of the two halves of a generated
    `mbOfData`-MB, `columns`-column dataframe and record the elapsed time.

    Fixes: removed a dead t0 assignment that was never read before being
    reassigned, and dropped finally-clause assignments to dfResponse /
    dfPredictors -- names never bound or used in this function (copy-paste
    residue from the LM benchmark).
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.dataGenerationScript(mbOfData, columns),
        s3, machineCount,
        timeout=360,
        memoryLimitMb=mbOfData / ratio / machineCount,
        #channelThroughputMBPerSecond = 100.0,
        threadCount=threads,
        returnSimulation=True,
        useInMemoryCache=False,
        disableEventHandler=True)
    try:
        self.assertTrue(result.isResult())
        data = result.asResult.result

        joinScript = """
            let leftDF = dataframe.DataFrame(data[,size(data)/2])
            let rightDF = dataframe.DataFrame(data[size(data)/2,])

            size(leftDF.join(rightDF, on: "C0", how: `outer, chunkSize: 1000000, areSorted:true))
            """

        t0 = time.time()
        result = simulation.compute(joinScript, timeout=1080, data=data)
        totalTimeToReturnResult = time.time() - t0
        logging.info("Total time to join: %s", totalTimeToReturnResult)

        self.assertTrue(result.isResult(), result)

        PerformanceTestReporter.recordTest(
            "algorithms.Join.inMemory_%sMB_%scols_%sthreads_%smachines" % (
                mbOfData, columns, threads, machineCount),
            totalTimeToReturnResult,
            None)
    finally:
        # drop the reference to the (possibly large) result before teardown
        result = None
        simulation.teardown()
def test_VectorsAndSums(self):
    """Sum over 100 paged vectors, each contributing a 10**7-element
    reduction plus its own sum, within a 20 second timeout."""
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    foraText = """
        let v = Vector.range(100).apply(fun(ix) { [ix * x for x in sequence(10000)].paged })

        v.sum(fun(vElt) { sum(0, 10**7) + vElt.sum() })
        """
    outcome = self.computeUsingSeveralWorkers(foraText, s3, 4, timeout=20)

    self.assertTrue(outcome.isResult(), outcome)
def check_precision_of_function_on_GPU(self, function, input):
    """Evaluate `function(input)` via #GpuApply and compare against
    Python's math module to within 1e-10.

    (`input` shadows the builtin but is part of the public signature, so
    it is kept for keyword-argument compatibility.)
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    program = """
        let f = fun(x) { `""" + function + """(x) }
        cached`(#GpuApply(f, [""" + str(input) + """]))[0]
        """

    outcome = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        program, s3, 1, timeout=120, threadCount=4)

    self.assertIsNotNone(outcome)
    self.assertTrue(outcome.isResult(), outcome)

    gpuValue = outcome.asResult.result.pyval
    referenceValue = getattr(math, function)(input)
    self.assertTrue(abs(gpuValue - referenceValue) < 1e-10)
def test_PythonIoTaskService2(self):
    """Populate 400 small S3 keys and read the 'key_0' selection back as a
    string through datasets.s3."""
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    for ix1 in range(20):
        for ix2 in range(20):
            payload = "".join(
                ("%s,%s,%s\n" % (ix1, ix2, ix3) for ix3 in range(1024)))
            s3().setKeyValue("bucketname", "key_%s_%s" % (ix1, ix2), payload)

    text = """
        datasets.s3('bucketname', 'key_0').dataAsString
        """
    self.assertIsNotNone(self.computeUsingSeveralWorkers(text, s3, 1))
def largeDatasetBigLMTest(self, mbOfData, columns, threads, testName):
    """Benchmark a linear regression over a generated `mbOfData`-MB,
    `columns`-column dataset on one worker with `threads` threads.

    When `testName` is not None, records two datapoints:
    '<testName>_create' for data generation and '<testName>' for the
    regression itself.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    t0 = time.time()
    result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.dataGenerationScript(mbOfData, columns),
        s3, 1,
        timeout=360,
        memoryLimitMb=50 * 1024,
        threadCount=threads,
        returnSimulation=True,
        useInMemoryCache=False)

    if testName is not None:
        PerformanceTestReporter.recordTest(testName + "_create", time.time() - t0, None)

    try:
        self.assertTrue(result.isResult())
        dfResponse, dfPredictors = result.asResult.result

        regressionScript = """
            let model = math.regression.LinearRegression(dfPredictors, dfResponse, fitIntercept: false);
            let coefficients = model.coefficients();
            coefficients[0]
            """

        t0 = time.time()
        result = simulation.compute(
            regressionScript,
            timeout=1080,
            dfResponse=dfResponse,
            dfPredictors=dfPredictors)
        totalTimeToReturnResult = time.time() - t0

        self.assertTrue(result.isResult())

        if testName is not None:
            PerformanceTestReporter.recordTest(testName, totalTimeToReturnResult, None)
    finally:
        # release references to the large dataframes/result before teardown
        dfResponse = None
        dfPredictors = None
        result = None
        simulation.teardown()
def test_multipart(self):
    """A multipart upload assembled from three parts must read back as the
    concatenated payload."""
    factory = InMemoryS3Interface.InMemoryS3InterfaceFactory()
    publicInterface = factory()

    uploadID = publicInterface.initiateMultipartUpload("aBucket", "aKey")
    for partNumber, chunk in enumerate(["this ", "is ", "multipart"], 1):
        publicInterface.setMultipartUploadPart(
            "aBucket", "aKey", uploadID, partNumber, chunk)
    publicInterface.completeMultipartUpload("aBucket", "aKey", uploadID)

    self.assertEqual(
        publicInterface.getKeyValue("aBucket", "aKey"),
        "this is multipart")
def test_largeVectorRange(self):
    """Ten successive Vector.range(50000000) allocations; their sizes must
    sum to 500M, exercising GC of the intermediate vectors.

    Fix: the failure branch used self.assertTrue(False, res), an
    anti-pattern that reports 'False is not true'; replaced with the
    idiomatic self.fail(res).
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    res = self.computeUsingSeveralWorkers("""
        let res = 0;
        for ix in sequence(10) {
            let v1 = Vector.range(50000000);
            res = res + size(v1);
            }
        res
        """, s3, 4, timeout=200)

    if res.isResult():
        self.assertEqual(res.asResult.result.pyvalOrNone, 50000000 * 10, res)
    else:
        self.fail(res)
def test_vector_string_apply(self):
    """Stringify 10M integers, append a suffix, and sum the sizes.

    The point is stressing the compiler -- it should not crash across the
    repeated apply passes; the result value itself is not checked.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    foraText = """
        let v = Vector.range(10000000)
        let v2 = v.apply(String)
        let v3 = v2.apply({_ + "a"})
        v3.sum(size)
        """
    InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        foraText, s3, 4, timeout=240)
def dataCreationTest(self, totalMB, workers=1, threadsPerWorker=4):
    """Allocate roughly `totalMB` of vector data (8-byte elements) spread
    over `workers` machines and assert the computation completes."""
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    #we wish we could actually test that we achieve saturation here but we can't yet.
    elementCount = totalMB * 1024 * 1024 / 8
    text = """size(Vector.range(%s, {_*_}))""" % elementCount

    outcome = self.computeUsingSeveralWorkers(
        text, s3, workers,
        timeout=120,
        memoryLimitMb=totalMB / workers * 1.3,
        threadCount=threadsPerWorker,
        useInMemoryCache=False)

    self.assertIsNotNone(outcome)