def test_ParseRowsAsFloatVectors(self):
    # Stores a CSV blob of `rows` lines by 200 integer columns under
    # ("bucketname", "key"), then runs a script on 4 workers that splits the
    # bytes on value 10, splits every row on ',', converts each field with
    # Float64 and returns size(parsed). The size must equal `rows`.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    rows = 10000
    columnCount = 200

    def csvLine(row):
        # One comma-separated line of ((col * row) ** 2) % 503 values.
        return ",".join([str(((col * row)**2) % 503) for col in range(columnCount)])

    bucketData = "\n".join([csvLine(row) for row in range(rows)])
    s3().setKeyValue("bucketname", "key", bucketData)

    text = """ let bytes = datasets.s3('bucketname', 'key'); let rows = bytes.split(fun(x){x == 10}); let parsed = rows.apply(fun(row) { row.split( fun(x) { x == ','[0] }, fun(x) { Float64(x.dataAsString) } ) }) size(parsed) """

    res = self.computeUsingSeveralWorkers(text, s3, 4, timeout=30)

    self.assertTrue(res.isResult(), res)
    parsedRowCount = res.asResult.result.pyval
    self.assertEqual(parsedRowCount, rows)
def dataCreationTest(self, totalMB, workers, threadsPerWorker, testName):
    """Time the creation of a large vector and record the elapsed time.

    A simulation is brought up by evaluating "1+1"; afterwards the script
    ``size(Vector.range(N, {_*_}))`` with ``N = totalMB * 1024 * 1024 / 8``
    is timed and the duration is reported under `testName`.

    Args:
        totalMB: size used to derive the vector length N (see above).
        workers: worker count handed to computeUsingSeveralWorkers; each
            worker gets a memory limit of ``55 * 1024 / workers`` MB.
        threadsPerWorker: passed through as `threadCount`.
        testName: name under which the timing is recorded.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    #we wish we could actually test that we achieve saturation here but we can't yet.
    text = """size(Vector.range(%s, {_*_}))""" % (totalMB * 1024 * 1024 / 8)

    # The "1+1" computation only exists to start the simulation; its result
    # is not used. The timer is started afterwards, so startup is not part
    # of the recorded time.
    _, simulation = self.computeUsingSeveralWorkers(
        "1+1",
        s3,
        workers,
        timeout=120,
        memoryLimitMb=55 * 1024 / workers,
        threadCount=threadsPerWorker,
        returnSimulation=True,
        useInMemoryCache=False)

    try:
        t0 = time.time()
        simulation.compute(text, timeout=120)
        totalTimeToReturnResult = time.time() - t0

        PerformanceTestReporter.recordTest(testName, totalTimeToReturnResult, None)
    finally:
        # Always release the simulation, even if the computation raised.
        simulation.teardown()
def test_splitToRowMajor(self):
    # Performance test: runs self.setupScript(nRows, nColumns) on a single
    # worker, then times self.splitToRowMajorScript() with the setup result
    # bound as `chunks`, and records the elapsed time.
    nRows = 100000
    nColumns = 50

    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    setupResult, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.setupScript(nRows, nColumns),
        s3,
        1,
        timeout=30,
        memoryLimitMb=8 * 1024,
        threadCount=4,
        returnSimulation=True,
        useInMemoryCache=False)

    try:
        self.assertTrue(setupResult.isResult())
        chunks = setupResult.asResult.result

        startedAt = time.time()
        splitResult = simulation.compute(
            self.splitToRowMajorScript(), timeout=360, chunks=chunks)
        elapsed = time.time() - startedAt

        self.assertTrue(splitResult.isResult())

        reportName = "algorithms.text.splitToRowMajor.%srows_%scolumns" % (nRows, nColumns)
        PerformanceTestReporter.recordTest(reportName, elapsed, None)
    finally:
        simulation.teardown()
def test_computeManyGetitems(self):
    #verify that the compiler doesn't crap out during many runs.
    # Runs the same script ten times (only the constant `r` differs) and
    # collects the interpreter time reported by each run's stats.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    scriptTemplate = """ let f = fun(x){x+x+x+x+x} let v = [f((x+1,x+2,x-10,x)) for x in sequence(4000)]; let res = 0; let r = %s + 0; for ix in sequence(1) { res = res + Vector.range(size(v[0])).sum(fun(ix) { v.sum(fun(x){x[ix]+r}) }) } res """

    interpreterTimes = []
    for runIndex in range(10):
        resultAndStats = self.computeUsingSeveralWorkers(
            scriptTemplate % runIndex, s3, 1, wantsStats=True, timeout=240)
        interpreterTimes.append(resultAndStats[1].timeSpentInInterpreter)

    runCount = len(interpreterTimes)

    # The first run is never the one being checked. Every later run must take
    # less than 10x the mean of all the other runs; that mean still includes
    # the first run.
    for candidate in interpreterTimes[1:]:
        meanOfOthers = (sum(interpreterTimes) - candidate) / (runCount - 1)
        self.assertLess(candidate, meanOfOthers * 10)
def test_sortVecOfVec(self):
    # Sorts 500000 (ix % 100, Vector.range(40)) tuples through the #Sort
    # distributed data operation on one worker. The script itself checks that
    # the size is preserved and that the first tuple elements are ordered.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    text = """ let values = [] let ct = 500000 values = [(ix % 100, Vector.range(40)) for ix in sequence(ct)] let res = cached`(#ExternalIoTask(#DistributedDataOperation(#Sort(values.paged)))); let firstAreSorted = true; for ix in sequence(size(res)-1) if (res[ix][0] > res[ix+1][0]) firstAreSorted = false; size(res) == size(values) and firstAreSorted """

    result = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        text,
        s3,
        1,
        timeout=TIMEOUT,
        memoryLimitMb=3000)

    self.assertTrue(result is not None)
    self.assertTrue(result.isResult(), result)

    scriptSaidTrue = result.asResult.result.pyval == True
    self.assertTrue(scriptSaidTrue, result)
def test_invalidURL(self):
    # Evaluating the script below on one worker must produce an exception
    # rather than a result.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    script = """datasets.htpp("not a valid url")"""
    outcome = self.computeUsingSeveralWorkers(script, s3, 1)

    self.assertTrue(outcome.isException())
def test_page_glomming_multiple(self):
    # Sums 1000 single-element paged vectors on 4 workers, then waits up to
    # 10 seconds for the number of pages that some machine holds in RAM to
    # drop to 10 or fewer, as reported by worker 0's system-wide page
    # refcount tracker.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    result, simulation = self.computeUsingSeveralWorkers(
        """ let v = sum(0, 1000, fun(ix) { [sum(0,ix)].paged }); (v, v.sum()) """,
        s3,
        4,
        timeout=240,
        returnSimulation=True)

    try:
        sprt = simulation.getWorker(0).getSystemwidePageRefcountTracker()

        def activePageCount():
            # Pages for which machinesWithPageInRam() is non-empty.
            return len([
                x for x in sprt.getAllPages() if sprt.machinesWithPageInRam(x)
            ])

        # The failure message states the same threshold (10) that the
        # predicate checks.
        self.assertBecomesTrueEventually(
            lambda: activePageCount() <= 10,
            10.0,
            lambda: "Total number of active pages should have become at most 10, not %s.\nView of system:\n\n%s" % (activePageCount(), sprt.getViewOfSystem()))

        # NOTE(review): presumably drops the tracker reference before
        # teardown - confirm whether this is still required.
        sprt = None
    finally:
        simulation.teardown()
def test_page_glomming_common_pages(self):
    # Builds two paged vectors that share pages (v2 = v1 + [3].paged) on 4
    # workers and waits up to 5 seconds until worker 0's system-wide page
    # refcount tracker reports no pages that appear orphaned.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    script = """ let v1 = [1].paged + [2].paged; let v2 = v1 + [3].paged; (v1, v2) """

    result, simulation = self.computeUsingSeveralWorkers(
        script, s3, 4, timeout=240, returnSimulation=True)

    try:
        sprt = simulation.getWorker(0).getSystemwidePageRefcountTracker()

        def describeFailure():
            return "No pages should be orphaned.\nView of system:\n\n%s" % (sprt.getViewOfSystem())

        self.assertBecomesTrueEventually(
            lambda: len(sprt.getPagesThatAppearOrphaned()) == 0,
            5.0,
            describeFailure)

        sprt = None
    finally:
        simulation.teardown()
def test_schedulerEventsAreSerializable(self):
    # Runs sum(0,10**10) on 4 workers, then for every worker's event handler
    # checks that the extracted events survive a pickle round trip with the
    # same length. Non-empty event streams are also replayed through
    # CumulusNative.replayCumulusWorkerEventStream. At least one worker must
    # have produced events.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    result, simulation = self.computeUsingSeveralWorkers(
        """ sum(0,10**10) """,
        s3,
        4,
        timeout=240,
        returnSimulation=True)

    try:
        someHadEvents = False

        for worker, vdm, eventHandler in simulation.workersVdmsAndEventHandlers:
            events = eventHandler.extractEvents()

            # Pickle once and reuse the bytes for both the round trip and the
            # size diagnostics.
            serialized = pickle.dumps(events)
            events2 = pickle.loads(serialized)

            # Single-argument print(...) emits the same text under the
            # Python 2 print statement.
            print("%s events" % len(events))
            print("%s  bytes" % len(serialized))
            if len(events):
                # A worker without events must not trigger a
                # ZeroDivisionError; the someHadEvents check below already
                # allows for empty streams.
                print("%s  bytes per event." % (len(serialized) / len(events)))

            self.assertTrue(len(events2) == len(events))

            if len(events):
                someHadEvents = True
                CumulusNative.replayCumulusWorkerEventStream(events, True)

        self.assertTrue(someHadEvents)

        # NOTE(review): presumably releases the loop references before
        # teardown - confirm whether this is still required.
        worker = None
        vdm = None
        eventHandler = None
    finally:
        simulation.teardown()
def test_cumulusCanTriggerNewRegimes(self):
    # Reads worker 0's regime hash, triggers a regime change on that worker,
    # waits for the next handshake and checks that the hash is set and
    # different from the previous one.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    simulation = InMemoryCumulusSimulation.InMemoryCumulusSimulation(
        4, #worker count
        1,
        memoryPerWorkerMB=100,
        threadsPerWorker=2,
        s3Service=s3)

    try:
        self.assertTrue(simulation.waitForGlobalScheduler(timeout=2.0))

        simulation.waitForHandshake()
        firstWorker = simulation.getWorker(0)
        regimeBefore = firstWorker.getRegimeHash()
        self.assertTrue(regimeBefore is not None)

        simulation.getWorker(0).triggerRegimeChange()

        time.sleep(1.0)
        simulation.waitForHandshake()

        regimeAfter = simulation.getWorker(0).getRegimeHash()
        self.assertTrue(regimeAfter is not None)
        self.assertTrue(regimeAfter != regimeBefore)
    finally:
        simulation.teardown()
def test_recoveryWithUnreadDatasetsS3(self):
    # Submits a long-running computation that references an S3 dataset,
    # triggers full checkpoints one second later and waits for all
    # checkpoints to clear.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()
    s3().setKeyValue("bucketname", "key", "this is some data")

    script = """ let data = datasets.s3("bucketname","key") let res = sum(0,10**12) data[res % 2] """

    simulation = self.createSimulation(s3Service=s3)

    try:
        #give the simulation a couple of seconds to pick a scheduler
        schedulerReady = simulation.waitForGlobalScheduler(timeout=2.0)
        self.assertTrue(schedulerReady)

        simulation.submitComputation(script)

        time.sleep(1.0)

        scheduler = simulation.getGlobalScheduler()
        scheduler.triggerFullCheckpointsOnOutstandingComputations()

        self.waitForAllCheckpointsToClear(simulation)
    finally:
        simulation.teardown()
def stringCreationAndSumTest(self, totalStrings, workers, threadsPerWorker, testName):
    """Time ``Vector.range(totalStrings, String).sum(size)`` and record it.

    A simulation is brought up by evaluating "1+1"; the script is then timed
    on that simulation and the duration is reported under `testName`.

    Args:
        totalStrings: length of the vector of strings the script creates.
        workers: worker count handed to computeUsingSeveralWorkers; each
            worker gets a memory limit of ``55 * 1024 / workers`` MB.
        threadsPerWorker: passed through as `threadCount`.
        testName: name under which the timing is recorded.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    #we wish we could actually test that we achieve saturation here but we can't yet.
    text = """Vector.range(%s, String).sum(size)""" % totalStrings

    # The "1+1" computation only exists to start the simulation. The timer
    # is started afterwards, so startup is not part of the recorded time.
    _, simulation = self.computeUsingSeveralWorkers(
        "1+1",
        s3,
        workers,
        timeout=240,
        memoryLimitMb=55 * 1024 / workers,
        threadCount=threadsPerWorker,
        returnSimulation=True,
        useInMemoryCache=False)

    try:
        t0 = time.time()
        simulation.compute(text, timeout=240)
        totalTimeToReturnResult = time.time() - t0

        PerformanceTestReporter.recordTest(testName, totalTimeToReturnResult, None)
    finally:
        # Always release the simulation, even if the computation raised.
        simulation.teardown()
def test_takeFromLargeObjects(self):
    # Runs a #Take distributed data operation over 100 large strings on one
    # worker with a 1 MB page size override, then checks that every page of
    # the result vector is smaller than 2 MB.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    text = """ let N = 100; //each string is 1 MB let takeFrom = [" " * 100 * 100 * 10 * 10 + " " * ix for ix in sequence(N)].paged; let indices = Vector.range(N,fun(x) { x }).paged; cached`(#ExternalIoTask(#DistributedDataOperation(#Take(indices, takeFrom)))) """

    # Start the simulation before entering the try block. If startup raised
    # inside it, `simulation` would be unbound in the finally clause and the
    # resulting UnboundLocalError would hide the real failure.
    result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        text,
        s3,
        1,
        timeout=TIMEOUT,
        memoryLimitMb=1000,
        returnSimulation=True,
        pageSizeOverride=1024 * 1024)

    try:
        self.assertTrue(result is not None)
        self.assertTrue(result.isResult(), result)

        for page in result.asResult.result.getVectorPageIds(simulation.getWorkerVdm(0)):
            # bytecount converted to MB
            self.assertLess(page.bytecount / 1024.0 / 1024.0, 2.0)
    finally:
        simulation.teardown()
def basicTaskPathwayTest(self, sz, machines=1, memory=1000):
    """Compare the #Sort data operation against sorting.sort for `sz` values.

    The script sorts ``Vector.range(sz).paged`` both ways and returns true
    when sizes and all elements agree, or a descriptive string otherwise.

    Args:
        sz: value substituted for ``__size__`` in the script.
        machines: worker count passed to computeUsingSeveralWorkers.
        memory: passed through as `memoryLimitMb`.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    template = """ let N = __size__; //let values = Vector.range(N,fun(x) { ((x * 503) % N, x) }).paged; let values = Vector.range(N).paged; let s1 = cached`(#ExternalIoTask(#DistributedDataOperation(#Sort(values)))) let s2 = sorting.sort(values) if (size(s1) != size(s2)) return 'wrong size: %s != %s'.format(size(s1), size(s2)) for ix in sequence(size(s1)) if (s1[ix] != s2[ix]) return 'not equal: index=%s. %s != %s'.format(ix, s1[ix], s2[ix]) return true """
    text = template.replace("__size__", str(sz))

    result = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        text,
        s3,
        machines,
        timeout=TIMEOUT,
        memoryLimitMb=memory)

    self.assertTrue(result is not None)
    self.assertTrue(result.isResult(), result)

    scriptSaidTrue = result.asResult.result.pyval == True
    self.assertTrue(scriptSaidTrue, result)
def test_sortHeterogeneous(self):
    # Sorts a vector that interleaves each integer with its Float64 value
    # through the #Sort data operation on one worker. The script checks the
    # size and that each half of the output (split at ct) is strictly
    # increasing with a single type.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    text = """ let values = [] let ct = 1000000 for ix in sequence(ct) values = values :: ix :: Float64(ix) let sortedVals = cached`(#ExternalIoTask(#DistributedDataOperation(#Sort(values.paged)))) let sortedAndHomogenous = fun(v) { for ix in sequence(size(v)-1) if (v[ix] >= v[ix+1] or `TypeJOV(v[ix]) is not `TypeJOV(v[ix+1])) throw (ix, v[ix], v[ix+1]) return true; } if (size(sortedVals) != size(values)) throw "expected " + String(size(values)) + ", not " + String(size(sortedVals)) sortedAndHomogenous(sortedVals[,ct]) and sortedAndHomogenous(sortedVals[ct,]) """

    result = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        text,
        s3,
        1,
        timeout=TIMEOUT,
        memoryLimitMb=1000)

    self.assertTrue(result is not None)
    self.assertTrue(result.isResult(), result)

    scriptSaidTrue = result.asResult.result.pyval == True
    self.assertTrue(scriptSaidTrue, result)
def test_multiboxDataTasksTake_1(self):
    # Runs a #Take data operation over 10000000 elements on 2 workers and
    # has the script compare it with the equivalent element-wise lookup
    # (indices ~~ { takeFrom[_[1]] }).
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    text = """ let N = 10000000; let isPrime = fun(p) { let x = 2 while (x*x <= p) { if (p%x == 0) return 0 x = x + 1 } return x } let takeFrom = Vector.range(N, isPrime).paged; let indices = Vector.range(N,fun(x) { (0, (x * 503) % N ) }).paged; cached`(#ExternalIoTask(#DistributedDataOperation(#Take(indices, takeFrom)))) == indices ~~ { takeFrom[_[1]] } """

    result = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        text,
        s3,
        2,
        timeout=TIMEOUT,
        memoryLimitMb=1000)

    self.assertTrue(result is not None)
    self.assertTrue(result.isResult(), result)

    scriptSaidTrue = result.asResult.result.pyval == True
    self.assertTrue(scriptSaidTrue, result)
def multiboxDataTasksSort(self, ct, workers=2, memoryLimit=100, pageSizeOverrideMB=1):
    """Sort `ct` tuples with the #Sort data operation and check the order.

    The script builds ``((_ * _) % 503, _)`` tuples, sorts them and returns
    ``sorting.isSorted(result)``, which must be true.

    Args:
        ct: value substituted for ``__ct__`` in the script.
        workers: worker count passed to computeUsingSeveralWorkers.
        memoryLimit: passed through as `memoryLimitMb`.
        pageSizeOverrideMB: multiplied by 1024*1024 and passed as
            `pageSizeOverride`.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    template = """ let N = __ct__; let aPrime = 503 let toSort = Vector.range(N, { ((_ * _) % aPrime, _) }).paged; let result = cached`(#ExternalIoTask(#DistributedDataOperation(#Sort(toSort)))) sorting.isSorted(result) """
    text = template.replace("__ct__", str(ct))

    result = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        text,
        s3,
        workers,
        timeout=TIMEOUT,
        memoryLimitMb=memoryLimit,
        pageSizeOverride=pageSizeOverrideMB*1024*1024)

    self.assertTrue(result is not None)
    self.assertTrue(result.isResult(), result)

    scriptSaidTrue = result.asResult.result.pyval == True
    self.assertTrue(scriptSaidTrue, result)
def classSortingTest(self, sz, useClass = True, machines=1, memory=1000):
    """Sort `sz` values through #Sort and check that the size is preserved.

    Args:
        sz: value substituted for ``__size__`` in the script.
        useClass: when true ``__use_class__`` becomes '1' and the script
            builds its elements with a one-member class; otherwise it becomes
            '0' and the script uses Int64.
        machines: worker count passed to computeUsingSeveralWorkers.
        memory: passed through as `memoryLimitMb`.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    template = """ let N = __size__; let C = if (__use_class__) { class { member x; } } else { Int64 } let values = Vector.range(N, C).paged; let s1 = cached`(#ExternalIoTask(#DistributedDataOperation(#Sort(values)))) return size(s1) == N """

    if useClass:
        classFlag = '1'
    else:
        classFlag = '0'

    text = template.replace("__size__", str(sz))
    text = text.replace("__use_class__", classFlag)

    result = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        text,
        s3,
        machines,
        timeout=TIMEOUT,
        memoryLimitMb=memory)

    self.assertTrue(result is not None)
    self.assertTrue(result.isResult(), result)

    scriptSaidTrue = result.asResult.result.pyval == True
    self.assertTrue(scriptSaidTrue, result)
def basic_gpu_works_helper(self, function, onGPU=True):
    """Apply a loop around `function` to a 4096-element vector.

    Args:
        function: text spliced into the script after a backtick, so that the
            loop body reads ``res = res + `<function>(x)``.
        onGPU: when true the script ends with ```CUDAVectorApply(f, <vector>)``;
            otherwise it ends with ``<vector> ~~ f``.

    The computation runs on one worker with 4 threads and must produce a
    result.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    testingVectorText = "Vector.range(1024*4, {_+1000000})"

    definition = """ let f = fun(ct) { let res = 0.0 let x = 1.0 while (x < ct) { x = x + 1.0 res = res + `""" + function + """(x) } res }"""

    if onGPU:
        invocation = """`CUDAVectorApply(f,""" + testingVectorText + """)"""
    else:
        invocation = testingVectorText + """ ~~ f"""

    text = definition + invocation

    res = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        text, s3, 1, timeout=120, threadCount=4)

    self.assertIsNotNone(res)
    self.assertTrue(res.isResult(), res)
def test_PythonIoTaskService3(self):
    # Stores 35 keys of 10 MB each ("key_0" .. "key_34") with S3 throughput
    # set to 20 MB per machine, sums the dataset on 4 workers and checks that
    # the bytes counted across all machines come to at most 370 MB.
    # NOTE(review): the script reads 'key' while the stored keys are 'key_N';
    # presumably this is resolved as a prefix - confirm.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()
    s3.setThroughputPerMachine(1024 * 1024 * 20)

    keyCount = 35
    for keyIndex in range(keyCount):
        s3().setKeyValue("bucketname", "key_%s" % keyIndex, " " * 10 * 1024 * 1024)

    text = """ datasets.s3('bucketname', 'key').sum() """

    outcome = self.computeUsingSeveralWorkers(
        text, s3, 4, timeout=120, blockUntilConnected=True)
    self.assertIsNotNone(outcome)

    totalBytecount = sum(
        bytecount for machine, bytecount in s3.getPerMachineBytecounts().iteritems())

    totalMB = totalBytecount / 1024 / 1024
    self.assertTrue(totalMB <= 370, totalMB)
def test_expansionWithVecOfVec(self):
    # Computes a paged vector of paged vectors, then adds a worker to the
    # running simulation and checks that the handshake completes.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    simulation = InMemoryCumulusSimulation.InMemoryCumulusSimulation(
        4, #worker count
        1,
        memoryPerWorkerMB=100,
        threadsPerWorker=2,
        s3Service=s3)

    script = "Vector.range(20, fun(ix) { Vector.range(100000+ix).paged }).paged"

    try:
        schedulerReady = simulation.waitForGlobalScheduler(timeout=2.0)
        self.assertTrue(schedulerReady)

        scheduler = simulation.getGlobalScheduler()
        scheduler.setCheckpointStatusInterval(0.0001)

        simulation.submitComputation(script)
        simulation.waitForAnyResult()

        simulation.addWorker()

        handshakeCompleted = simulation.waitForHandshake()
        self.assertTrue(handshakeCompleted)
    finally:
        simulation.teardown()
def test_produceLotsOfData(self):
    # Appends 50*1000*1000 integers to a vector on 4 workers and then checks
    # that every worker's VDM reports fewer than 150 MB in use.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    script = """ let v = [] for ix in sequence(50*1000*1000) v = v :: ix v """

    result, simulation = self.computeUsingSeveralWorkers(
        script, s3, 4, timeout=240, returnSimulation=True)

    byteLimit = 150 * 1024 * 1024

    try:
        for worker, vdm, eventHandler in simulation.workersVdmsAndEventHandlers:
            self.assertTrue(
                vdm.curTotalUsedBytes() < byteLimit,
                "We are using %s >= 150MB" % (vdm.curTotalUsedBytes() / 1024 / 1024.0))
    finally:
        simulation.teardown()
def test_vectorCreateCloseToLimit(self):
    # Creates 10 column vectors sized from 2 GB * .7 on 4 workers limited to
    # 500 MB each, sums them, and checks that no worker's offline cache holds
    # any items. On failure the scheduler event streams are dumped before the
    # error is re-raised.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    script = """ let bytes = 2 * 1024 * 1024 * 1024 * .7 let cols = 10 let rows = Int64(bytes / cols / 8) let v = Vector.range(cols, fun(col) { Vector.range(rows) }) v ~~ {_.sum()} """

    result, simulation = self.computeUsingSeveralWorkers(
        script,
        s3,
        4,
        memoryLimitMb=500,
        timeout=240,
        returnSimulation=True,
        useInMemoryCache=False)

    try:
        #verify that simulation didn't write to disk
        for workerIndex in range(simulation.getWorkerCount()):
            offlineCache = simulation.getWorkerVdm(workerIndex).getOfflineCache()
            self.assertEqual(offlineCache.cacheItemCount, 0)
    except:
        # Deliberately catches everything: dump diagnostics, then re-raise.
        simulation.dumpSchedulerEventStreams()
        raise
    finally:
        simulation.teardown()
def disable_createVectorAndReferenceInMultipleComputations(self):
    # Creates one computation producing ten large paged Float32 vectors and
    # three further computations that all depend on it, submits those three,
    # waits for three results and checks that the active pages total less
    # than 2 GB.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    result, simulation = self.computeUsingSeveralWorkers(
        "1+1",
        s3,
        2,
        memoryLimitMb=1000,
        returnSimulation=True,
        useInMemoryCache=False)

    try:
        vecComputation = simulation.createComputation(""" let count = 1000 * 1000 * 40 let fpow = fun(p) { fun(x) { Float32( (x / 1000000000.0) ) } }; Vector.range(10) ~~ fun(p) { Vector.range(count, fpow(p)).paged } """)

        #we want to verify that all of these computations use the same copy of the
        #bigvec that we create in the 'vecComputation' instance
        predComp = simulation.createComputation(
            "dataframe.DataFrame(vecs[1,])", vecs=vecComputation)
        regComp = simulation.createComputation(
            "dataframe.DataFrame(vecs[,1])", vecs=vecComputation)

        predCompStr = simulation.createComputation("String(pred)", pred=predComp)
        regCompStr = simulation.createComputation("String(reg)", reg=regComp)

        vecSumComp = simulation.createComputation("vecs ~~ {_.sum()}", vecs=vecComputation)

        for computation in (predCompStr, regCompStr, vecSumComp):
            simulation.submitComputation(computation)

        # One wait per submitted computation; the results themselves are not
        # inspected.
        for _ in range(3):
            simulation.waitForAnyResult(timeout=60.0)

        #verify that simulation didn't write to disk
        sprt = simulation.getWorker(0).getSystemwidePageRefcountTracker()

        activeBytes = sum([page.bytecount for page in sprt.getAllActivePages()])
        totalGb = activeBytes / 1024.0 / 1024.0 / 1024.0

        logging.critical("%s", sprt.getViewOfSystem())

        self.assertTrue(totalGb < 2.0, totalGb)
    finally:
        simulation.teardown()
def stringToInt64ParsingTest(self, threads, testName):
    """Time repeated String -> Int64 parsing and record the elapsed time.

    The script runs ``doALoop(20000000 + _)`` for each element of
    ``Vector.range(threads)``; each loop iteration converts a string to
    Int64. The duration is reported under `testName`.

    Args:
        threads: value substituted for ``__thread_count__`` in the script.
            The simulation itself is always started with threadCount=30.
        testName: name under which the timing is recorded.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    #we wish we could actually test that we achieve saturation here but we can't yet.
    text = """ let doALoop = fun(x) { //pass 's' through a vector so that the compiler can't tell what it is let s = ["2013"][0]; let res = 0 for ix in sequence(x) { if (ix == 0) s = s + String(ix) res = res + Int64(s) + ix } res }; Vector.range(__thread_count__) ~~ {doALoop(20000000 + _)} """.replace("__thread_count__", str(threads))

    # Exactly one simulation is started here, and it is the one torn down in
    # the finally clause below.
    _, simulation = self.computeUsingSeveralWorkers(
        "1+1",
        s3,
        1,
        timeout=240,
        memoryLimitMb=55 * 1024,
        threadCount=30,
        returnSimulation=True,
        useInMemoryCache=False)

    try:
        t0 = time.time()
        simulation.compute(text, timeout=240)
        totalTimeToReturnResult = time.time() - t0

        PerformanceTestReporter.recordTest(testName, totalTimeToReturnResult, None)
    finally:
        simulation.teardown()
def test_sortVec2(self):
    # Sorts a 50000-element vector of ix / 10 values on 4 workers and checks
    # that sorting.isSorted reports true for the output.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    script = """ let v = Vector.range(50000, fun(ix) { ix / 10 } ); sorting.isSorted(sort(v)) """

    result = self.computeUsingSeveralWorkers(script, s3, 4)

    isSorted = result.asResult.result.pyval
    self.assertEqual(isSorted, True)
def gbmRegressionFittingTest(self, nRows, nColumns, depth, nThreads, maxBoosts):
    """Time successive boosting rounds and record two timings per round.

    Data is generated with self.dataGenerationScript(nRows, nColumns) on one
    worker, and an initial fitter is built with self.regressionScript(depth, 1).
    For each nBoosts in range(1, maxBoosts) the test records the time since
    the loop started, once after computing predictions (name suffixed with
    "_predict") and once after advancing the fitter.

    Args:
        nRows, nColumns: passed to the data generation script and the test name.
        depth: passed to the regression script and the test name.
        nThreads: passed as `threadCount` and to the test name.
        maxBoosts: exclusive upper bound of the boosting loop.
    """
    # Kept from the original: this name is replaced on every loop iteration.
    testName = self.getTestName(nRows, nColumns, depth, maxBoosts, nThreads)

    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    result, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        self.dataGenerationScript(nRows, nColumns),
        s3,
        1,
        timeout=360,
        memoryLimitMb=30 * 1024,
        threadCount=nThreads,
        returnSimulation=True,
        useInMemoryCache=False)

    try:
        self.assertTrue(result.isResult())

        dfPredictors, dfResponse = result.asResult.result

        initialFit = simulation.compute(
            self.regressionScript(depth, 1),
            timeout=360,
            dfResponse=dfResponse,
            dfPredictors=dfPredictors)
        fitter = initialFit.asResult.result

        # Timings are cumulative: the clock is started once, before the loop.
        t0 = time.time()

        for nBoosts in range(1, maxBoosts):
            testName = self.getTestName(nRows, nColumns, depth, nBoosts, nThreads)

            predictionOutcome = simulation.compute(
                "fitter.predictionsAndPseudoresiduals()",
                timeout=360,
                fitter=fitter)
            predictions = predictionOutcome.asResult.result

            elapsedAfterPredict = time.time() - t0
            PerformanceTestReporter.recordTest(testName + "_predict", elapsedAfterPredict, None)

            nextOutcome = simulation.compute(
                "fitter.nextGivenPredictions(predictions)",
                timeout=360,
                fitter=fitter,
                predictions=predictions)
            fitter = nextOutcome.asResult.result

            elapsedAfterBoost = time.time() - t0
            PerformanceTestReporter.recordTest(testName, elapsedAfterBoost, None)
    finally:
        simulation.teardown()
def test_effectiveParallelism(self):
    # Runs the same script twice on 2 workers: once as a burn-in, once timed.
    # Records the wall-clock time of the second run and the ratio of
    # (interpreter + compiler time) to wall-clock time as "effectiveCores".
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    script = """ let v = Vector.range(5000000, { (1,_) } ); let f = fun(ix) { let res = 0 for x in sequence( (ix - 2000) >>> 0, ix ) res = res + size(v[x]) res } Vector.range(size(v), f).sum() """

    def runOnce():
        # Returns the stats element of the (result, stats) pair.
        return self.computeUsingSeveralWorkers(
            script, s3, 2, wantsStats=True, timeout=240, memoryLimitMb=500)[1]

    #do a burn-in run
    runOnce()

    t0 = time.time()
    stats = runOnce()
    timeElapsed = time.time() - t0

    totalTime = stats.timeSpentInInterpreter + stats.timeSpentInCompiler
    effParallelism = totalTime / timeElapsed

    PerformanceTestReporter.recordTest(
        "python.cumulus.EffectiveParallelism.elapsed",
        timeElapsed,
        None)

    PerformanceTestReporter.recordTest(
        "python.cumulus.EffectiveParallelism.effectiveCores",
        effParallelism,
        {},
        units='count')
def test_CalculationRicochet(self):
    # Builds a 125000000-element vector on 4 single-threaded workers, then
    # times two passes (seeds 1 and 2) of a script that performs 1000
    # random-index lookups into it, recording each pass separately.
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    text = """ let f = fun(ct, seed = 1) { let x = 0 let res = [] let it = iterator(math.random.UniformReal(0, size(v), seed)) for ix in sequence(ct) { let x = Int64(pull it) res = res :: (x / Float64(size(v)), v[x]) } return res } v[2] f(__count__,__seed__) """

    vResult, sim = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        "Vector.range(125000000, math.log)",
        s3,
        4,
        timeout=120,
        memoryLimitMb=400,
        threadCount=1,
        useInMemoryCache=True,
        returnSimulation=True)

    passes = (
        ("1", "python.InMemoryCumulus.Ricochet1000.Pass1"),
        ("2", "python.InMemoryCumulus.Ricochet1000.Pass2"),
    )

    try:
        v = vResult.asResult.result

        for seed, reportName in passes:
            t0 = time.time()
            sim.compute(
                text.replace("__seed__", seed).replace("__count__", "1000"),
                timeout=120,
                v=v)
            PerformanceTestReporter.recordTest(reportName, time.time() - t0, None)
    finally:
        sim.teardown()
def loadCheckpointFromFreshSimulationTest(self, calculationText, timestampsPerPassList, clientCount=1, timestep=1.0):
    """Run a calculation across successive fresh simulations.

    For every entry of `timestampsPerPassList` a new 4-worker simulation is
    created. It reuses the shared-state view factory of the previous pass
    (None for the first), and `calculationText` is submitted on each client.
    The checkpoint status is sampled once per `timestep`, a full checkpoint
    is triggered and awaited, and the status is sampled once more.

    Args:
        calculationText: script submitted on every client.
        timestampsPerPassList: number of timed samples to take in each pass.
        clientCount: number of clients created and submitted to.
        timestep: seconds to sleep before each sample.

    Returns:
        A list with one list of sampled statuses per pass.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    workerCount = 4
    statuses = []
    viewFactory = None

    for timestampsThisPass in timestampsPerPassList:
        simulation = InMemoryCumulusSimulation.InMemoryCumulusSimulation(
            workerCount,
            clientCount,
            memoryPerWorkerMB=100,
            threadsPerWorker=2,
            s3Service=s3,
            sharedStateViewFactory=viewFactory)

        # The next pass starts from this simulation's shared state.
        viewFactory = simulation.sharedStateViewFactory

        statusesThisPass = []

        try:
            self.assertTrue(simulation.waitForGlobalScheduler(timeout=2.0))

            scheduler = simulation.getGlobalScheduler()
            scheduler.setCheckpointStatusInterval(0.1)

            for clientIndex in range(clientCount):
                simulation.submitComputationOnClient(clientIndex, calculationText)

            for _ in range(timestampsThisPass):
                time.sleep(timestep)
                sample = self.timeElapsedOfMostRecentCheckpoints(simulation)
                statusesThisPass.append(sample)

            simulation.getGlobalScheduler().triggerFullCheckpointsOnOutstandingComputations()

            self.waitForFullCheckpoint(simulation)

            finalSample = self.timeElapsedOfMostRecentCheckpoints(simulation)
            statusesThisPass.append(finalSample)
        finally:
            for workerIndex in range(workerCount):
                simulation.getWorker(workerIndex).dumpStateToLog()

            simulation.teardown()

        statuses.append(statusesThisPass)

    return statuses