def test_disk_scans(self):
    """Write several ~1GB vectors through a real on-disk object cache, then
    scan them back, recording both the write time and the total
    write-and-scan time as performance metrics.

    Uses a real S3 interface and a disk-backed (not in-memory) page cache,
    so this is an integration/performance test rather than a unit test.
    """
    s3 = ActualS3Interface.ActualS3InterfaceFactory()
    objectStore = S3ObjectStore.S3ObjectStore(
        s3,
        Setup.config().userDataS3Bucket,
        prefix="test_object_cache/"
        )

    _, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
        "1+1",
        s3,
        1,
        memoryLimitMb=1 * 1024,
        threadCount=30,
        returnSimulation=True,
        ioTaskThreadOverride=8,
        objectStore=objectStore,
        useInMemoryCache=False  # use an actual disk cache for this
        )

    try:
        gigabytes = 8

        t0 = time.time()

        resultVectors = []
        for ix in range(gigabytes):
            # each Vector.range(~125M) is presumably ~1GB of int64 data
            # — confirm against the page size if this constant changes
            result = simulation.compute(
                "Vector.range(125000000 + %s)" % ix,
                timeout=120
                )
            resultVectors.append(result.asResult.result)

        t1 = time.time()

        intResults = []
        for vec in resultVectors:
            result = simulation.compute("v.sum()", timeout=120, v=vec)
            intResults.append(result.asResult.result.pyval)

        self.assertTrue(len(intResults) == gigabytes)

        # BUGFIX: this label used to hard-code "10GB" even though the loop
        # above writes `gigabytes` (8) one-GB vectors. Derive the name from
        # the actual count, matching the WriteAndScan metric below.
        PerformanceTestReporter.recordTest(
            "python.BigBox.Disk.Write.%sGB" % gigabytes,
            t1 - t0,
            None
            )

        PerformanceTestReporter.recordTest(
            "python.BigBox.Disk.WriteAndScan.%sGB" % gigabytes,
            time.time() - t0,
            None
            )
    finally:
        simulation.teardown()
def createObjectStore(s3Service):
    """Build the S3-backed object store used by the object-cache tests.

    Stores objects in the configured user-data bucket under the
    'test_object_cache/' key prefix.
    """
    bucketName = Setup.config().userDataS3Bucket
    return S3ObjectStore.S3ObjectStore(
        s3Service,
        bucketName,
        prefix="test_object_cache/"
        )
def __init__(self,
            workerCount,
            clientCount,
            memoryPerWorkerMB,
            threadsPerWorker,
            s3Service,
            objectStore=None,
            callbackScheduler=None,
            sharedStateViewFactory=None,
            ioTaskThreadOverride=None,
            useInMemoryCache=True,
            channelThroughputMBPerSecond=None,
            pageSizeOverride=None,
            disableEventHandler=False,
            machineIdHashSeed=None
            ):
    """Construct a simulated Cumulus cluster with `workerCount` workers and
    `clientCount` clients, each worker getting `memoryPerWorkerMB` of memory
    and `threadsPerWorker` compute threads.

    If `objectStore` is None, a default S3-backed store is created from
    `s3Service` (using a synthetic bucket when the service is the in-memory
    fake). If `useInMemoryCache` is False, pages are cached on local disk
    under CUMULUS_DATA_DIR (or a fresh temp directory).
    """
    self.useInMemoryCache = useInMemoryCache
    self.machineIdHashSeed = machineIdHashSeed

    if not self.useInMemoryCache:
        # number of disk caches created so far; presumably incremented by
        # addWorker (defined outside this view) — confirm
        self.diskCacheCount = 0

        if os.getenv("CUMULUS_DATA_DIR") is None:
            # NOTE(review): mkdtemp directories are not removed automatically;
            # cleanup presumably happens in teardown — confirm
            self.diskCacheStorageDir = tempfile.mkdtemp()
        else:
            # unique subdirectory so concurrent simulations don't collide
            self.diskCacheStorageDir = os.path.join(
                os.getenv("CUMULUS_DATA_DIR"),
                str(uuid.uuid4())
                )
    self.ioTaskThreadOverride = ioTaskThreadOverride

    # live counts start at zero; the addWorker/addClient loops below grow them
    self.workerCount = 0
    self.disableEventHandler = disableEventHandler
    self.clientCount = 0
    self.memoryPerWorkerMB = memoryPerWorkerMB
    self.threadsPerWorker = threadsPerWorker
    self.s3Service = s3Service
    self.objectStore = objectStore

    if self.objectStore is None:
        s3 = s3Service()

        if isinstance(s3, InMemoryS3Interface.InMemoryS3Interface):
            # in-memory fake: materialize the bucket by writing and
            # immediately deleting a dummy key
            objectStoreBucket = "object_store_bucket"
            s3.setKeyValue(objectStoreBucket, 'dummyKey', 'dummyValue')
            s3.deleteKey(objectStoreBucket, 'dummyKey')
        else:
            # real S3: use the configured user-data bucket
            objectStoreBucket = Setup.config().userDataS3Bucket

        self.objectStore = S3ObjectStore.S3ObjectStore(
            s3Service,
            objectStoreBucket,
            prefix="test/"
            )

    self.callbackScheduler = callbackScheduler or CallbackScheduler.singletonForTesting()

    self.sharedStateViewFactory = (
        sharedStateViewFactory or
        createInMemorySharedStateViewFactory(self.callbackScheduler)
        )

    self.channelThroughputMBPerSecond = channelThroughputMBPerSecond

    # small (5MB) VDM used only for holding results on the client side
    self.resultVDM = ForaNative.VectorDataManager(self.callbackScheduler, 5 * 1024 * 1024)

    self.pageSizeOverride = pageSizeOverride

    self.rateLimitedChannelGroupsForEachListener = []

    self.workersVdmsAndEventHandlers = []
    self.machineIds = []
    self.machineIdsEverAllocated = 0
    self.clientsAndVdms = []
    self.loadingServices = []
    self.clientTeardownGates = []
    self.workerTeardownGates = []

    for ix in range(workerCount):
        self.addWorker()

    for ix in range(clientCount):
        self.addClient()

    if clientCount:
        self.listener = self.getClient(0).createListener()
    else:
        self.listener = None
def computeUsingSeveralWorkers(expressionText,
            s3Service,
            count,
            objectStore=None,
            wantsStats=False,
            timeout=10,
            returnEverything=False,
            memoryLimitMb=100,
            blockUntilConnected=False,
            keepSimulationAlive=False,
            sharedStateViewFactory=None,
            threadCount=2):
    """Spin up `count` workers and one client, evaluate `expressionText`,
    and return the result.

    `expressionText` may be FORA source text (wrapped in a zero-argument
    function and called) or an already-built ComputationDefinition.

    Returns the waitForResult value, or — when `returnEverything` is True —
    a dict describing the whole simulation (result, stats, timing, workers,
    clients, services, teardown gates). Unless `keepSimulationAlive` is set,
    the simulation is torn down in the `finally` block before returning.
    """
    if keepSimulationAlive:
        assert returnEverything, \
            "can't keep the simulation alive and not return it. how would you shut it down?"

    callbackSchedulerToUse = CallbackScheduler.singletonForTesting()

    if sharedStateViewFactory is None:
        sharedStateViewFactory = createInMemorySharedStateViewFactory(
            callbackSchedulerToUse=callbackSchedulerToUse)

    # exactly one client is created (second argument)
    workersVdmsAndEventHandlers, clientsAndVdms, viewFactory = (
        createWorkersAndClients(
            count,
            1,
            sharedStateViewFactory,
            memoryLimitMb=memoryLimitMb,
            threadCount=threadCount
            ))

    client = clientsAndVdms[0][0]
    clientVdm = clientsAndVdms[0][1]

    loadingServices = []

    for ix in range(len(workersVdmsAndEventHandlers)):
        worker = workersVdmsAndEventHandlers[ix][0]
        workerVdm = workersVdmsAndEventHandlers[ix][1]

        s3InterfaceFactory = s3Service.withMachine(ix)

        # NOTE(review): when no objectStore is passed, the default is created
        # on the first iteration only (using machine 0's s3 interface) and then
        # shared by every worker's loading service — presumably intentional,
        # but confirm machine affinity doesn't matter for the object store
        if objectStore is None:
            objectStore = S3ObjectStore.S3ObjectStore(
                s3InterfaceFactory,
                Setup.config().userDataS3Bucket,
                prefix="test/")

        # each worker gets its own IO-task service fed by its external
        # dataset request channel
        loadingService = PythonIoTaskService.PythonIoTaskService(
            s3InterfaceFactory,
            objectStore,
            workerVdm,
            worker.getExternalDatasetRequestChannel(
                callbackSchedulerToUse
                ).makeQueuelike(callbackSchedulerToUse)
            )

        loadingService.startService()
        loadingServices.append(loadingService)

    if blockUntilConnected:
        for worker, vdm, eventHandler in workersVdmsAndEventHandlers:
            blockUntilWorkerIsConnected(worker, 2.0)

    if isinstance(expressionText, CumulusNative.ComputationDefinition):
        computationDefinition = expressionText
    else:
        # wrap the source text in a nullary function and call it
        computationDefinition = (createComputationDefinition(
            FORA.extractImplValContainer(
                FORA.eval("fun() {" + expressionText + " } ")),
            ForaNative.makeSymbol("Call")))

    # collect teardown gates from every client and worker VDM. These loops
    # rebind `client`/`worker`, but only one client exists so `client` still
    # refers to the same object afterwards.
    teardownGates = []
    for client, vdm in clientsAndVdms:
        teardownGates.append(vdm.getVdmmTeardownGate())
    for worker, vdm, eventHandler in workersVdmsAndEventHandlers:
        teardownGates.append(vdm.getVdmmTeardownGate())

    simulationDict = {
        "result": None,
        "timedOut": None,
        "stats": None,
        "clientsAndVdms": clientsAndVdms,
        "workersVdmsAndEventHandlers": workersVdmsAndEventHandlers,
        "s3Service": s3Service,
        "loadingServices": loadingServices,
        "sharedStateViewFactory": sharedStateViewFactory,
        "client": client,
        "teardownGates": teardownGates
        }

    try:
        listener = client.createListener()

        computationSubmitTime = time.time()

        computationId = client.createComputation(computationDefinition)
        client.setComputationPriority(computationId, CumulusNative.ComputationPriority(1))

        if returnEverything:
            # always collect stats in this mode, regardless of `wantsStats`
            valAndStatsOrNone = waitForResult(
                listener,
                computationId,
                clientVdm,
                timeout=timeout,
                wantsStats=True
                )

            computationReturnTime = time.time()

            if valAndStatsOrNone is None:
                #we timed out
                val = None
                stats = None
                timedOut = True
            else:
                val, stats = valAndStatsOrNone
                timedOut = False

            simulationDict.update({
                "result": val,
                "stats": stats,
                "timedOut": timedOut,
                "computationId": computationId,
                "listener": listener,
                "totalTimeToReturnResult": computationReturnTime - computationSubmitTime
                })

            return simulationDict
        else:
            return waitForResult(
                listener,
                computationId,
                clientVdm,
                timeout=timeout,
                wantsStats=wantsStats
                )
    finally:
        # tear down on every path (including timeout/exception) unless the
        # caller explicitly asked to keep the simulation running
        if not keepSimulationAlive:
            teardownSimulation(simulationDict)