Example #1
    def test_disk_scans(self):
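        # Writes several ~1GB vectors through a disk-backed S3 object store, then
        # scans them back by summing each one, recording both timings.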
        s3 = ActualS3Interface.ActualS3InterfaceFactory()
        objectStore = S3ObjectStore.S3ObjectStore(
            s3, Setup.config().userDataS3Bucket, prefix="test_object_cache/")

        _, simulation = InMemoryCumulusSimulation.computeUsingSeveralWorkers(
            "1+1",
            s3,
            1,
            memoryLimitMb=1 * 1024,
            threadCount=30,
            returnSimulation=True,
            ioTaskThreadOverride=8,
            objectStore=objectStore,
            useInMemoryCache=False  #use an actual disk cache for this
        )

        try:
            gigabytes = 8

            t0 = time.time()

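            # Each Vector.range(125000000 + ix) is roughly 1GB of integer data, so
            # this loop writes about 8GB through the disk-backed object store.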
            resultVectors = []
            for ix in range(gigabytes):
                result = simulation.compute("Vector.range(125000000 + %s)" %
                                            ix,
                                            timeout=120)
                resultVectors.append(result.asResult.result)

            t1 = time.time()

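            # Summing each vector forces the data that was written out to be scanned back in.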
            intResults = []
            for vec in resultVectors:
                result = simulation.compute("v.sum()", timeout=120, v=vec)
                intResults.append(result.asResult.result.pyval)

            self.assertEqual(len(intResults), gigabytes)

            PerformanceTestReporter.recordTest("python.BigBox.Disk.Write.10GB",
                                               t1 - t0, None)

            PerformanceTestReporter.recordTest(
                "python.BigBox.Disk.WriteAndScan.%sGB" % gigabytes,
                time.time() - t0, None)
        finally:
            simulation.teardown()
Example #2
def createObjectStore(s3Service):
    return S3ObjectStore.S3ObjectStore(
        s3Service,
        Setup.config().userDataS3Bucket,
        prefix="test_object_cache/")
    def __init__(self,
                workerCount,
                clientCount,
                memoryPerWorkerMB,
                threadsPerWorker,
                s3Service,
                objectStore=None,
                callbackScheduler=None,
                sharedStateViewFactory=None,
                ioTaskThreadOverride=None,
                useInMemoryCache=True,
                channelThroughputMBPerSecond=None,
                pageSizeOverride=None,
                disableEventHandler=False,
                machineIdHashSeed=None
                ):
        self.useInMemoryCache = useInMemoryCache
        self.machineIdHashSeed = machineIdHashSeed

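        # Disk-backed page storage: use a subdirectory of CUMULUS_DATA_DIR if it is
        # set, otherwise a freshly created temporary directory.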
        if not self.useInMemoryCache:
            self.diskCacheCount = 0
            if os.getenv("CUMULUS_DATA_DIR") is None:
                self.diskCacheStorageDir = tempfile.mkdtemp()
            else:
                self.diskCacheStorageDir = os.path.join(
                    os.getenv("CUMULUS_DATA_DIR"),
                    str(uuid.uuid4())
                    )
        self.ioTaskThreadOverride = ioTaskThreadOverride
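        # workerCount and clientCount start at zero here; they are built up by the
        # addWorker()/addClient() calls at the end of the constructor.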
        self.workerCount = 0
        self.disableEventHandler = disableEventHandler
        self.clientCount = 0
        self.memoryPerWorkerMB = memoryPerWorkerMB
        self.threadsPerWorker = threadsPerWorker
        self.s3Service = s3Service
        self.objectStore = objectStore
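        # Default object store: against the in-memory S3 stub, create the bucket by
        # writing and deleting a dummy key; otherwise use the configured user-data bucket.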
        if self.objectStore is None:
            s3 = s3Service()
            if isinstance(s3, InMemoryS3Interface.InMemoryS3Interface):
                objectStoreBucket = "object_store_bucket"
                s3.setKeyValue(objectStoreBucket, 'dummyKey', 'dummyValue')
                s3.deleteKey(objectStoreBucket, 'dummyKey')
            else:
                objectStoreBucket = Setup.config().userDataS3Bucket
            self.objectStore = S3ObjectStore.S3ObjectStore(
                s3Service,
                objectStoreBucket,
                prefix="test/")
        self.callbackScheduler = callbackScheduler or CallbackScheduler.singletonForTesting()
        self.sharedStateViewFactory = (
            sharedStateViewFactory or createInMemorySharedStateViewFactory(self.callbackScheduler)
            )
        self.channelThroughputMBPerSecond = channelThroughputMBPerSecond
        self.resultVDM = ForaNative.VectorDataManager(self.callbackScheduler, 5 * 1024 * 1024)
        self.pageSizeOverride = pageSizeOverride

        self.rateLimitedChannelGroupsForEachListener = []
        self.workersVdmsAndEventHandlers = []
        self.machineIds = []
        self.machineIdsEverAllocated = 0
        self.clientsAndVdms = []
        self.loadingServices = []
        self.clientTeardownGates = []
        self.workerTeardownGates = []


        for ix in range(workerCount):
            self.addWorker()
        for ix in range(clientCount):
            self.addClient()

        if clientCount:
            self.listener = self.getClient(0).createListener()
        else:
            self.listener = None
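
A minimal usage sketch for the constructor above, assuming the class name added
above, an InMemoryS3InterfaceFactory helper analogous to the
ActualS3InterfaceFactory used in Example #1, and the compute()/teardown()
methods that Example #1 calls on its simulation object:

    # Hypothetical direct construction of the simulation class (sketch only).
    s3Factory = InMemoryS3Interface.InMemoryS3InterfaceFactory()  # assumed factory

    simulation = InMemoryCumulusSimulation(
        workerCount=2,
        clientCount=1,
        memoryPerWorkerMB=100,
        threadsPerWorker=2,
        s3Service=s3Factory
        )
    try:
        # compute() and teardown() are the calls Example #1 makes on its simulation
        result = simulation.compute("1+1", timeout=30)
    finally:
        simulation.teardown()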
Example #4
def computeUsingSeveralWorkers(expressionText,
                               s3Service,
                               count,
                               objectStore=None,
                               wantsStats=False,
                               timeout=10,
                               returnEverything=False,
                               memoryLimitMb=100,
                               blockUntilConnected=False,
                               keepSimulationAlive=False,
                               sharedStateViewFactory=None,
                               threadCount=2):
    if keepSimulationAlive:
        assert returnEverything, \
            "can't keep the simulation alive and not return it. how would you shut it down?"

    callbackSchedulerToUse = CallbackScheduler.singletonForTesting()

    if sharedStateViewFactory is None:
        sharedStateViewFactory = createInMemorySharedStateViewFactory(
            callbackSchedulerToUse=callbackSchedulerToUse)

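    # Spin up `count` workers and a single client sharing the shared-state view factory.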
    workersVdmsAndEventHandlers, clientsAndVdms, viewFactory = (
        createWorkersAndClients(count,
                                1,
                                sharedStateViewFactory,
                                memoryLimitMb=memoryLimitMb,
                                threadCount=threadCount))

    client = clientsAndVdms[0][0]
    clientVdm = clientsAndVdms[0][1]

    loadingServices = []

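    # Give every worker a PythonIoTaskService to serve its external dataset requests;
    # if no object store was passed in, one is built against the configured user-data
    # bucket on the first iteration and reused for the remaining workers.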
    for ix in range(len(workersVdmsAndEventHandlers)):
        worker = workersVdmsAndEventHandlers[ix][0]
        workerVdm = workersVdmsAndEventHandlers[ix][1]

        s3InterfaceFactory = s3Service.withMachine(ix)
        if objectStore is None:
            objectStore = S3ObjectStore.S3ObjectStore(
                s3InterfaceFactory,
                Setup.config().userDataS3Bucket,
                prefix="test/")

        loadingService = PythonIoTaskService.PythonIoTaskService(
            s3InterfaceFactory, objectStore, workerVdm,
            worker.getExternalDatasetRequestChannel(
                callbackSchedulerToUse).makeQueuelike(callbackSchedulerToUse))
        loadingService.startService()

        loadingServices.append(loadingService)

    if blockUntilConnected:
        for worker, vdm, eventHandler in workersVdmsAndEventHandlers:
            blockUntilWorkerIsConnected(worker, 2.0)

    if isinstance(expressionText, CumulusNative.ComputationDefinition):
        computationDefinition = expressionText
    else:
        computationDefinition = (createComputationDefinition(
            FORA.extractImplValContainer(
                FORA.eval("fun() {" + expressionText + " } ")),
            ForaNative.makeSymbol("Call")))

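    # Collect each VDM's teardown gate so teardownSimulation() can shut everything down cleanly.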
    teardownGates = []
    for client, vdm in clientsAndVdms:
        teardownGates.append(vdm.getVdmmTeardownGate())

    for worker, vdm, eventHandler in workersVdmsAndEventHandlers:
        teardownGates.append(vdm.getVdmmTeardownGate())

    simulationDict = {
        "result": None,
        "timedOut": None,
        "stats": None,
        "clientsAndVdms": clientsAndVdms,
        "workersVdmsAndEventHandlers": workersVdmsAndEventHandlers,
        "s3Service": s3Service,
        "loadingServices": loadingServices,
        "sharedStateViewFactory": sharedStateViewFactory,
        "client": client,
        "teardownGates": teardownGates
    }
    try:
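        # Submit the computation, give it a nonzero priority so it will be scheduled,
        # and block in waitForResult until it finishes or the timeout elapses.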
        listener = client.createListener()

        computationSubmitTime = time.time()

        computationId = client.createComputation(computationDefinition)

        client.setComputationPriority(computationId,
                                      CumulusNative.ComputationPriority(1))

        if returnEverything:
            valAndStatsOrNone = waitForResult(listener,
                                              computationId,
                                              clientVdm,
                                              timeout=timeout,
                                              wantsStats=True)

            computationReturnTime = time.time()

            if valAndStatsOrNone is None:
                #we timed out
                val = None
                stats = None
                timedOut = True
            else:
                val, stats = valAndStatsOrNone
                timedOut = False

            simulationDict.update({
                "result": val,
                "stats": stats,
                "timedOut": timedOut,
                "computationId": computationId,
                "listener": listener,
                "totalTimeToReturnResult": computationReturnTime - computationSubmitTime
            })

            return simulationDict
        else:
            return waitForResult(listener,
                                 computationId,
                                 clientVdm,
                                 timeout=timeout,
                                 wantsStats=wantsStats)
    finally:
        if not keepSimulationAlive:
            teardownSimulation(simulationDict)