Example #1
0
    def handleCompleteMultipartS3Upload(self, request):
        """Finish the multipart S3 upload described by 'request' and ack success.

        Reads the credentials and upload id from the request payload, delegates
        the completion to PythonIoTasks, then writes a Success response tagged
        with the request's guid to the dataset request channel.
        """
        upload = request.asCompleteMultipartS3Upload
        creds = upload.credentials

        PythonIoTasks.completeMultipartS3Upload(
            self.s3Interface,
            self.outOfProcessDownloaderPool,
            creds.bucketname,
            creds.keyname,
            creds.awsAccessKey,
            creds.awsSecretKey,
            creds.region,
            upload.uploadId)

        self.datasetRequestChannel_.write(
            CumulusNative.PythonIoTaskResponse.Success(request.guid))
Example #2
0
    def handleLoadExternalDatasetRequest(self, request, guid):
        """Load an external dataset into the VDM, log the timing, ack 'guid'."""
        startTime = time.time()

        PythonIoTasks.loadExternalDataset(
            self.s3Interface, request, self.vdm_, self.outOfProcessDownloaderPool)

        #lazy %-style args keep formatting out of the hot path
        logging.info(
            "PythonIoTaskService succeeded in loading %s in %s. tasks=%s",
            request,
            time.time() - startTime,
            self.totalTasks)

        self.datasetRequestChannel_.write(
            CumulusNative.PythonIoTaskResponse.Success(guid))
Example #3
0
    def deletePersistedObject(self, keyname):
        """Delete the persisted object stored under 'keyname'.

        Returns None on success (including the case where the deletion raised
        but the object is no longer listed), or an error-message string
        describing the failure, including tracebacks.
        """
        try:
            PythonIoTasks.deletePersistedObject(
                keyname, self.objectStore, self.outOfProcessDownloaderPool)
        except Exception:
            #'except Exception' rather than bare 'except:' so we don't swallow
            #KeyboardInterrupt/SystemExit
            message = "Error deleting serialized object: %s:\n%s" % (
                keyname, traceback.format_exc())

            #see if the object shows up as a listed object
            try:
                if len(self.objectStore.listValues(keyname)) == 0:
                    #if not, then we can consider the deletion a success
                    return
            except Exception:
                message += "\n\nError while trying to list object:\n%s" % (
                    traceback.format_exc())

            #BUGFIX: the original fell off the end here when the object was
            #still listed, silently returning None and losing the message.
            return message
Example #4
0
    def handleCompleteMultipartS3Upload(self, request):
        """Complete a pending multipart S3 upload and report success."""
        details = request.asCompleteMultipartS3Upload
        credentials = details.credentials

        PythonIoTasks.completeMultipartS3Upload(
            self.s3Interface, self.outOfProcessDownloaderPool,
            credentials.bucketname, credentials.keyname,
            credentials.awsAccessKey, credentials.awsSecretKey,
            credentials.region, details.uploadId)

        response = CumulusNative.PythonIoTaskResponse.Success(request.guid)
        self.datasetRequestChannel_.write(response)
Example #5
0
    def handleLoadExternalDatasetAsForaValue(self, toRequest):
        """Load the requested external dataset as a FORA value and reply.

        The result is sent back on the dataset request channel as a
        DatasetAsForaValue response keyed by the request's guid.
        """
        dataset = toRequest.asLoadExternalDatasetAsForaValue.toLoad

        loadedValue = PythonIoTasks.loadExternalDatasetAsForaValue(
            dataset, self.vdm_)

        self.datasetRequestChannel_.write(
            CumulusNative.PythonIoTaskResponse.DatasetAsForaValue(
                toRequest.guid, loadedValue))
Example #6
0
 def writeToObjectStore(self, persistObjectRequest):
     """Persist an object to the object store.

     Returns (None, dataSize) on success, or (message, None) where message
     describes the failure including the traceback.
     """
     try:
         dataSize = PythonIoTasks.persistObject(
             persistObjectRequest, self.objectStore,
             self.outOfProcessDownloaderPool)
         return None, dataSize
     except Exception:
         #'except Exception' rather than bare 'except:' so we don't swallow
         #KeyboardInterrupt/SystemExit
         message = "Error writing serialized object: %s:\n%s" % (
             persistObjectRequest.objectPath, traceback.format_exc())
         return message, None
Example #7
0
    def listPersistedObjects(self, prefix):
        """List persisted objects whose keys start with 'prefix'.

        Returns (None, result) on success, or (message, None) where message
        describes the failure including the traceback.
        """
        try:
            result = PythonIoTasks.listPersistedObjects(
                prefix, self.objectStore, self.outOfProcessDownloaderPool)
            return None, result
        except Exception:
            #'except Exception' rather than bare 'except:' so we don't swallow
            #KeyboardInterrupt/SystemExit
            message = "Error listing persisted objects: %s:\n%s" % (
                prefix, traceback.format_exc())

            return message, None
Example #8
0
    def deletePersistedObject(self, keyname):
        """Delete the persisted object stored under 'keyname'.

        Returns None on success (including the case where the deletion raised
        but the object is no longer listed), or an error-message string
        describing the failure, including tracebacks.
        """
        try:
            PythonIoTasks.deletePersistedObject(keyname,
                                                self.objectStore,
                                                self.outOfProcessDownloaderPool)
        except Exception:
            #'except Exception' rather than bare 'except:' so we don't swallow
            #KeyboardInterrupt/SystemExit
            message = "Error deleting serialized object: %s:\n%s" % (
                keyname,
                traceback.format_exc()
                )

            #see if the object shows up as a listed object
            try:
                if len(self.objectStore.listValues(keyname)) == 0:
                    #if not, then we can consider the deletion a success
                    return
            except Exception:
                message += "\n\nError while trying to list object:\n%s" % (
                    traceback.format_exc()
                    )

            #BUGFIX: the original fell off the end here when the object was
            #still listed, silently returning None and losing the message.
            return message
Example #9
0
 def writeToObjectStore(self, persistObjectRequest):
     """Persist an object to the object store.

     Returns (None, dataSize) on success, or (message, None) where message
     describes the failure including the traceback.
     """
     try:
         dataSize = PythonIoTasks.persistObject(persistObjectRequest,
                                                self.objectStore,
                                                self.outOfProcessDownloaderPool)
         return None, dataSize
     except Exception:
         #'except Exception' rather than bare 'except:' so we don't swallow
         #KeyboardInterrupt/SystemExit
         message = "Error writing serialized object: %s:\n%s" % (
             persistObjectRequest.objectPath,
             traceback.format_exc()
             )
         return message, None
Example #10
0
    def handleInitiateMultipartS3Upload(self, request):
        """Start a multipart S3 upload and reply with the new upload id.

        The upload id returned by PythonIoTasks is sent back on the dataset
        request channel in a MultipartS3UploadInitiated response.
        """
        initiate = request.asInitiateMultipartS3Upload
        creds = initiate.credentials

        uploadId = PythonIoTasks.initiateMultipartS3Upload(
            self.s3Interface,
            self.outOfProcessDownloaderPool,
            creds.bucketname,
            creds.keyname,
            creds.awsAccessKey,
            creds.awsSecretKey,
            creds.region)

        self.datasetRequestChannel_.write(
            CumulusNative.PythonIoTaskResponse.MultipartS3UploadInitiated(
                request.guid, uploadId))
Example #11
0
    def handleLoadExternalDatasetRequest(self, request, guid):
        """Load an external dataset and acknowledge the request by guid."""
        loadStarted = time.time()

        PythonIoTasks.loadExternalDataset(
            self.s3Interface, request, self.vdm_, self.outOfProcessDownloaderPool)

        elapsed = time.time() - loadStarted
        logging.info(
            "PythonIoTaskService succeeded in loading %s in %s. tasks=%s",
            request, elapsed, self.totalTasks)

        self.datasetRequestChannel_.write(
            CumulusNative.PythonIoTaskResponse.Success(guid))
Example #12
0
    def listPersistedObjects(self, prefix):
        """List persisted objects whose keys start with 'prefix'.

        Returns (None, result) on success, or (message, None) where message
        describes the failure including the traceback.
        """
        try:
            result = PythonIoTasks.listPersistedObjects(prefix,
                                                          self.objectStore,
                                                          self.outOfProcessDownloaderPool)
            return None, result
        except Exception:
            #'except Exception' rather than bare 'except:' so we don't swallow
            #KeyboardInterrupt/SystemExit
            message = "Error listing persisted objects: %s:\n%s" % (
                prefix,
                traceback.format_exc()
                )

            return message, None
Example #13
0
    def handleLoadExternalDatasetAsForaValue(self, toRequest):
        """Evaluate the requested external dataset as a FORA value and reply."""
        foraValue = PythonIoTasks.loadExternalDatasetAsForaValue(
            toRequest.asLoadExternalDatasetAsForaValue.toLoad, self.vdm_)

        response = CumulusNative.PythonIoTaskResponse.DatasetAsForaValue(
            toRequest.guid, foraValue)
        self.datasetRequestChannel_.write(response)
Example #14
0
    def handleInitiateMultipartS3Upload(self, request):
        """Kick off a multipart S3 upload; reply with the upload id."""
        params = request.asInitiateMultipartS3Upload.credentials

        newUploadId = PythonIoTasks.initiateMultipartS3Upload(
            self.s3Interface, self.outOfProcessDownloaderPool,
            params.bucketname, params.keyname,
            params.awsAccessKey, params.awsSecretKey, params.region)

        self.datasetRequestChannel_.write(
            CumulusNative.PythonIoTaskResponse.MultipartS3UploadInitiated(
                request.guid, newUploadId))
Example #15
0
    def extractPersistedObject(self, keyname):
        """Read back a persisted object by key.

        Returns (None, result) on success, (None, None) if the object isn't
        listed in the store (treated as not-found rather than an error), or
        (message, None) describing the failure, including tracebacks.
        """
        try:
            result = PythonIoTasks.extractPersistedObject(
                keyname, self.objectStore, self.outOfProcessDownloaderPool)
            return None, result
        except Exception:
            #'except Exception' rather than bare 'except:' so we don't swallow
            #KeyboardInterrupt/SystemExit
            message = "Error reading serialized object: %s:\n%s" % (
                keyname, traceback.format_exc())

            #see if the object shows up as a listed object
            try:
                if len(self.objectStore.listValues(keyname)) == 0:
                    return None, None
            except Exception:
                message += "\n\nError while trying to list serialized object:\n%s" % (
                    traceback.format_exc())

            return message, None
Example #16
0
    def extractPersistedObject(self, keyname):
        """Read back a persisted object by key.

        Returns (None, result) on success, (None, None) if the object isn't
        listed in the store (treated as not-found rather than an error), or
        (message, None) describing the failure, including tracebacks.
        """
        try:
            result = PythonIoTasks.extractPersistedObject(keyname,
                                                          self.objectStore,
                                                          self.outOfProcessDownloaderPool)
            return None, result
        except Exception:
            #'except Exception' rather than bare 'except:' so we don't swallow
            #KeyboardInterrupt/SystemExit
            message = "Error reading serialized object: %s:\n%s" % (
                keyname,
                traceback.format_exc()
                )

            #see if the object shows up as a listed object
            try:
                if len(self.objectStore.listValues(keyname)) == 0:
                    return None, None
            except Exception:
                message += "\n\nError while trying to list serialized object:\n%s" % (
                    traceback.format_exc()
                    )

            return message, None
Example #17
0
    def computeOneNode(self, node):
        """push 'node' one step further in its computation requirements

        self.intermediates_[node] either contains a list of values to be computed
        or an execution context

        A node is either a tuple (a fresh apply tuple) or a
        SplitSubcomputation (a paused computation tree produced by a split).
        self.intermediates_[node] is in one of three states:
          * None - computation hasn't started yet
          * a FORANative.ExecutionContext - a pending cache-request node
          * otherwise - a split result whose children must be absorbed
        """

        if self.intermediates_[node] is None:
            context = self.grabContext()

            #the intermediates can either be None or
            #an execution context. in this case, since its a list
            #we have not even started computation yet, so we need to create
            #an ExecutionContext and begin computing
            with self.contextEnterer(context):
                context.resetInterruptState()
                if isinstance(node, tuple):
                    with freestoreLock:
                        #this operation may be copying values in the freestore as we're
                        #updating them, so we need to do it under a lock
                        context.placeInEvaluationStateWithoutRenamingMutableVectors(
                            ImplValContainer_(tuple(node))
                            )
                    context.compute()

                elif isinstance(node, SplitSubcomputation):
                    context.resumePausedComputation(node.pausedComputationTree)
                    context.resetInterruptState()
                    context.compute()
                else:
                    assert False, "don't know what to do with node of type %s" % node

            self.intermediates_[node] = context

        elif isinstance(self.intermediates_[node], FORANative.ExecutionContext):
            #this was a cacherequest node, and if we're here, we filled them
            #all out
            context = self.intermediates_[node]

            req = context.getCacheRequest()

            if CacheSemantics.isCacheRequestWithResult(req):
                #the cache request carries its own result - feed it straight back
                result = CacheSemantics.getCacheRequestComputationResult(req)

                with self.contextEnterer(context):
                    context.resetInterruptState()
                    context.addCachecallResult(result)
                    context.compute()
            else:
                cacheCalls = [x.extractApplyTuple() for x in CacheSemantics.processCacheCall(req)]

                #collect the finished results for each cache call; the first
                #exception encountered (if any) takes precedence over results
                res = []
                exception = None
                for t in cacheCalls:
                    assert t in self.finishedValuesAndTimeElapsed_, (
                        "Couldn't find result for: %s in %s" %
                            (t,"\n".join([str(x) for x in self.finishedValuesAndTimeElapsed_.keys()]))
                        )
                    if self.finishedValuesAndTimeElapsed_[t][0].isException():
                        if exception is None:
                            exception = self.finishedValuesAndTimeElapsed_[t][0]
                    else:
                        res.append(self.finishedValuesAndTimeElapsed_[t][0].asResult.result)

                with self.contextEnterer(context):
                    if exception:
                        context.resetInterruptState()
                        context.addCachecallResult(exception)
                        context.compute()
                    else:
                        context.resetInterruptState()
                        context.addCachecallResult(
                            ComputationResult_.Result(
                                ImplValContainer_(tuple(res))
                                )
                            )
                        context.compute()
        else:
            #this was a split request
            splitResult = self.intermediates_[node]

            #absorb each finished child back into the parent context,
            #consuming its entry in finishedValuesAndTimeElapsed_
            for ix in range(len(splitResult.splits)):
                child = splitResult.childComputations[ix]

                assert child in self.finishedValuesAndTimeElapsed_

                value = self.finishedValuesAndTimeElapsed_[child][0]
                timeElapsed = self.finishedValuesAndTimeElapsed_[child][1]
                del self.finishedValuesAndTimeElapsed_[child]

                if value.isFailure():
                    #a failed child fails the whole node; release the context
                    self.finishNode_(node, value)
                    self.checkContextBackIn(splitResult.context)
                    return
                else:
                    splitResult.context.absorbSplitResult(
                        splitResult.splits[ix].computationHash,
                        value,
                        timeElapsed
                        )

            #all children absorbed - resume the parent computation
            with self.lock_:
                context = splitResult.context
                context.resetInterruptState()
                self.intermediates_[node] = context
                with self.contextEnterer(context):
                    context.compute()

        #drive 'context' until it finishes, needs a vector load, gets
        #interrupted (possibly splitting), or issues a cache request
        while True:
            if context.isFinished():
                result = context.getFinishedResult()
                timeElapsed = context.getTotalTimeElapsed()

                self.checkContextBackIn(context)

                #now, wake up any dependencies
                self.finishNode_(node, result, timeElapsed)
                return

            elif context.isVectorLoad():
                #the context needs vector pages before it can proceed: try the
                #offline cache first, then fall back to the external dataset
                for vectorToLoad in context.getVectorLoadAsVDIDs():
                    toLoad = None
                    loaded = False

                    if self.offlineCache_ is not None:
                        toLoad = self.offlineCache_.loadIfExists(vectorToLoad.page)
                        if toLoad is not None:
                            self.vdm_.loadSerializedVectorPage(vectorToLoad.page, toLoad)
                            loaded = True

                    if not loaded and vectorToLoad.isExternal():
                        #is this an external dataset, attempt to load it from there
                        PythonIoTasks.loadExternalDataset(
                            getCurrentS3Interface(),
                            vectorToLoad,
                            self.vdm_,
                            self.inProcessDownloader
                            )
                        loaded = True

                    assert loaded, "lost the definition for VDID: %s" % vectorToLoad

                with self.contextEnterer(context):
                    context.resetInterruptState()
                    context.compute()
                #go back around and try again

            elif context.isInterrupted():
                #interrupted: decide whether to split into child computations
                toResume = None
                if self.checkShouldSplit(context):
                    splits = context.splitComputation()

                    if splits is not None:
                        with self.lock_:
                            splitResult = self.computeIntermediatesForSplitResult(node, splits, context)
                            self.intermediates_[node] = splitResult
                            return

                #if we're here, then we didn't split
                #go back around and try again
                with self.contextEnterer(context):
                    if toResume is not None:
                        context.resumePausedComputation(toResume)
                    context.resetInterruptState()
                    context.compute()


            elif context.isCacheRequest():
                #these are the new dependencies
                req = context.getCacheRequest()

                deps = set()

                if CacheSemantics.isCacheRequestWithResult(req):
                    pass
                else:
                    cacheCalls = [x.extractApplyTuple() for x in CacheSemantics.processCacheCall(req)]

                    with self.lock_:
                        #register any dependencies
                        for t in cacheCalls:
                            if t not in self.finishedValuesAndTimeElapsed_ and t not in self.intermediates_:
                                #its a new request
                                self.intermediates_[t] = None
                                self.completable_.put(t)
                                self.watchers_[t] = threading.Event()
                            if t not in self.finishedValuesAndTimeElapsed_:
                                deps.add(t)
                        self.dependencies_[node] = deps

                if not deps:
                    #we could go again
                    with self.lock_:
                        self.completable_.put(node)
                return
Example #18
0
    def computeOneNode(self, node):
        """push 'node' one step further in its computation requirements

        self.intermediates_[node] either contains a list of values to be computed
        or an execution context

        A node is either a tuple (a fresh apply tuple) or a
        FORANative.PausedComputation.  self.intermediates_[node] is in one
        of three states:
          * None - computation hasn't started yet
          * a FORANative.ExecutionContext - a pending cache-request node
          * a (splitResult, splitComputationLog) pair - a futures-based split
            whose completed slots must be fed back in
        """

        if self.intermediates_[node] is None:
            context = self.grabContext()

            #the intermediates can either be None or
            #an execution context. in this case, since its a list
            #we have not even started computation yet, so we need to create
            #an ExecutionContext and begin computing
            with self.contextEnterer(context):
                context.resetInterruptState()
                if isinstance(node, tuple):
                    with freestoreLock:
                        #this operation may be copying values in the freestore as we're
                        #updating them, so we need to do it under a lock
                        context.placeInEvaluationStateWithoutRenamingMutableVectors(*node)
                    context.resume()

                elif isinstance(node, FORANative.PausedComputation):
                    context.resumePausedComputation(node)
                    context.resetInterruptState()
                    context.resume()
                else:
                    assert False, "don't know what to do with node of type %s" % node

            self.intermediates_[node] = context

        elif isinstance(self.intermediates_[node], FORANative.ExecutionContext):
            #this was a cacherequest node, and if we're here, we filled them
            #all out
            context = self.intermediates_[node]

            req = context.getCacheRequest()

            if CacheSemantics.isVectorCacheLoadRequest(req):
                #vector loads resume with an empty result pair
                with self.contextEnterer(context):
                    context.resetInterruptState()
                    context.resume(
                        ComputationResult_.Result(
                            ImplValContainer_(),
                            ImplValContainer_()
                            )
                        )
            elif CacheSemantics.isCacheRequestWithResult(req):
                #the cache request carries its own result - feed it straight back
                result = CacheSemantics.getCacheRequestComputationResult(req)

                with self.contextEnterer(context):
                    context.resetInterruptState()
                    context.resume(result)
            else:
                cacheCalls = [x.extractApplyTuple() for x in CacheSemantics.processCacheCall(req)]

                #collect the finished results for each cache call; the first
                #exception encountered (if any) takes precedence over results
                res = []
                exception = None
                for t in cacheCalls:
                    assert t in self.finishedValues_, (
                        "Couldn't find result for: %s in %s" %
                            (t,"\n".join([str(x) for x in self.finishedValues_.keys()]))
                        )
                    if self.finishedValues_[t].isException():
                        if exception is None:
                            exception = self.finishedValues_[t]
                    else:
                        res.append(self.finishedValues_[t].asResult.result)

                with self.contextEnterer(context):
                    if exception:
                        context.resetInterruptState()
                        context.resume(exception)
                    else:
                        context.resetInterruptState()
                        context.resume(
                            ComputationResult_.Result(
                                ImplValContainer_(tuple(res)),
                                ImplValContainer_()
                                )
                            )
        else:
            #this was a split request
            splitResult, splitComputationLog = self.intermediates_[node]

            #feed each completed child computation into its future slot
            for slotComputation in splitResult.submittedComputations():
                assert slotComputation in self.finishedValues_

                value = self.finishedValues_[slotComputation]

                if value.isFailure():
                    #a failed child fails the whole node
                    self.finishNode_(node, value)
                    return
                else:
                    splitResult.futuresSplitResult.slotCompleted(
                        splitResult.computationsToSlotIndices[slotComputation],
                        value)
                    del splitResult.computationsToSlotIndices[slotComputation]

            submittableFutures = splitResult.futuresSplitResult.indicesOfSubmittableFutures()

            if len(submittableFutures) == 0:
                #nothing left to submit: rebuild a context from the final
                #paused computation and resume it
                context = self.grabContext()

                toResumeWith = splitResult.futuresSplitResult.getFinalResult()
                context.resumePausedComputation(toResumeWith)
                context.resetInterruptState()
                self.intermediates_[node] = context
                with self.contextEnterer(context):
                    context.resume()

            else:
                with self.lock_:
                    futuresSplitResult = splitResult.futuresSplitResult

                    isFinished, result = self.findMeatyPausedComputations(futuresSplitResult)

                    if not isFinished:
                        #still more child computations to schedule
                        splitResult = self.computeIntermediatesForSplitResult(
                            node, futuresSplitResult, result)

                        self.intermediates_[node] = (splitResult, [])

                        return

                    else:
                        #the split collapsed to a single paused computation
                        toResume = result
                        context = self.grabContext()
                        context.resumePausedComputation(toResume)
                        context.resetInterruptState()
                        self.intermediates_[node] = context
                        with self.contextEnterer(context):
                            context.resume()

        #drive 'context' until it finishes, needs a vector load, gets
        #interrupted (possibly splitting), or issues a cache request
        while True:
            if context.isFinished():
                result = context.getFinishedResult()
                self.checkContextBackIn(context)

                #now, wake up any dependencies
                self.finishNode_(node, result)
                return

            elif context.isVectorLoad():
                #the context needs vector pages before it can proceed: try the
                #offline cache first, then fall back to the external dataset
                for vectorToLoad in context.getVectorLoadAsVDIDs():
                    toLoad = None
                    loaded = False

                    if self.offlineCache_ is not None:
                        toLoad = self.offlineCache_.loadIfExists(vectorToLoad.page)
                        if toLoad is not None:
                            self.vdm_.loadSerializedVectorPage(vectorToLoad.page, toLoad)
                            loaded = True

                    if not loaded and vectorToLoad.isExternal():
                        #is this an external dataset, attempt to load it from there
                        PythonIoTasks.loadExternalDataset(
                            getCurrentS3Interface(),
                            vectorToLoad,
                            self.vdm_,
                            self.inProcessDownloader
                            )
                        loaded = True

                    assert loaded, "lost the definition for VDID: %s" % vectorToLoad

                with self.contextEnterer(context):
                    context.resetInterruptState()
                    context.resume()
                #go back around and try again

            elif context.isInterrupted():
                #interrupted: decide whether to split using futures
                toResume = None
                if self.checkShouldSplit(context):
                    futuresSplitResult = context.splitWithFutures()

                    if futuresSplitResult is not None:
                        with self.lock_:
                            futuresSplitResult.disallowRepeatNodes()

                            isFinished, result = self.findMeatyPausedComputations(futuresSplitResult)

                            if not isFinished:
                                splitResult = self.computeIntermediatesForSplitResult(
                                    node, futuresSplitResult, result)

                                self.intermediates_[node] = (splitResult, context.getComputationLog())

                                self.checkContextBackIn(context)
                                return
                            else:
                                toResume = result

                #if we're here, then we didn't split
                #go back around and try again
                with self.contextEnterer(context):
                    if toResume is not None:
                        context.resumePausedComputation(toResume)
                    context.resetInterruptState()
                    context.resume()


            elif context.isCacheRequest():
                #these are the new dependencies
                req = context.getCacheRequest()

                deps = set()

                if CacheSemantics.isVectorCacheLoadRequest(req):
                    pass
                elif CacheSemantics.isCacheRequestWithResult(req):
                    pass
                else:
                    cacheCalls = [x.extractApplyTuple() for x in CacheSemantics.processCacheCall(req)]

                    with self.lock_:
                        #register any dependencies
                        for t in cacheCalls:
                            if t not in self.finishedValues_ and t not in self.intermediates_:
                                #its a new request
                                self.intermediates_[t] = None
                                self.completable_.put(t)
                                self.watchers_[t] = threading.Event()
                            if t not in self.finishedValues_:
                                deps.add(t)
                        self.dependencies_[node] = deps

                if not deps:
                    #we could go again
                    with self.lock_:
                        self.completable_.put(node)
                return