def handleCompleteMultipartS3Upload(self, request):
    PythonIoTasks.completeMultipartS3Upload(
        self.s3Interface,
        self.outOfProcessDownloaderPool,
        request.asCompleteMultipartS3Upload.credentials.bucketname,
        request.asCompleteMultipartS3Upload.credentials.keyname,
        request.asCompleteMultipartS3Upload.credentials.awsAccessKey,
        request.asCompleteMultipartS3Upload.credentials.awsSecretKey,
        request.asCompleteMultipartS3Upload.credentials.region,
        request.asCompleteMultipartS3Upload.uploadId
        )

    self.datasetRequestChannel_.write(
        CumulusNative.PythonIoTaskResponse.Success(request.guid)
        )
def handleLoadExternalDatasetRequest(self, request, guid):
    t0 = time.time()

    PythonIoTasks.loadExternalDataset(
        self.s3Interface,
        request,
        self.vdm_,
        self.outOfProcessDownloaderPool
        )

    logging.info(
        "PythonIoTaskService succeeded in loading %s in %s. tasks=%s",
        request,
        time.time() - t0,
        self.totalTasks
        )

    self.datasetRequestChannel_.write(
        CumulusNative.PythonIoTaskResponse.Success(guid)
        )
def deletePersistedObject(self, keyname):
    try:
        PythonIoTasks.deletePersistedObject(
            keyname,
            self.objectStore,
            self.outOfProcessDownloaderPool
            )
    except:
        message = "Error deleting serialized object: %s:\n%s" % (
            keyname,
            traceback.format_exc()
            )

        #see if the object shows up as a listed object
        try:
            if len(self.objectStore.listValues(keyname)) == 0:
                #if not, then we can consider the deletion a success
                return
        except:
            message += "\n\nError while trying to list object:\n%s" % (
                traceback.format_exc()
                )

        return message
def handleLoadExternalDatasetAsForaValue(self, toRequest):
    externalDataset = toRequest.asLoadExternalDatasetAsForaValue.toLoad

    result = PythonIoTasks.loadExternalDatasetAsForaValue(
        externalDataset,
        self.vdm_
        )

    self.datasetRequestChannel_.write(
        CumulusNative.PythonIoTaskResponse.DatasetAsForaValue(
            toRequest.guid,
            result
            )
        )
def writeToObjectStore(self, persistObjectRequest):
    try:
        dataSize = PythonIoTasks.persistObject(
            persistObjectRequest,
            self.objectStore,
            self.outOfProcessDownloaderPool
            )
        return None, dataSize
    except:
        message = "Error writing serialized object: %s:\n%s" % (
            persistObjectRequest.objectPath,
            traceback.format_exc()
            )
        return message, None
def listPersistedObjects(self, prefix):
    try:
        result = PythonIoTasks.listPersistedObjects(
            prefix,
            self.objectStore,
            self.outOfProcessDownloaderPool
            )
        return None, result
    except:
        message = "Error listing persisted objects: %s:\n%s" % (
            prefix,
            traceback.format_exc()
            )
        return message, None
def handleInitiateMultipartS3Upload(self, request):
    uploadId = PythonIoTasks.initiateMultipartS3Upload(
        self.s3Interface,
        self.outOfProcessDownloaderPool,
        request.asInitiateMultipartS3Upload.credentials.bucketname,
        request.asInitiateMultipartS3Upload.credentials.keyname,
        request.asInitiateMultipartS3Upload.credentials.awsAccessKey,
        request.asInitiateMultipartS3Upload.credentials.awsSecretKey,
        request.asInitiateMultipartS3Upload.credentials.region
        )

    self.datasetRequestChannel_.write(
        CumulusNative.PythonIoTaskResponse.MultipartS3UploadInitiated(
            request.guid,
            uploadId
            )
        )
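#handleInitiateMultipartS3Upload (above) and handleCompleteMultipartS3Upload
#(at the top of this section) bracket a multipart upload: initiation hands an
#uploadId back over the response channel, and the matching complete-request
#must echo that same uploadId. A minimal self-contained sketch of that
#handshake follows; _UploadTrackerStub and the uuid-based ids are illustrative
#stand-ins, not part of the real service, where the ids come from S3 itself.

import uuid

class _UploadTrackerStub(object):
    def __init__(self):
        self.open = {}

    def initiate(self, bucketname, keyname):
        #hand back a fresh uploadId, as MultipartS3UploadInitiated does
        uploadId = uuid.uuid4().hex
        self.open[uploadId] = (bucketname, keyname)
        return uploadId

    def complete(self, uploadId):
        #the complete-request must echo the uploadId from initiation
        assert uploadId in self.open, "unknown uploadId: %s" % uploadId
        del self.open[uploadId]

tracker = _UploadTrackerStub()
uploadId = tracker.initiate("some-bucket", "some/key")
#...the individual parts would be uploaded here...
tracker.complete(uploadId)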
def extractPersistedObject(self, keyname):
    try:
        result = PythonIoTasks.extractPersistedObject(
            keyname,
            self.objectStore,
            self.outOfProcessDownloaderPool
            )
        return None, result
    except:
        message = "Error reading serialized object: %s:\n%s" % (
            keyname,
            traceback.format_exc()
            )

        #see if the object shows up as a listed object
        try:
            if len(self.objectStore.listValues(keyname)) == 0:
                return None, None
        except:
            message += "\n\nError while trying to list serialized object:\n%s" % (
                traceback.format_exc()
                )

        return message, None
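#writeToObjectStore, listPersistedObjects, and extractPersistedObject (above)
#all return an (errorMessage, result) pair in which exactly one element is
#None, while deletePersistedObject returns just the message (or None on
#success). A minimal self-contained sketch of the same convention; tryCall is
#a hypothetical helper, not part of the service:

import traceback

def tryCall(fn, *args):
    #mirror the convention above: (None, result) on success,
    #(formatted traceback message, None) on failure
    try:
        return None, fn(*args)
    except:
        return "Error calling %s:\n%s" % (fn.__name__, traceback.format_exc()), None

err, value = tryCall(int, "42")
assert err is None and value == 42

err, value = tryCall(int, "not a number")
assert err is not None and value is None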
def computeOneNode(self, node):
    """Push 'node' one step further in its computation requirements.

    self.intermediates_[node] either contains a list of values to be
    computed or an execution context.
    """
    if self.intermediates_[node] is None:
        context = self.grabContext()
        #the intermediates can either be None or an execution context.
        #in this case, since it's None, we have not even started computation
        #yet, so we need to create an ExecutionContext and begin computing
        with self.contextEnterer(context):
            context.resetInterruptState()
            if isinstance(node, tuple):
                with freestoreLock:
                    #this operation may be copying values in the freestore
                    #as we're updating them, so we need to do it under a lock
                    context.placeInEvaluationStateWithoutRenamingMutableVectors(
                        ImplValContainer_(tuple(node))
                        )
                context.compute()
            elif isinstance(node, SplitSubcomputation):
                context.resumePausedComputation(node.pausedComputationTree)
                context.resetInterruptState()
                context.compute()
            else:
                assert False, "don't know what to do with node of type %s" % node
        self.intermediates_[node] = context
    elif isinstance(self.intermediates_[node], FORANative.ExecutionContext):
        #this was a cacherequest node, and if we're here, we filled them all out
        context = self.intermediates_[node]
        req = context.getCacheRequest()
        if CacheSemantics.isCacheRequestWithResult(req):
            result = CacheSemantics.getCacheRequestComputationResult(req)
            with self.contextEnterer(context):
                context.resetInterruptState()
                context.addCachecallResult(result)
                context.compute()
        else:
            cacheCalls = [x.extractApplyTuple() for x in CacheSemantics.processCacheCall(req)]
            res = []
            exception = None
            for t in cacheCalls:
                assert t in self.finishedValuesAndTimeElapsed_, (
                    "Couldn't find result for: %s in %s" % (
                        t,
                        "\n".join([str(x) for x in self.finishedValuesAndTimeElapsed_.keys()])
                        )
                    )
                if self.finishedValuesAndTimeElapsed_[t][0].isException():
                    if exception is None:
                        exception = self.finishedValuesAndTimeElapsed_[t][0]
                else:
                    res.append(self.finishedValuesAndTimeElapsed_[t][0].asResult.result)
            with self.contextEnterer(context):
                if exception:
                    context.resetInterruptState()
                    context.addCachecallResult(exception)
                    context.compute()
                else:
                    context.resetInterruptState()
                    context.addCachecallResult(
                        ComputationResult_.Result(
                            ImplValContainer_(tuple(res))
                            )
                        )
                    context.compute()
    else:
        #this was a split request
        splitResult = self.intermediates_[node]
        for ix in range(len(splitResult.splits)):
            child = splitResult.childComputations[ix]
            assert child in self.finishedValuesAndTimeElapsed_
            value = self.finishedValuesAndTimeElapsed_[child][0]
            timeElapsed = self.finishedValuesAndTimeElapsed_[child][1]
            del self.finishedValuesAndTimeElapsed_[child]

            if value.isFailure():
                self.finishNode_(node, value)
                self.checkContextBackIn(splitResult.context)
                return
            else:
                splitResult.context.absorbSplitResult(
                    splitResult.splits[ix].computationHash,
                    value,
                    timeElapsed
                    )

        with self.lock_:
            context = splitResult.context
            context.resetInterruptState()
            self.intermediates_[node] = context
        with self.contextEnterer(context):
            context.compute()

    while True:
        if context.isFinished():
            result = context.getFinishedResult()
            timeElapsed = context.getTotalTimeElapsed()
            self.checkContextBackIn(context)
            #now, wake up any dependencies
            self.finishNode_(node, result, timeElapsed)
            return
        elif context.isVectorLoad():
            for vectorToLoad in context.getVectorLoadAsVDIDs():
                toLoad = None
                loaded = False
                if self.offlineCache_ is not None:
                    toLoad = self.offlineCache_.loadIfExists(vectorToLoad.page)
                    if toLoad is not None:
                        self.vdm_.loadSerializedVectorPage(vectorToLoad.page, toLoad)
                        loaded = True
                if not loaded and vectorToLoad.isExternal():
                    #this is an external dataset; attempt to load it from there
                    PythonIoTasks.loadExternalDataset(
                        getCurrentS3Interface(),
                        vectorToLoad,
                        self.vdm_,
                        self.inProcessDownloader
                        )
                    loaded = True
                assert loaded, "lost the definition for VDID: %s" % vectorToLoad
            with self.contextEnterer(context):
                context.resetInterruptState()
                context.compute()
            #go back around and try again
        elif context.isInterrupted():
            toResume = None
            if self.checkShouldSplit(context):
                splits = context.splitComputation()
                if splits is not None:
                    with self.lock_:
                        splitResult = self.computeIntermediatesForSplitResult(node, splits, context)
                        self.intermediates_[node] = splitResult
                    return
            #if we're here, then we didn't split
            #go back around and try again
            with self.contextEnterer(context):
                if toResume is not None:
                    context.resumePausedComputation(toResume)
                context.resetInterruptState()
                context.compute()
        elif context.isCacheRequest():
            #these are the new dependencies
            req = context.getCacheRequest()
            deps = set()
            if CacheSemantics.isCacheRequestWithResult(req):
                pass
            else:
                cacheCalls = [x.extractApplyTuple() for x in CacheSemantics.processCacheCall(req)]
                with self.lock_:
                    #register any dependencies
                    for t in cacheCalls:
                        if t not in self.finishedValuesAndTimeElapsed_ and t not in self.intermediates_:
                            #it's a new request
                            self.intermediates_[t] = None
                            self.completable_.put(t)
                            self.watchers_[t] = threading.Event()
                        if t not in self.finishedValuesAndTimeElapsed_:
                            deps.add(t)
            self.dependencies_[node] = deps
            if not deps:
                #we could go again
                with self.lock_:
                    self.completable_.put(node)
            return
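#Inside computeOneNode, self.intermediates_[node] acts as a three-state slot:
#None (computation not yet started), a FORANative.ExecutionContext (blocked on
#a cache request), or a split-result record (waiting on child computations).
#A self-contained illustration of dispatching on such a slot, using stand-in
#classes rather than the real FORA types:

class _ExecutionContextStub(object):
    pass

def dispatchIntermediate(slot):
    #mirror the three branches at the top of computeOneNode
    if slot is None:
        return "start a fresh ExecutionContext and begin computing"
    elif isinstance(slot, _ExecutionContextStub):
        return "feed completed cache-call results back into the context"
    else:
        return "absorb finished child computations from the split"

assert dispatchIntermediate(None).startswith("start")
assert dispatchIntermediate(_ExecutionContextStub()).startswith("feed")
assert dispatchIntermediate(object()).startswith("absorb")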
def computeOneNode(self, node):
    """Push 'node' one step further in its computation requirements.

    self.intermediates_[node] either contains a list of values to be
    computed or an execution context.
    """
    if self.intermediates_[node] is None:
        context = self.grabContext()
        #the intermediates can either be None or an execution context.
        #in this case, since it's None, we have not even started computation
        #yet, so we need to create an ExecutionContext and begin computing
        with self.contextEnterer(context):
            context.resetInterruptState()
            if isinstance(node, tuple):
                with freestoreLock:
                    #this operation may be copying values in the freestore
                    #as we're updating them, so we need to do it under a lock
                    context.placeInEvaluationStateWithoutRenamingMutableVectors(*node)
                context.resume()
            elif isinstance(node, FORANative.PausedComputation):
                context.resumePausedComputation(node)
                context.resetInterruptState()
                context.resume()
            else:
                assert False, "don't know what to do with node of type %s" % node
        self.intermediates_[node] = context
    elif isinstance(self.intermediates_[node], FORANative.ExecutionContext):
        #this was a cacherequest node, and if we're here, we filled them all out
        context = self.intermediates_[node]
        req = context.getCacheRequest()
        if CacheSemantics.isVectorCacheLoadRequest(req):
            with self.contextEnterer(context):
                context.resetInterruptState()
                context.resume(
                    ComputationResult_.Result(
                        ImplValContainer_(),
                        ImplValContainer_()
                        )
                    )
        elif CacheSemantics.isCacheRequestWithResult(req):
            result = CacheSemantics.getCacheRequestComputationResult(req)
            with self.contextEnterer(context):
                context.resetInterruptState()
                context.resume(result)
        else:
            cacheCalls = [x.extractApplyTuple() for x in CacheSemantics.processCacheCall(req)]
            res = []
            exception = None
            for t in cacheCalls:
                assert t in self.finishedValues_, (
                    "Couldn't find result for: %s in %s" % (
                        t,
                        "\n".join([str(x) for x in self.finishedValues_.keys()])
                        )
                    )
                if self.finishedValues_[t].isException():
                    if exception is None:
                        exception = self.finishedValues_[t]
                else:
                    res.append(self.finishedValues_[t].asResult.result)
            with self.contextEnterer(context):
                if exception:
                    context.resetInterruptState()
                    context.resume(exception)
                else:
                    context.resetInterruptState()
                    context.resume(
                        ComputationResult_.Result(
                            ImplValContainer_(tuple(res)),
                            ImplValContainer_()
                            )
                        )
    else:
        #this was a split request
        splitResult, splitComputationLog = self.intermediates_[node]
        for slotComputation in splitResult.submittedComputations():
            assert slotComputation in self.finishedValues_
            value = self.finishedValues_[slotComputation]

            if value.isFailure():
                self.finishNode_(node, value)
                return
            else:
                splitResult.futuresSplitResult.slotCompleted(
                    splitResult.computationsToSlotIndices[slotComputation],
                    value
                    )
                del splitResult.computationsToSlotIndices[slotComputation]

        submittableFutures = splitResult.futuresSplitResult.indicesOfSubmittableFutures()
        if len(submittableFutures) == 0:
            context = self.grabContext()
            toResumeWith = splitResult.futuresSplitResult.getFinalResult()
            context.resumePausedComputation(toResumeWith)
            context.resetInterruptState()
            self.intermediates_[node] = context
            with self.contextEnterer(context):
                context.resume()
        else:
            with self.lock_:
                futuresSplitResult = splitResult.futuresSplitResult
                isFinished, result = self.findMeatyPausedComputations(futuresSplitResult)
                if not isFinished:
                    splitResult = self.computeIntermediatesForSplitResult(
                        node, futuresSplitResult, result)
                    self.intermediates_[node] = (splitResult, [])
                    return
                else:
                    toResume = result
                    context = self.grabContext()
                    context.resumePausedComputation(toResume)
                    context.resetInterruptState()
                    self.intermediates_[node] = context
                    with self.contextEnterer(context):
                        context.resume()

    while True:
        if context.isFinished():
            result = context.getFinishedResult()
            self.checkContextBackIn(context)
            #now, wake up any dependencies
            self.finishNode_(node, result)
            return
        elif context.isVectorLoad():
            for vectorToLoad in context.getVectorLoadAsVDIDs():
                toLoad = None
                loaded = False
                if self.offlineCache_ is not None:
                    toLoad = self.offlineCache_.loadIfExists(vectorToLoad.page)
                    if toLoad is not None:
                        self.vdm_.loadSerializedVectorPage(vectorToLoad.page, toLoad)
                        loaded = True
                if not loaded and vectorToLoad.isExternal():
                    #this is an external dataset; attempt to load it from there
                    PythonIoTasks.loadExternalDataset(
                        getCurrentS3Interface(),
                        vectorToLoad,
                        self.vdm_,
                        self.inProcessDownloader
                        )
                    loaded = True
                assert loaded, "lost the definition for VDID: %s" % vectorToLoad
            with self.contextEnterer(context):
                context.resetInterruptState()
                context.resume()
            #go back around and try again
        elif context.isInterrupted():
            toResume = None
            if self.checkShouldSplit(context):
                futuresSplitResult = context.splitWithFutures()
                if futuresSplitResult is not None:
                    with self.lock_:
                        futuresSplitResult.disallowRepeatNodes()
                        isFinished, result = self.findMeatyPausedComputations(futuresSplitResult)
                        if not isFinished:
                            splitResult = self.computeIntermediatesForSplitResult(
                                node, futuresSplitResult, result)
                            self.intermediates_[node] = (splitResult, context.getComputationLog())
                            self.checkContextBackIn(context)
                            return
                        else:
                            toResume = result
            #if we're here, then we didn't split
            #go back around and try again
            with self.contextEnterer(context):
                if toResume is not None:
                    context.resumePausedComputation(toResume)
                context.resetInterruptState()
                context.resume()
        elif context.isCacheRequest():
            #these are the new dependencies
            req = context.getCacheRequest()
            deps = set()
            if CacheSemantics.isVectorCacheLoadRequest(req):
                pass
            elif CacheSemantics.isCacheRequestWithResult(req):
                pass
            else:
                cacheCalls = [x.extractApplyTuple() for x in CacheSemantics.processCacheCall(req)]
                with self.lock_:
                    #register any dependencies
                    for t in cacheCalls:
                        if t not in self.finishedValues_ and t not in self.intermediates_:
                            #it's a new request
                            self.intermediates_[t] = None
                            self.completable_.put(t)
                            self.watchers_[t] = threading.Event()
                        if t not in self.finishedValues_:
                            deps.add(t)
            self.dependencies_[node] = deps
            if not deps:
                #we could go again
                with self.lock_:
                    self.completable_.put(node)
            return
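#computeOneNode is pull-driven: nodes that are ready to advance sit in
#self.completable_ and workers repeatedly pop one and push it a step forward.
#A minimal sketch of such an outer loop, assuming completable_ is a
#Queue.Queue and isStillRunning is a hypothetical shutdown flag; the real
#service wires its worker threads up elsewhere:

import Queue

def workerLoop(completable, computeOneNode, isStillRunning):
    #pop ready nodes and push each one step forward until shutdown
    while isStillRunning():
        try:
            node = completable.get(timeout=0.1)
        except Queue.Empty:
            continue
        computeOneNode(node)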