def test_chunking(self):
    # Table-driven check of getAppropriateChunksForSize(totalSize, chunkSize).
    # Each case pairs the inputs with the expected list of (low, high) ranges.
    # Note the 120-byte case: the short tail is folded into the final chunk,
    # while the 130-byte case keeps a separate trailing chunk.
    cases = [
        (0, 50, []),
        (100, 50, [(0, 50), (50, 100)]),
        (120, 50, [(0, 50), (50, 120)]),
        (130, 50, [(0, 50), (50, 100), (100, 130)]),
        ]
    for totalSize, chunkSize, expectedChunks in cases:
        self.assertEqual(
            CacheSemantics.getAppropriateChunksForSize(totalSize, chunkSize),
            expectedChunks
            )
def computeOneNode(self, node):
    """Push 'node' one step further in its computation requirements.

    self.intermediates_[node] is one of three things:
      * None - computation has not started; build an ExecutionContext and begin
      * a FORANative.ExecutionContext - a cache request whose dependencies are
        now all satisfied; feed the results back in and resume
      * otherwise a (splitResult, computationLog) pair from a prior split -
        absorb finished child slots and resume or re-split

    After (re)starting the context, loops pumping it until it finishes,
    needs a vector load, gets interrupted (possible split point), or issues
    a cache request (which registers dependencies and returns).
    """
    if self.intermediates_[node] is None:
        context = self.grabContext()
        #the intermediates can either be None or
        #an execution context. in this case, since it's None
        #we have not even started computation yet, so we need to create
        #an ExecutionContext and begin computing
        with self.contextEnterer(context):
            context.resetInterruptState()
            if isinstance(node, tuple):
                with freestoreLock:
                    #this operation may be copying values in the freestore as we're
                    #updating them, so we need to do it under a lock
                    context.placeInEvaluationStateWithoutRenamingMutableVectors(*node)
                context.resume()
            elif isinstance(node, FORANative.PausedComputation):
                context.resumePausedComputation(node)
                context.resetInterruptState()
                context.resume()
            else:
                assert False, "don't know what to do with node of type %s" % node
        self.intermediates_[node] = context
    elif isinstance(self.intermediates_[node], FORANative.ExecutionContext):
        #this was a cacherequest node, and if we're here, we filled them
        #all out
        context = self.intermediates_[node]
        req = context.getCacheRequest()
        if CacheSemantics.isVectorCacheLoadRequest(req):
            #vector loads resume with an empty (unit) result pair
            with self.contextEnterer(context):
                context.resetInterruptState()
                context.resume(
                    ComputationResult_.Result(
                        ImplValContainer_(),
                        ImplValContainer_()
                        )
                    )
        elif CacheSemantics.isCacheRequestWithResult(req):
            #the request carries its own precomputed result
            result = CacheSemantics.getCacheRequestComputationResult(req)
            with self.contextEnterer(context):
                context.resetInterruptState()
                context.resume(result)
        else:
            #a genuine cachecall: gather the finished value of every apply
            #tuple; the first exception (if any) wins, otherwise resume with
            #the tuple of all results
            cacheCalls = [x.extractApplyTuple() for x in CacheSemantics.processCacheCall(req)]
            res = []
            exception = None
            for t in cacheCalls:
                assert t in self.finishedValues_, (
                    "Couldn't find result for: %s in %s" %
                        (t,"\n".join([str(x) for x in self.finishedValues_.keys()]))
                    )
                if self.finishedValues_[t].isException():
                    if exception is None:
                        exception = self.finishedValues_[t]
                else:
                    res.append(self.finishedValues_[t].asResult.result)
            with self.contextEnterer(context):
                if exception:
                    context.resetInterruptState()
                    context.resume(exception)
                else:
                    context.resetInterruptState()
                    context.resume(
                        ComputationResult_.Result(
                            ImplValContainer_(tuple(res)),
                            ImplValContainer_()
                            )
                        )
    else:
        #this was a split request
        splitResult, splitComputationLog = self.intermediates_[node]
        #feed every finished child computation into its future slot
        for slotComputation in splitResult.submittedComputations():
            assert slotComputation in self.finishedValues_
            value = self.finishedValues_[slotComputation]
            if value.isFailure():
                #a failed child fails the whole node
                self.finishNode_(node, value)
                return
            else:
                splitResult.futuresSplitResult.slotCompleted(
                    splitResult.computationsToSlotIndices[slotComputation], value)
                del splitResult.computationsToSlotIndices[slotComputation]
        submittableFutures = splitResult.futuresSplitResult.indicesOfSubmittableFutures()
        if len(submittableFutures) == 0:
            #nothing left to submit: resume the final paused computation
            context = self.grabContext()
            toResumeWith = splitResult.futuresSplitResult.getFinalResult()
            context.resumePausedComputation(toResumeWith)
            context.resetInterruptState()
            self.intermediates_[node] = context
            with self.contextEnterer(context):
                context.resume()
        else:
            with self.lock_:
                futuresSplitResult = splitResult.futuresSplitResult
                isFinished, result = self.findMeatyPausedComputations(futuresSplitResult)
                if not isFinished:
                    #more child computations to run: record the new split
                    #state and wait for them
                    splitResult = self.computeIntermediatesForSplitResult(
                        node, futuresSplitResult, result)
                    self.intermediates_[node] = (splitResult, [])
                    return
                else:
                    toResume = result
            context = self.grabContext()
            context.resumePausedComputation(toResume)
            context.resetInterruptState()
            self.intermediates_[node] = context
            with self.contextEnterer(context):
                context.resume()
    #pump the context until it blocks on something or finishes
    while True:
        if context.isFinished():
            result = context.getFinishedResult()
            self.checkContextBackIn(context)
            #now, wake up any dependencies
            self.finishNode_(node, result)
            return
        elif context.isVectorLoad():
            for vectorToLoad in context.getVectorLoadAsVDIDs():
                toLoad = None
                loaded = False
                if self.offlineCache_ is not None:
                    #try the offline cache first
                    toLoad = self.offlineCache_.loadIfExists(vectorToLoad.page)
                    if toLoad is not None:
                        self.vdm_.loadSerializedVectorPage(vectorToLoad.page, toLoad)
                        loaded = True
                if not loaded and vectorToLoad.isExternal():
                    #this is an external dataset, attempt to load it from there
                    PythonIoTasks.loadExternalDataset(
                        getCurrentS3Interface(),
                        vectorToLoad,
                        self.vdm_,
                        self.inProcessDownloader
                        )
                    loaded = True
                assert loaded, "lost the definition for VDID: %s" % vectorToLoad
            with self.contextEnterer(context):
                context.resetInterruptState()
                context.resume()
            #go back around and try again
        elif context.isInterrupted():
            toResume = None
            if self.checkShouldSplit(context):
                futuresSplitResult = context.splitWithFutures()
                if futuresSplitResult is not None:
                    with self.lock_:
                        futuresSplitResult.disallowRepeatNodes()
                        isFinished, result = self.findMeatyPausedComputations(futuresSplitResult)
                        if not isFinished:
                            #hand the node over to the split machinery and
                            #return; it resumes when children finish
                            splitResult = self.computeIntermediatesForSplitResult(
                                node, futuresSplitResult, result)
                            self.intermediates_[node] = (splitResult, context.getComputationLog())
                            self.checkContextBackIn(context)
                            return
                        else:
                            toResume = result
            #if we're here, then we didn't split
            #go back around and try again
            with self.contextEnterer(context):
                if toResume is not None:
                    context.resumePausedComputation(toResume)
                context.resetInterruptState()
                context.resume()
        elif context.isCacheRequest():
            #these are the new dependencies
            req = context.getCacheRequest()
            deps = set()
            if CacheSemantics.isVectorCacheLoadRequest(req):
                pass
            elif CacheSemantics.isCacheRequestWithResult(req):
                pass
            else:
                cacheCalls = [x.extractApplyTuple() for x in CacheSemantics.processCacheCall(req)]
                with self.lock_:
                    #register any dependencies
                    for t in cacheCalls:
                        if t not in self.finishedValues_ and t not in self.intermediates_:
                            #it's a new request
                            self.intermediates_[t] = None
                            self.completable_.put(t)
                            self.watchers_[t] = threading.Event()
                        if t not in self.finishedValues_:
                            deps.add(t)
                    self.dependencies_[node] = deps
            if not deps:
                #we could go again
                with self.lock_:
                    self.completable_.put(node)
            return
def computeOneNode(self, node):
    """Push 'node' one step further in its computation requirements.

    self.intermediates_[node] is one of three things:
      * None - computation has not started; build an ExecutionContext and begin
      * a FORANative.ExecutionContext - a cache request whose dependencies are
        now all satisfied; feed results back via addCachecallResult and resume
      * otherwise a split-result object - absorb finished child computations
        (with their elapsed times) back into the parent context and resume

    After (re)starting the context, loops pumping it until it finishes,
    needs a vector load, gets interrupted (possible split point), or issues
    a cache request (which registers dependencies and returns).
    """
    if self.intermediates_[node] is None:
        context = self.grabContext()
        #the intermediates can either be None or
        #an execution context. in this case, since it's None
        #we have not even started computation yet, so we need to create
        #an ExecutionContext and begin computing
        with self.contextEnterer(context):
            context.resetInterruptState()
            if isinstance(node, tuple):
                with freestoreLock:
                    #this operation may be copying values in the freestore as we're
                    #updating them, so we need to do it under a lock
                    context.placeInEvaluationStateWithoutRenamingMutableVectors(
                        ImplValContainer_(tuple(node))
                        )
                context.compute()
            elif isinstance(node, SplitSubcomputation):
                context.resumePausedComputation(node.pausedComputationTree)
                context.resetInterruptState()
                context.compute()
            else:
                assert False, "don't know what to do with node of type %s" % node
        self.intermediates_[node] = context
    elif isinstance(self.intermediates_[node], FORANative.ExecutionContext):
        #this was a cacherequest node, and if we're here, we filled them
        #all out
        context = self.intermediates_[node]
        req = context.getCacheRequest()
        if CacheSemantics.isCacheRequestWithResult(req):
            #the request carries its own precomputed result
            result = CacheSemantics.getCacheRequestComputationResult(req)
            with self.contextEnterer(context):
                context.resetInterruptState()
                context.addCachecallResult(result)
                context.compute()
        else:
            #a genuine cachecall: gather the finished value of every apply
            #tuple; the first exception (if any) wins, otherwise resume with
            #the tuple of all results. finishedValuesAndTimeElapsed_ maps
            #node -> (value, timeElapsed); index [0] is the value
            cacheCalls = [x.extractApplyTuple() for x in CacheSemantics.processCacheCall(req)]
            res = []
            exception = None
            for t in cacheCalls:
                assert t in self.finishedValuesAndTimeElapsed_, (
                    "Couldn't find result for: %s in %s" %
                        (t,"\n".join([str(x) for x in self.finishedValuesAndTimeElapsed_.keys()]))
                    )
                if self.finishedValuesAndTimeElapsed_[t][0].isException():
                    if exception is None:
                        exception = self.finishedValuesAndTimeElapsed_[t][0]
                else:
                    res.append(self.finishedValuesAndTimeElapsed_[t][0].asResult.result)
            with self.contextEnterer(context):
                if exception:
                    context.resetInterruptState()
                    context.addCachecallResult(exception)
                    context.compute()
                else:
                    context.resetInterruptState()
                    context.addCachecallResult(
                        ComputationResult_.Result(
                            ImplValContainer_(tuple(res))
                            )
                        )
                    context.compute()
    else:
        #this was a split request
        splitResult = self.intermediates_[node]
        #absorb each finished child back into the parent context, together
        #with the time the child took, then drop our record of it
        for ix in range(len(splitResult.splits)):
            child = splitResult.childComputations[ix]
            assert child in self.finishedValuesAndTimeElapsed_
            value = self.finishedValuesAndTimeElapsed_[child][0]
            timeElapsed = self.finishedValuesAndTimeElapsed_[child][1]
            del self.finishedValuesAndTimeElapsed_[child]
            if value.isFailure():
                #a failed child fails the whole node; return the parent
                #context to the pool before bailing out
                self.finishNode_(node, value)
                self.checkContextBackIn(splitResult.context)
                return
            else:
                splitResult.context.absorbSplitResult(
                    splitResult.splits[ix].computationHash,
                    value,
                    timeElapsed
                    )
        with self.lock_:
            context = splitResult.context
            context.resetInterruptState()
            self.intermediates_[node] = context
        with self.contextEnterer(context):
            context.compute()
    #pump the context until it blocks on something or finishes
    while True:
        if context.isFinished():
            result = context.getFinishedResult()
            timeElapsed = context.getTotalTimeElapsed()
            self.checkContextBackIn(context)
            #now, wake up any dependencies
            self.finishNode_(node, result, timeElapsed)
            return
        elif context.isVectorLoad():
            for vectorToLoad in context.getVectorLoadAsVDIDs():
                toLoad = None
                loaded = False
                if self.offlineCache_ is not None:
                    #try the offline cache first
                    toLoad = self.offlineCache_.loadIfExists(vectorToLoad.page)
                    if toLoad is not None:
                        self.vdm_.loadSerializedVectorPage(vectorToLoad.page, toLoad)
                        loaded = True
                if not loaded and vectorToLoad.isExternal():
                    #this is an external dataset, attempt to load it from there
                    PythonIoTasks.loadExternalDataset(
                        getCurrentS3Interface(),
                        vectorToLoad,
                        self.vdm_,
                        self.inProcessDownloader
                        )
                    loaded = True
                assert loaded, "lost the definition for VDID: %s" % vectorToLoad
            with self.contextEnterer(context):
                context.resetInterruptState()
                context.compute()
            #go back around and try again
        elif context.isInterrupted():
            #NOTE(review): toResume is never reassigned in this version, so the
            #resumePausedComputation branch below appears to be dead code kept
            #for symmetry with the futures-based variant - confirm
            toResume = None
            if self.checkShouldSplit(context):
                splits = context.splitComputation()
                if splits is not None:
                    with self.lock_:
                        #hand the node over to the split machinery and return;
                        #it resumes when the children finish
                        splitResult = self.computeIntermediatesForSplitResult(node, splits, context)
                        self.intermediates_[node] = splitResult
                    return
            #if we're here, then we didn't split
            #go back around and try again
            with self.contextEnterer(context):
                if toResume is not None:
                    context.resumePausedComputation(toResume)
                context.resetInterruptState()
                context.compute()
        elif context.isCacheRequest():
            #these are the new dependencies
            req = context.getCacheRequest()
            deps = set()
            if CacheSemantics.isCacheRequestWithResult(req):
                pass
            else:
                cacheCalls = [x.extractApplyTuple() for x in CacheSemantics.processCacheCall(req)]
                with self.lock_:
                    #register any dependencies
                    for t in cacheCalls:
                        if t not in self.finishedValuesAndTimeElapsed_ and t not in self.intermediates_:
                            #it's a new request
                            self.intermediates_[t] = None
                            self.completable_.put(t)
                            self.watchers_[t] = threading.Event()
                        if t not in self.finishedValuesAndTimeElapsed_:
                            deps.add(t)
                    self.dependencies_[node] = deps
            if not deps:
                #we could go again
                with self.lock_:
                    self.completable_.put(node)
            return