def test_schedulerEventsAreSerializable(self): s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory() result, simulation = self.computeUsingSeveralWorkers(""" sum(0,10**10) """, s3, 4, timeout=240, returnSimulation=True ) try: someHadEvents = False for worker, vdm, eventHandler in simulation.workersVdmsAndEventHandlers: events = eventHandler.extractEvents() events2 = pickle.loads(pickle.dumps(events)) print len(events), "events" print len(pickle.dumps(events)), " bytes" print len(pickle.dumps(events)) / len(events), " bytes per event." self.assertTrue(len(events2) == len(events)) if len(events): someHadEvents = True CumulusNative.replayCumulusWorkerEventStream(events, True) self.assertTrue(someHadEvents) worker = None vdm = None eventHandler = None finally: simulation.teardown()
def createWorker(machineId, viewFactory, callbackSchedulerToUse=None, threadCount=2, memoryLimitMb=100): if callbackSchedulerToUse is None: callbackSchedulerToUse = CallbackScheduler.singletonForTesting() vdm = ForaNative.VectorDataManager(callbackSchedulerToUse, 5 * 1024 * 1024) vdm.setMemoryLimit( int(memoryLimitMb * 1024 * 1024), min(int(memoryLimitMb * 1.25 * 1024 * 1024), int((memoryLimitMb + 1024 * 2) * 1024 * 1024))) vdm.setPersistentCacheIndex( CumulusNative.PersistentCacheIndex(viewFactory.createView(), callbackSchedulerToUse)) cache = CumulusNative.SimpleOfflineCache(callbackSchedulerToUse, 1000 * 1024 * 1024) eventHandler = CumulusNative.CumulusWorkerHoldEventsInMemoryEventHandler() return (CumulusNative.CumulusWorker( callbackSchedulerToUse, CumulusNative.CumulusWorkerConfiguration( machineId, threadCount, CumulusNative.CumulusCheckpointPolicy.None (), ExecutionContext.createContextConfiguration(), ""), vdm, cache, eventHandler), vdm, eventHandler)
def test_persistentCacheUnderLoad(self): cppView1 = CumulusNative.PersistentCacheIndex( self.sharedState.newView(), callbackScheduler) t0 = time.time() #add 100k pages, which is enough for about 5 TB of data for index in range(100000): if index % 1000 == 0 and index > 0: print index, (time.time() - t0) / (index / 1000.0), " seconds per 1000" cppView1.addPage(sha1("page" + str(index)), HashSet(), 1, sha1("")) print "took ", time.time() - t0, " to add 100k." t1 = time.time() bytes0 = TCMallocNative.getBytesUsed() cppView2 = CumulusNative.PersistentCacheIndex( self.sharedState.newView(), callbackScheduler) while cppView2.totalBytesInCache() < 100000: time.sleep(.1) print cppView2.totalBytesInCache() print "took ", time.time() - t1, " to load 100k. Total RAM is ", (TCMallocNative.getBytesUsed() - bytes0) / 1024 / 1024.0, " MB per view"\
def submitComputationOnClient(self, clientIndex, expressionText, **freeVariables):
    """Create a computation on the given client and give it priority 1.

    ``expressionText`` may be:
      * a ``ForaNative.ImplValContainer``: each element of its tuple
        becomes a Value term of a Root definition;
      * a ``CumulusNative.ComputationDefinition``: used as-is;
      * anything else: treated as FORA source text, wrapped in a function
        of the free variable names and applied to their values.

    Returns:
        The computation id returned by the client's ``createComputation``.
    """
    if isinstance(expressionText, ForaNative.ImplValContainer):
        valueTerms = [
            CumulusNative.ComputationDefinitionTerm.Value(element, None)
            for element in expressionText.getTuple()
            ]
        definition = CumulusNative.ComputationDefinition.Root(
            CumulusNative.ImmutableTreeVectorOfComputationDefinitionTerm(valueTerms)
            )
    elif isinstance(expressionText, CumulusNative.ComputationDefinition):
        definition = expressionText
    else:
        varNames = list(freeVariables.keys())

        functionSource = "fun(" + ",".join(varNames) + ") {" + expressionText + " } "
        functionExpr = FORA.eval(functionSource)

        definition = createComputationDefinition(
            FORA.extractImplValContainer(functionExpr),
            ForaNative.makeSymbol("Call"),
            *[freeVariables[name] for name in varNames]
            )

    computationId = self.getClient(clientIndex).createComputation(definition)

    self.getClient(clientIndex).setComputationPriority(
        computationId,
        CumulusNative.ComputationPriority(1)
        )

    return computationId
def circularPageReferenceTest(self, shouldBeInvalid):
    """Build a checkpoint -> file -> bigvec -> page chain and count invalid objects.

    When ``shouldBeInvalid`` is true the page refers back to the bigvec,
    forming a cycle, and four invalid objects are expected; otherwise
    none are expected.
    """
    expectedInvalidCount = 4 if shouldBeInvalid else 0

    cppView1 = CumulusNative.PersistentCacheIndex(
        self.sharedState.newView(),
        callbackScheduler
        )

    computationId = CumulusNative.ComputationId.Root(sha1("computation"))
    checkpointRequest = CumulusNative.CheckpointRequest(0.0, True, computationId)

    cppView1.addBigvec(
        sha1("bigvec1"),
        HashSet() + sha1("page1"),
        2,
        sha1("")
        )

    cppView1.addPage(
        sha1("page1"),
        (HashSet() + sha1("bigvec1")) if shouldBeInvalid else HashSet(),
        1,
        sha1("")
        )

    cppView1.addCheckpointFile(
        checkpointRequest,
        sha1("file"),
        HashSet() + sha1("bigvec1"),
        2,
        sha1("")
        )

    cppView1.addCheckpoint(
        checkpointRequest,
        HashSet() + sha1("file"),
        2,
        sha1(""),
        True,
        1.0,
        HashSet()
        )

    self.assertTrue(
        len(cppView1.computeInvalidObjects()) == expectedInvalidCount,
        "%s != %s" % (len(cppView1.computeInvalidObjects()), expectedInvalidCount)
        )
def __init__(self, callbackScheduler, vdm, viewFactory):
    """Wire up the cumulus client, persistent cache index and worker threads.

    Args:
        callbackScheduler: scheduler shared by the native components; also
            used to derive a dedicated scheduler for the CPU assignment
            dependency graph.
        vdm: vector data manager; the persistent cache index is attached
            to it.
        viewFactory: object whose ``createView(retrySeconds=..., numRetries=...)``
            result backs the persistent cache index.

    Two ManagedThreads are started, targeting ``processClientMessages_``
    and ``processDependencyGraphMessages_``.
    """
    Stoppable.Stoppable.__init__(self)

    self.lock_ = threading.Lock()
    self.callbackScheduler = callbackScheduler
    self.definitionToIdMap_ = {}
    self.idToDefinitionMap_ = {}
    self.vdm = vdm

    self.onJsonViewOfSystemChanged = None

    self.persistentCacheIndex = CumulusNative.PersistentCacheIndex(
        viewFactory.createView(retrySeconds=10.0, numRetries=10),
        callbackScheduler
        )

    self.vdm.setPersistentCacheIndex(self.persistentCacheIndex)

    self.cumulusClientId = CumulusNative.CumulusClientId(
        Hash.Hash.sha1(str(uuid.uuid4()))
        )

    logging.info("CumulusClient created with %s", self.cumulusClientId)

    self.cumulusClient = CumulusNative.CumulusClient(
        vdm,
        self.cumulusClientId,
        self.callbackScheduler
        )

    self.finalResponses = Queue.Queue()

    self.cumulusClientListener = self.cumulusClient.createListener()

    self.cpuAssignmentDependencyGraph = CumulusNative.CpuAssignmentDependencyGraph(
        self.callbackScheduler.getFactory().createScheduler(
            self.callbackScheduler.getMetadata() + "_cpuAssignmentGraph",
            1
            ),
        self.vdm
        )
    self.cpuAssignmentDependencyGraph.subscribeToCumulusClient(self.cumulusClient)

    self.pendingCallbacksByGuid = {}

    self.cpuAssignmentDependencyGraphListener = \
        self.cpuAssignmentDependencyGraph.createListener()

    # Finish initializing all instance state BEFORE starting the threads,
    # so the thread targets can never run against a partially constructed
    # object.
    # NOTE(review): presumably these fields are read by the message-processing
    # threads - confirm against their implementations.
    self.nextCpuUpdateTime = time.time()
    self.cpuMessagesSinceLastUpdate = 0
    self.lastSystemwideUpdateTime = time.time()

    self.threads = []

    self.threads.append(
        ManagedThread.ManagedThread(
            target=self.processClientMessages_,
            args=()
            )
        )
    self.threads.append(
        ManagedThread.ManagedThread(
            target=self.processDependencyGraphMessages_,
            args=()
            )
        )

    for t in self.threads:
        t.start()
def createWorker_(machineId, viewFactory, callbackSchedulerToUse, threadCount, memoryLimitMb, cacheFunction, pageSizeOverride, disableEventHandler): if callbackSchedulerToUse is None: callbackSchedulerToUse = CallbackScheduler.singletonForTesting() vdm = ForaNative.VectorDataManager( callbackSchedulerToUse, pageSizeOverride if pageSizeOverride is not None else 1 * 1024 * 1024 if memoryLimitMb < 1000 else 5 * 1024 * 1024 if memoryLimitMb < 5000 else 50 * 1024 * 1024 ) vdm.setMemoryLimit( int(memoryLimitMb * 1024 * 1024), min(int(memoryLimitMb * 1.25 * 1024 * 1024), int((memoryLimitMb + 1024 * 2) * 1024 * 1024)) ) vdm.setPersistentCacheIndex( CumulusNative.PersistentCacheIndex( viewFactory.createView(), callbackSchedulerToUse ) ) cache = cacheFunction() if disableEventHandler: eventHandler = CumulusNative.CumulusWorkerIgnoreEventHandler() else: eventHandler = CumulusNative.CumulusWorkerHoldEventsInMemoryEventHandler() return ( CumulusNative.CumulusWorker( callbackSchedulerToUse, CumulusNative.CumulusWorkerConfiguration( machineId, threadCount, CumulusNative.CumulusCheckpointPolicy.None(), ExecutionContext.createContextConfiguration(), "" ), vdm, cache, eventHandler ), vdm, eventHandler )
def cacheFunction(self):
    """Return a new offline cache: in-memory or disk-backed.

    When ``self.useInMemoryCache`` is set, a ``SimpleOfflineCache`` is
    returned. Otherwise ``self.diskCacheCount`` is incremented and a
    ``DiskOfflineCache`` is created in a subdirectory of
    ``self.diskCacheStorageDir`` named after the new count.
    """
    if self.useInMemoryCache:
        return CumulusNative.SimpleOfflineCache(
            self.callbackScheduler,
            1000 * 1024 * 1024
            )

    self.diskCacheCount += 1

    cacheDirectory = os.path.join(
        self.diskCacheStorageDir,
        str(self.diskCacheCount)
        )

    return CumulusNative.DiskOfflineCache(
        self.callbackScheduler,
        cacheDirectory,
        100 * 1024 * 1024 * 1024,
        100000
        )
def createServiceAndServiceThread(self):
    """Create, configure and start a CumulusService for the in-memory cluster.

    Applies this object's RAM cache, thread count and disk cache overrides
    to ``Setup.config()``, builds a channel listener on the control and
    data ports under a freshly generated address, then constructs the
    service and starts it with a no-op callback.

    Returns:
        The started ``CumulusService``.
    """
    config = Setup.config()

    # The overrides are divided by 1024 twice before being stored in the
    # *MB config fields.
    config.cumulusMaxRamCacheMB = self.cumulusMaxRamCacheSizeOverride / 1024 / 1024
    config.cumulusVectorRamCacheMB = self.cumulusVectorRamCacheSizeOverride / 1024 / 1024
    config.cumulusTrackTcmalloc = False
    config.cumulusServiceThreadCount = self.cumulusThreadCountOverride
    config.cumulusDiskCacheStorageSubdirectory = str(uuid.uuid4())

    ownAddress = str(uuid.uuid4())

    callbackScheduler = self.callbackSchedulerFactory.createScheduler(
        "InMemoryClusterChild",
        1
        )

    listenPorts = [
        Setup.config().cumulusControlPort,
        Setup.config().cumulusDataPort
        ]
    channelListener = self.createMultiChannelListener(
        callbackScheduler,
        listenPorts,
        ownAddress
        )

    channelFactory = self.channelManager.createChannelFactory()
    eventHandler = CumulusNative.CumulusWorkerHoldEventsInMemoryEventHandler()

    service = CumulusService.CumulusService(
        ownAddress=ownAddress,
        channelListener=channelListener,
        channelFactory=channelFactory,
        eventHandler=eventHandler,
        callbackScheduler=callbackScheduler,
        diagnosticsDir=None,
        config=config,
        viewFactory=self.sharedStateViewFactory
        )

    service.startService(lambda: None)

    return service
def increaseRequestCount(self, compValue, cumulusComputationDefinition):
    """Register ``compValue`` as interested in a computation and bump its refcount.

    On the first reference the computation is given a newly allocated
    priority. If a finished result is already stored for the computation,
    an update for ``compValue`` is pushed to the ``BackgroundUpdateQueue``.
    """
    computationId = self.cumulusGateway.getComputationIdForDefinition(
        cumulusComputationDefinition
        )

    with self.lock_:
        self.computedValuesForComputations.setdefault(computationId, set()).add(compValue)

        self.refcountsForCompIds_[computationId] += 1

        isFirstReference = self.refcountsForCompIds_[computationId] == 1
        if isFirstReference:
            self.cumulusGateway.setComputationPriority(
                computationId,
                CumulusNative.ComputationPriority(self.allocNewPriority_())
                )

        if computationId in self.finishedResultsForComputations:
            result, statistics = self.finishedResultsForComputations[computationId]

            update = self.valueUpdater(compValue, result, statistics)
            BackgroundUpdateQueue.push(update)
def evaluate(self, *args):
    """Submit a computation and wait for its result.

    Args:
        *args: computation arguments; passed through
            ``expandIfListOrTuple`` before submission.

    Returns:
        The first item that appears on ``self.results_``.

    Raises:
        KeyboardInterrupt: re-raised after the current computation's
            priority has been reset to the default
            ``ComputationPriority()`` and pending results have been drained.
    """
    self.verifyWorkersAvailable()

    try:
        args = self.expandIfListOrTuple(*args)
        self.submitComputation(*args)

        while True:
            try:
                # Block with a short timeout instead of spinning on
                # get_nowait(): the waiting thread no longer burns a CPU
                # core, and a timed wait remains interruptible so
                # KeyboardInterrupt is still delivered promptly.
                return self.results_.get(True, 0.1)
            except Queue.Empty:
                pass
    except KeyboardInterrupt:
        with self.lock_:
            if self.currentComputationId is not None:
                self.cumulusGateway.setComputationPriority(
                    self.currentComputationId,
                    CumulusNative.ComputationPriority()
                    )
                self.currentComputationId = None

            # Discard anything already queued.
            while not self.results_.empty():
                self.results_.get(False)
        raise
def onWorkerAdd(self, ip, ports, machineIdAsString):
    """React to a newly detected worker, connecting to it if it is 'larger' than us.

    Connections are only initiated one way: when the other machine's id is
    less than or equal to ours we wait for it to connect to us. Otherwise
    the machine is recorded as connecting and ``onWorkerAdd2`` is run on a
    new ManagedThread.
    """
    machineId = CumulusNative.MachineId(Hash.Hash.stringToHash(machineIdAsString))

    shouldInitiateConnection = not (machineId <= self.machineId)

    if not shouldInitiateConnection:
        logging.info("Worker %s detected worker %s, and waiting for incoming connection",
                     self.machineId,
                     machineId)

        #only connect one way. If the worker is larger than us, then we connect to it
        return

    guid = Hash.Hash.sha1(str(uuid.uuid4()))

    logging.info(
        "Worker %s detected worker %s and initiating connection with guid %s",
        self.machineId,
        machineId,
        guid
        )

    with self.lock:
        # Track that we are trying to connect to this machine
        self.connectingMachines.add(machineId)

    connectionThread = ManagedThread.ManagedThread(
        target=self.onWorkerAdd2,
        args=(machineId, ip, ports, guid)
        )
    connectionThread.start()
def makeComputationDefinitionFromIVCs(*args):
    """Build a Root ComputationDefinition whose terms are the given values.

    Each argument is wrapped in a ``ForaNative.ImplValContainer`` and then
    in a Value term.
    """
    valueTerms = []
    for arg in args:
        valueTerms.append(
            CumulusNative.ComputationDefinitionTerm.Value(
                ForaNative.ImplValContainer(arg),
                None
                )
            )

    termVector = CumulusNative.ImmutableTreeVectorOfComputationDefinitionTerm(valueTerms)

    return CumulusNative.ComputationDefinition.Root(termVector)
def createEventHandler(events_dir, callback_scheduler):
    """Create an event handler that writes worker events to a per-process log file.

    Args:
        events_dir: directory for the log file; created if missing.
        callback_scheduler: scheduler passed to the native event handler.

    Returns:
        A ``CumulusNative.CumulusWorkerWriteToDiskEventHandler`` writing to
        ``<events_dir>/ufora-worker-events.<pid>.log``.

    Raises:
        OSError: if the directory cannot be created for any reason other
            than it already existing as a directory.
    """
    import errno

    # Create-then-tolerate-EEXIST instead of check-then-create: the log
    # file name is per-pid, so several processes may share this directory
    # and race to create it.
    try:
        os.makedirs(events_dir)
    except OSError as e:
        if e.errno != errno.EEXIST or not os.path.isdir(events_dir):
            raise

    return CumulusNative.CumulusWorkerWriteToDiskEventHandler(
        callback_scheduler,
        os.path.join(events_dir, "ufora-worker-events.%s.log" % os.getpid())
        )
def test_writing_while_disconnected(self):
    """Write pages while the underlying view is repeatedly disconnected and replaced.

    One thread adds 100 pages through a PersistentCacheIndex while a second
    thread keeps disconnecting the current view and resetting the index
    onto a new one. Afterwards the index must report more than 10
    reconnects, and all 100 pages must exist both in it and in a second
    index created on a new view.
    """
    # Single-element list so the nested function can rebind the current view.
    currentView = [self.sharedState.newView()]

    cppView1 = CumulusNative.PersistentCacheIndex(currentView[0], callbackScheduler)

    def writeInLoop():
        for pageIx in range(100):
            time.sleep(0.01)
            cppView1.addPage(sha1("page" + str(pageIx)), HashSet(), pageIx, sha1(""))

    writerThread = threading.Thread(target=writeInLoop)
    writerThread.start()

    def disconnectAndReconnectInLoop():
        reconnectCount = 0
        while writerThread.isAlive():
            reconnectCount += 1
            time.sleep(0.004)
            currentView[0].disconnect()
            currentView[0] = self.sharedState.newView()
            cppView1.resetView(currentView[0])

    reconnectorThread = threading.Thread(target=disconnectAndReconnectInLoop)
    reconnectorThread.start()

    writerThread.join()
    reconnectorThread.join()

    self.assertTrue(cppView1.timesViewReconnected() > 10)

    cppView2 = CumulusNative.PersistentCacheIndex(
        self.sharedState.newView(),
        callbackScheduler
        )

    time.sleep(2.0)

    count1 = 0
    count2 = 0
    for pageIx in range(100):
        pageHash = sha1("page" + str(pageIx))
        if cppView1.pageExists(pageHash):
            count1 += 1

        pageHash = sha1("page" + str(pageIx))
        if cppView2.pageExists(pageHash):
            count2 += 1

    self.assertTrue(count1 == 100 and count2 == 100, (count1, count2))
def requestComputation(self, computationDefinition):
    """Allocate a new computation id for the definition and give it priority 1.

    Returns:
        The newly allocated computation id.
    """
    with self.lock_:
        idPair = self.getNewComputationIdForDefinition_(computationDefinition)
        previousId, newId = idPair

        priority = CumulusNative.ComputationPriority(1)
        self.setComputationPriority_(newId, priority)

        return newId
def submitComputation(self, *args):
    """Record the computation for ``args`` as current and give it a new priority.

    The computation id is looked up from the gateway and stored in
    ``self.currentComputationId``; the priority is then set while holding
    ``self.lock_``.
    """
    definition = self.cumulusComputationDefinition(*args)

    self.currentComputationId = \
        self.cumulusGateway.getComputationIdForDefinition(definition)

    with self.lock_:
        priority = CumulusNative.ComputationPriority(self.allocNewPriority_())
        self.cumulusGateway.setComputationPriority(
            self.currentComputationId,
            priority
            )
def createClient(clientId, callbackSchedulerToUse=None):
    """Build a CumulusClient and the VectorDataManager it uses.

    Args:
        clientId: identifier passed to the client.
        callbackSchedulerToUse: scheduler to use; when None,
            ``CallbackScheduler.singletonForTesting()`` is used.

    Returns:
        A tuple ``(client, vdm)``.
    """
    scheduler = callbackSchedulerToUse
    if scheduler is None:
        scheduler = CallbackScheduler.singletonForTesting()

    vdm = ForaNative.VectorDataManager(scheduler, 5 * 1024 * 1024)
    vdm.setMemoryLimit(100 * 1024 * 1024, 125 * 1024 * 1024)

    client = CumulusNative.CumulusClient(vdm, clientId, scheduler)

    return (client, vdm)
def createComputationDefinition(*args):
    """Build a Root ComputationDefinition from values and nested definitions.

    Arguments that are ``CumulusNative.ComputationDefinition`` instances
    become Subcomputation terms built from their ``asRoot.terms``; every
    other argument is wrapped in an ``ImplValContainer`` and becomes a
    Value term.
    """
    def toTerm(arg):
        if isinstance(arg, CumulusNative.ComputationDefinition):
            return CumulusNative.ComputationDefinitionTerm.Subcomputation(
                arg.asRoot.terms
                )

        return CumulusNative.ComputationDefinitionTerm.Value(
            ForaNative.ImplValContainer(arg),
            None
            )

    terms = [toTerm(arg) for arg in args]

    return CumulusNative.ComputationDefinition.Root(
        CumulusNative.ImmutableTreeVectorOfComputationDefinitionTerm(terms)
        )
def __init__(self, callbackSchedulerFactory, callbackScheduler, ramCacheSize = None):
    """Set up the VectorDataManager and its tracking offline storage.

    Args:
        callbackSchedulerFactory: accepted but not used by this constructor.
        callbackScheduler: scheduler used for the VDM and offline storage.
        ramCacheSize: passed through to ``VectorDataManager.constructVDM``.
    """
    self.callbackScheduler = callbackScheduler
    self.lock_ = threading.RLock()

    self.vectorDataIDRequestCount_ = {}
    self.vectorDataIDToVectorSlices_ = {}

    vdm = VectorDataManager.constructVDM(callbackScheduler, ramCacheSize)
    self.vdm = vdm
    vdm.setDropUnreferencedPagesWhenFull(True)

    offloadRecorder = CumulusNative.TrackingOfflineStorage(self.callbackScheduler)
    self.ramCacheOffloadRecorder = offloadRecorder
    self.vdm.setOfflineCache(offloadRecorder)
def setComputationPriority_(self, computationId, computationPriority):
    """Set a computation's priority and update its root status in the dependency graph.

    A priority equal to the default ``ComputationPriority()`` marks the
    computation as non-root; any other priority marks it as root.
    """
    self.cumulusClient.setComputationPriority(computationId, computationPriority)

    dependencyGraph = self.cpuAssignmentDependencyGraph
    isDefaultPriority = computationPriority == CumulusNative.ComputationPriority()

    if not isDefaultPriority:
        dependencyGraph.markRootComputation(computationId)
    else:
        dependencyGraph.markNonrootComputation(computationId)
def cancelComputation(self, compValue, cumulusComputationDefinition):
    """Drop all references to a computation and reset its priority.

    Does nothing unless the computation currently has a positive refcount.
    ``compValue`` is not used.
    """
    computationId = self.cumulusGateway.getComputationIdForDefinition(
        cumulusComputationDefinition
        )

    with self.lock_:
        refcounts = self.refcountsForCompIds_

        if computationId not in refcounts:
            return
        if not (refcounts[computationId] > 0):
            return

        refcounts[computationId] = 0

        self.cumulusGateway.setComputationPriority(
            computationId,
            CumulusNative.ComputationPriority()
            )
def cancelAllComputations(self, resetCompletely=False):
    """Reset the priority and refcount of every computation still referenced.

    Args:
        resetCompletely: when true, also call
            ``resetStateCompletely()`` on the gateway afterwards.
    """
    with self.lock_:
        refcounts = self.refcountsForCompIds_

        # Only existing keys are reassigned, so the dict's size does not
        # change while it is being iterated.
        for compId, refcount in refcounts.iteritems():
            if not (refcount > 0):
                continue

            self.cumulusGateway.setComputationPriority(
                compId,
                CumulusNative.ComputationPriority()
                )
            refcounts[compId] = 0

        if resetCompletely:
            self.cumulusGateway.resetStateCompletely()
def main(parsedArguments):
    """Replay worker event streams from a pickle of event sets or a raw event file.

    The file is first tried as a pickle containing a sequence of event
    lists; each non-empty list is replayed. If it cannot be unpickled it
    is handed to ``replayCumulusWorkerEventStreamFromFile`` instead.

    Args:
        parsedArguments: object with ``file`` (path) and ``validation``
            (passed through to the replay functions) attributes.

    Returns:
        0
    """
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # run this on trusted event dumps.
    try:
        # Binary mode is what pickle expects, and the with-block closes the
        # handle that was previously leaked.
        with open(parsedArguments.file, "rb") as eventFile:
            eventSets = pickle.load(eventFile)
    except Exception:
        # Best-effort detection: anything that is not a readable pickle is
        # treated as a raw event file. KeyboardInterrupt and SystemExit are
        # no longer swallowed.
        eventSets = None

    if eventSets is not None:
        for events in eventSets:
            logging.warning("**********************************************")
            if len(events):
                logging.warning("validating %s", events[0])
                logging.warning("")

                CumulusNative.replayCumulusWorkerEventStream(
                    events,
                    parsedArguments.validation
                    )
    else:
        CumulusNative.replayCumulusWorkerEventStreamFromFile(
            parsedArguments.file,
            parsedArguments.validation
            )

    return 0
def extractEventSets(parsedArguments):
    """Load event sets from a single pickled dump or from raw worker event files.

    Args:
        parsedArguments: object with a ``files`` list of paths.

    Returns:
        The unpickled object when exactly one file is given and its path
        contains ``'scheduler_events'``; otherwise a list with one entry
        per file, each produced by
        ``CumulusNative.extractCumulusWorkerEventsFromFile``.
    """
    files = parsedArguments.files

    if len(files) == 1 and 'scheduler_events' in files[0]:
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only run this on trusted event dumps.
        # Binary mode is what pickle expects, and the with-block closes the
        # handle that was previously leaked.
        with open(files[0], "rb") as eventFile:
            return pickle.load(eventFile)

    return [
        CumulusNative.extractCumulusWorkerEventsFromFile(filename)
        for filename in files
        ]
def extractEventSets(parsedArguments):
    """Return event sets read from one pickled dump or from raw worker event files.

    Args:
        parsedArguments: object with a ``files`` list of paths.

    Returns:
        When exactly one file is given and its path contains
        ``'scheduler_events'``, the object unpickled from it. Otherwise a
        list holding, for each file in order, the result of
        ``CumulusNative.extractCumulusWorkerEventsFromFile``.
    """
    filenames = parsedArguments.files

    isPickledDump = len(filenames) == 1 and 'scheduler_events' in filenames[0]

    if isPickledDump:
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only run this on trusted event dumps.
        # Open in binary mode (required by pickle) and close the handle
        # deterministically instead of leaking it.
        with open(filenames[0], "rb") as pickledFile:
            return pickle.load(pickledFile)

    return [
        CumulusNative.extractCumulusWorkerEventsFromFile(filename)
        for filename in filenames
        ]
def main(parsedArguments):
    """Replay worker events from either a pickled list of event sets or a raw event file.

    The input is first read as a pickle holding a sequence of event lists,
    and every non-empty list is replayed. When unpickling fails, the path
    is passed to ``replayCumulusWorkerEventStreamFromFile``.

    Args:
        parsedArguments: object with ``file`` (path) and ``validation``
            (forwarded to the replay functions) attributes.

    Returns:
        0
    """
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # run this on trusted event dumps.
    eventSets = None
    try:
        # Open in binary mode (required by pickle) and close the handle
        # deterministically instead of leaking it.
        with open(parsedArguments.file, "rb") as pickledFile:
            eventSets = pickle.load(pickledFile)
    except Exception:
        # Deliberate fallback: a file that does not unpickle is assumed to
        # be a raw event file. Narrowed from a bare except so that
        # KeyboardInterrupt and SystemExit propagate.
        eventSets = None

    if eventSets is None:
        CumulusNative.replayCumulusWorkerEventStreamFromFile(
            parsedArguments.file,
            parsedArguments.validation
            )
        return 0

    for events in eventSets:
        logging.warning("**********************************************")
        if len(events):
            logging.warning("validating %s", events[0])
            logging.warning("")

            CumulusNative.replayCumulusWorkerEventStream(
                events,
                parsedArguments.validation
                )

    return 0
def decreaseRequestCount(self, compValue, cumulusComputationDefinition):
    """Release one reference to a computation, resetting its priority at zero.

    Does nothing unless the computation currently has a positive refcount.
    ``compValue`` is not used.
    """
    computationId = self.cumulusGateway.getComputationIdForDefinition(
        cumulusComputationDefinition
        )

    with self.lock_:
        refcounts = self.refcountsForCompIds_

        if computationId not in refcounts:
            return
        if not (refcounts[computationId] > 0):
            return

        refcounts[computationId] -= 1

        noReferencesRemain = refcounts[computationId] == 0
        if noReferencesRemain:
            self.cumulusGateway.setComputationPriority(
                computationId,
                CumulusNative.ComputationPriority()
                )
def test_dependingOnCheckpointedFinishedCachecallWorks(self):
    """Run computations that depend on a finished, checkpointed base computation.

    Computes ``1+2``, triggers full checkpoints and waits for them to
    clear, then submits 100 computations of the form ``x + <ix>`` with
    ``x`` bound to the base computation, waiting for a result after each.
    The simulation is always torn down.
    """
    s3 = InMemoryS3Interface.InMemoryS3InterfaceFactory()

    simulation = InMemoryCumulusSimulation.InMemoryCumulusSimulation(
        4, #worker count
        1,
        memoryPerWorkerMB=100,
        threadsPerWorker=2,
        s3Service=s3
        )

    try:
        schedulerIsUp = simulation.waitForGlobalScheduler(timeout=2.0)
        self.assertTrue(schedulerIsUp)

        simulation.waitForHandshake()

        baseComp = simulation.createComputation("1+2")
        baseCompId = simulation.getClient(0).createComputation(baseComp)
        simulation.getClient(0).setComputationPriority(
            baseCompId,
            CumulusNative.ComputationPriority(1)
            )

        result = simulation.waitForAnyResult()
        self.assertTrue(result.isResult())

        globalScheduler = simulation.getGlobalScheduler()
        globalScheduler.triggerFullCheckpointsOnOutstandingComputations()

        self.waitForAllCheckpointsToClear(simulation)

        for ix in range(100):
            childText = "x + %s" % ix
            childComp = simulation.createComputation(childText, x=baseComp)
            childCompId = simulation.getClient(0).createComputation(childComp)
            simulation.getClient(0).setComputationPriority(
                childCompId,
                CumulusNative.ComputationPriority(1)
                )

            result = simulation.waitForAnyResult()
    finally:
        simulation.teardown()
def cumulusComputationDefinition(self, *args):
    """Build a Root ComputationDefinition from the given arguments.

    ``ImplValContainer_`` instances become Value terms; every other
    argument contributes its ``computationDefinitionTerm_`` attribute.
    """
    terms = [
        CumulusNative.ComputationDefinitionTerm.Value(arg, None)
        if isinstance(arg, ImplValContainer_)
        else arg.computationDefinitionTerm_
        for arg in args
        ]

    termVector = CumulusNative.ImmutableTreeVectorOfComputationDefinitionTerm(terms)

    return CumulusNative.ComputationDefinition.Root(termVector)
def summarize(filenames): for filename in filenames: print "*********************************************************************************" print filename print "*********************************************************************************" events = CumulusNative.extractCumulusWorkerEventsFromFile(filename) print events[0] print "timestamps: ", events[0].timestamp, events[-1].timestamp print "timestamps: ", formatTimestamp(events[0].timestamp), formatTimestamp(events[-1].timestamp) print print
def logBadUforaVersionOnChannel(self, version):
    """Log an error describing a bad version message received on a channel.

    Tries to interpret ``version`` as the state of a
    ``CumulusNative.MachineId``. If that works, the message is reported as
    being a machine id; otherwise its ``repr`` is logged.

    Args:
        version: the unexpected version payload.
    """
    try:
        anId = CumulusNative.MachineId(Hash.Hash(0))
        anId.__setstate__(version)

        logging.error(
            "CumulusService %s received a bad version message that is, "
            "in fact, a machineId: %s",
            self.machineId,
            anId
            )
    except Exception:
        # Narrowed from a bare except so that KeyboardInterrupt and
        # SystemExit are not swallowed by a logging helper.
        logging.error(
            "CumulusService %s received a bad version message that is not a machineId: %s",
            self.machineId,
            repr(version)
            )
def summarize(filenames): for filename in filenames: print "*********************************************************************************" print filename print "*********************************************************************************" events = CumulusNative.extractCumulusWorkerEventsFromFile(filename) print events[0] print "timestamps: ", events[0].timestamp, events[-1].timestamp print "timestamps: ", formatTimestamp( events[0].timestamp), formatTimestamp(events[-1].timestamp) print print