def __init__(self):
    """Initialize the simulator's scheduler, file locations, ports and process pool.

    File locations are derived from the directory one level above this
    module's directory; port numbers are read from ``Setup.config()``.
    """
    schedulerFactory = CallbackScheduler.createSimpleCallbackSchedulerFactory()
    self.callbackScheduler = schedulerFactory.createScheduler("Simulator", 1)

    # Everything below is located relative to the parent of this module's directory.
    moduleDir = os.path.dirname(__file__)
    self.uforaPath = os.path.abspath(os.path.join(moduleDir, '../'))

    self.sharedStatePath = os.path.join(self.uforaPath, 'distributed/SharedState')
    self.sharedStateMainline = os.path.join(self.sharedStatePath, 'sharedStateMainline.py')
    self.gatewayServiceMainline = os.path.join(self.uforaPath, 'scripts/init/ufora-gateway.py')

    self.webPath = os.path.join(self.uforaPath, 'web/relay')
    self.relayScript = os.path.join(self.webPath, 'server.coffee')

    config = Setup.config()
    self.relayPort = config.relayPort
    self.relayHttpsPort = config.relayHttpsPort
    self.sharedStatePort = config.sharedStatePort
    self.restApiPort = config.restApiPort
    self.subscribableWebObjectsPort = config.subscribableWebObjectsPort

    # Commands such as 'forever' are executed through an OutOfProcessDownloader
    # pool instead of being forked from the main process, which can run out
    # of memory.
    self.processPool = OutOfProcessDownloader.OutOfProcessDownloaderPool(1)

    self.desirePublisher = None
    self._connectionManager = None
def createServiceAndServiceThread(self):
    """Create, start and return a CumulusService for an in-memory cluster child.

    The instance's cache-size and thread-count overrides are written onto the
    object returned by ``Setup.config()``, and that object is then handed to
    the service. The service address and the disk cache subdirectory are
    each a freshly generated UUID string.
    """
    config = Setup.config()
    config.cumulusMaxRamCacheMB = self.cumulusMaxRamCacheSizeOverride
    config.cumulusVectorRamCacheMB = self.cumulusVectorRamCacheSizeOverride
    config.cumulusServiceThreadCount = self.cumulusThreadCountOverride
    config.cumulusDiskCacheStorageSubdirectory = str(uuid.uuid4())

    address = str(uuid.uuid4())
    scheduler = self.callbackSchedulerFactory.createScheduler("InMemoryClusterChild", 1)

    listenerPorts = [
        Setup.config().cumulusControlPort,
        Setup.config().cumulusDataPort,
    ]
    listener = self.createMultiChannelListener(scheduler, listenerPorts, address)

    channelFactory = self.channelManager.createChannelFactory()
    eventHandler = CumulusNative.CumulusWorkerHoldEventsInMemoryEventHandler()

    service = CumulusService.CumulusService(
        ownAddress=address,
        channelListener=listener,
        channelFactory=channelFactory,
        eventHandler=eventHandler,
        callbackScheduler=scheduler,
        diagnosticsDir=None,
        config=config,
        viewFactory=self.sharedStateViewFactory
        )

    # startService takes a callback; nothing needs to happen in it here.
    service.startService(lambda: None)
    return service
def startSharedState(self):
    """Launch the shared state mainline through ``forever``.

    The launcher command is run from ``self.sharedStatePath``; its stdout and
    stderr lines are forwarded to the log. The launcher is given up to 60
    seconds to finish and is then stopped.
    """
    fallbackCacheDir = os.path.join(Setup.config().fakeAwsBaseDir, 'ss_cache')
    cacheDir = Setup.config().getConfigValue("SHARED_STATE_CACHE", fallbackCacheDir)

    logging.info("Starting shared state with cache dir '%s' and log file '%s'",
                 cacheDir,
                 self.sharedStateLogFile)

    def logStdout(msg):
        logging.info("SHARED STATE OUT> %s", msg)

    def logStderr(msg):
        logging.info("SHARED STATE ERR> %s", msg)

    command = ['forever', '--killSignal', 'SIGTERM', '-l', self.sharedStateLogFile, 'start']
    command += ['-c', 'python', self.sharedStateMainline]
    command += ['--cacheDir', cacheDir, '--logging', 'info']

    with DirectoryScope.DirectoryScope(self.sharedStatePath):
        launcher = SubprocessRunner.SubprocessRunner(
            command,
            logStdout,
            logStderr,
            dict(os.environ)
            )
        launcher.start()
        launcher.wait(60.0)
        launcher.stop()
def __init__(self, callbackScheduler, cachePathOverride=None, port=None):
    """Set up the shared state service: cache directory, socket server and threads.

    callbackScheduler - stored on the instance as ``self.callbackScheduler``.
    cachePathOverride - cache directory to use; when None, the configured
        ``sharedStateCache`` is used. A non-empty path that does not exist
        yet is created.
    port - port for the socket server; when None, the configured
        ``sharedStatePort`` is used.

    The threads are only constructed here, not started.
    """
    self.callbackScheduler = callbackScheduler

    # Only fall back to the configured port when the caller did not pick one.
    # (The argument used to be overwritten unconditionally, which made it
    # impossible to run the service on a non-default port.)
    if port is None:
        port = Setup.config().sharedStatePort
    logging.info("Initializing SharedStateService with port = %s", port)

    if cachePathOverride is not None:
        self.cachePath = cachePathOverride
    else:
        self.cachePath = Setup.config().sharedStateCache

    # An empty cache path is left alone; no directory is created for it.
    if self.cachePath != '' and not os.path.exists(self.cachePath):
        os.makedirs(self.cachePath)

    CloudService.Service.__init__(self)

    self.socketServer = SimpleServer.SimpleServer(port)
    self.keyspaceManager = KeyspaceManager(
        0,
        1,
        pingInterval=120,
        cachePathOverride=cachePathOverride
        )

    self.socketServer._onConnect = self.onConnect
    self.socketServerThread = ManagedThread.ManagedThread(target=self.socketServer.start)
    self.logfilePruneThread = ManagedThread.ManagedThread(target=self.logFilePruner)

    self.stoppedFlag = threading.Event()
def startService(self):
    """Bring up the simulated cluster: shared state, gateway, relay and workers.

    Any previously running relay, gateway and shared state are stopped first,
    and process groups holding ports in the configured range are killed. If
    anything after starting shared state fails, the error is logged, the
    relay logs are dumped, and the exception is re-raised.
    """
    for stopPrevious in (self.stopRelay, self.stopGatewayService, self.stopSharedState):
        stopPrevious()

    config = Setup.config()
    firstPort = config.basePort
    KillProcessHoldingPort.killProcessGroupHoldingPorts(
        firstPort,
        firstPort + config.numPorts
        )

    self.createSimulationDirectory()
    self.startSharedState()

    try:
        self.startGatewayService()

        logging.info('Starting relay')
        # The relay process is started from within the relay's web directory.
        with DirectoryScope.DirectoryScope(self.webPath):
            self.startRelayProcess(self.relayScript)

        logging.info("verifying that shared state is running")
        self.verifySharedStateRunning()

        workerMainline = os.path.join(self.uforaPath, 'scripts/init/ufora-worker.py')
        self.desirePublisher = WorkerProcesses(workerMainline)
    except:
        logging.error(
            "Couldn't start ClusterSimulation service. Exception=\n%s",
            traceback.format_exc())
        self.dumpRelayLogs()
        raise
def __init__(self, vdm, offlineCache):
    """Initialize bookkeeping state and start the worker threads.

    vdm - stored as ``self.vdm_``.
    offlineCache - stored as ``self.offlineCache_``.

    One worker thread running ``self.threadWorker`` is started per configured
    ``cumulusServiceThreadCount``, so all state must be in place before the
    threads are created at the end of this method.
    """
    Stoppable.Stoppable.__init__(self)

    self.vdm_ = vdm
    self.offlineCache_ = offlineCache

    self.lock_ = threading.RLock()
    self.completable_ = Queue.Queue()
    self.dependencies_ = TwoWaySetMap.TwoWaySetMap()

    self.finishedValues_ = {}
    self.intermediates_ = {}
    self.computingContexts_ = {}
    self.computingContexts_t0_ = {}
    self.watchers_ = {}
    self.isSplit_ = set()
    self.contexts_ = []
    self.timesComputed = 0

    workerCount = Setup.config().cumulusServiceThreadCount

    self.inProcessDownloader = OutOfProcessDownloader.OutOfProcessDownloaderPool(
        workerCount,
        actuallyRunOutOfProcess=False
        )

    self.threads_ = []
    self.isActive = True

    # Start the worker threads last, once every attribute they may touch exists.
    for _ in range(workerCount):
        worker = ManagedThread.ManagedThread(target=self.threadWorker)
        worker.start()
        self.threads_.append(worker)
def constructVDM(
        callbackScheduler,
        vectorRamCacheBytes = None,
        maxRamCacheBytes = None,
        maxVectorChunkSize = None
        ):
    """Create a VectorDataManager with memory limits applied and return it.

    callbackScheduler - passed through to the VectorDataManager constructor.
    vectorRamCacheBytes - defaults to the configured ``cumulusVectorRamCacheMB``
        converted to bytes.
    maxRamCacheBytes - defaults to the configured ``cumulusMaxRamCacheMB``
        converted to bytes.
    maxVectorChunkSize - defaults to the configured ``maxPageSizeInBytes``.
        It is capped at 1/32 of ``vectorRamCacheBytes``.
    """
    bytesPerMb = 1024 * 1024

    if vectorRamCacheBytes is None:
        vectorRamCacheBytes = Setup.config().cumulusVectorRamCacheMB * bytesPerMb

    if maxRamCacheBytes is None:
        maxRamCacheBytes = Setup.config().cumulusMaxRamCacheMB * bytesPerMb

    if maxVectorChunkSize is None:
        maxVectorChunkSize = Setup.config().maxPageSizeInBytes

    if maxVectorChunkSize > vectorRamCacheBytes / 32:
        # The message names the chunk size first and the memory size second,
        # so the arguments must be passed in that order (they used to be
        # swapped, producing a misleading log line).
        logging.info(
            "VDM constructor specified a chunk size of %s MB " +
            "and a memory size of %s MB. Reducing the chunk size because its too large",
            maxVectorChunkSize / 1024.0 / 1024.0,
            vectorRamCacheBytes / 1024.0 / 1024.0
            )

        maxVectorChunkSize = vectorRamCacheBytes / 32

    logging.info("Creating a VDM with %s MB of memory and %s max vector size",
                 vectorRamCacheBytes / 1024.0 / 1024.0,
                 maxVectorChunkSize / 1024.0 / 1024.0
                 )

    VDM = FORANative.VectorDataManager(callbackScheduler, maxVectorChunkSize)
    VDM.setMemoryLimit(vectorRamCacheBytes, maxRamCacheBytes)

    return VDM
def main(*args):
    """Handle one backend gateway connection described on stdin.

    Stdin carries a length header (``struct`` format ``'I'``) followed by that
    many bytes of pickled connection data with the keys ``'socketFd'`` and
    ``'sharedStateAddress'``. Every file descriptor from 3 upwards, other
    than the connection socket, is closed before the request handler runs.

    Returns 0 when the handler finishes. If anything fails, the traceback is
    written to stderr and the exception propagates.
    """
    Setup.config().configureLoggingForBackgroundProgram()
    try:
        headerSize = struct.calcsize('I')
        dataLen = struct.unpack('I', sys.stdin.read(headerSize))[0]
        data = sys.stdin.read(dataLen)

        # NOTE(review): unpickling is only safe if stdin is always fed by a
        # trusted parent process - confirm; pickle must never see untrusted data.
        connectionData = pickle.loads(data)
        socketFd = connectionData['socketFd']

        maxFD = os.sysconf("SC_OPEN_MAX")
        for fd in range(3, maxFD):
            if fd == socketFd:
                continue
            try:
                os.close(fd)
            except OSError:
                # Best effort: most descriptors in the range are simply not open.
                pass

        handler = BackendGatewayRequestHandler(
            socketFd,
            connectionData['sharedStateAddress']
            )
        handler.handle()
    except:
        # Only report a traceback when there actually is one. Doing this in
        # 'finally' also wrote a bogus "None" traceback on every clean exit.
        sys.stderr.write(traceback.format_exc())
        raise
    finally:
        sys.stderr.write("closing connection handler\n")
        sys.stderr.flush()
    return 0
def __init__(self):
    """Prepare the simulator: scheduler, script locations, ports and process pool.

    All script and directory locations hang off ``self.uforaPath``, which is
    the absolute path of the directory one level above this module's
    directory. Ports come from ``Setup.config()``.
    """
    factory = CallbackScheduler.createSimpleCallbackSchedulerFactory()
    self.callbackScheduler = factory.createScheduler("Simulator", 1)

    here = os.path.dirname(__file__)
    self.uforaPath = os.path.abspath(os.path.join(here, '../'))

    # Shared state and gateway entry points.
    self.sharedStatePath = os.path.join(self.uforaPath, 'distributed/SharedState')
    self.sharedStateMainline = os.path.join(self.sharedStatePath, 'sharedStateMainline.py')
    self.gatewayServiceMainline = os.path.join(self.uforaPath, 'scripts/init/ufora-gateway.py')

    # Relay script.
    self.webPath = os.path.join(self.uforaPath, 'web/relay')
    self.relayScript = os.path.join(self.webPath, 'server.coffee')

    config = Setup.config()
    self.relayPort = config.relayPort
    self.relayHttpsPort = config.relayHttpsPort
    self.sharedStatePort = config.sharedStatePort
    self.restApiPort = config.restApiPort
    self.subscribableWebObjectsPort = config.subscribableWebObjectsPort

    # An OutOfProcessDownloader pool lets us execute commands like 'forever'
    # from there rather than forking from the main process, which can run
    # out of memory.
    self.processPool = OutOfProcessDownloader.OutOfProcessDownloaderPool(1)

    self.desirePublisher = None
    self._connectionManager = None
def createServiceAndServiceThread(self):
    """Create, start and return a CumulusService for an in-memory cluster child.

    The instance's overrides are written onto the object returned by
    ``Setup.config()`` before it is handed to the service. The two cache-size
    overrides are divided by 1024 twice before being stored in the ``...MB``
    settings, and tcmalloc tracking is switched off.
    """
    config = Setup.config()
    config.cumulusMaxRamCacheMB = self.cumulusMaxRamCacheSizeOverride / 1024 / 1024
    config.cumulusVectorRamCacheMB = self.cumulusVectorRamCacheSizeOverride / 1024 / 1024
    config.cumulusTrackTcmalloc = False
    config.cumulusServiceThreadCount = self.cumulusThreadCountOverride
    config.cumulusDiskCacheStorageSubdirectory = str(uuid.uuid4())

    address = str(uuid.uuid4())
    scheduler = self.callbackSchedulerFactory.createScheduler("InMemoryClusterChild", 1)

    listenerPorts = [
        Setup.config().cumulusControlPort,
        Setup.config().cumulusDataPort,
    ]
    listener = self.createMultiChannelListener(scheduler, listenerPorts, address)

    channelFactory = self.channelManager.createChannelFactory()
    eventHandler = CumulusNative.CumulusWorkerHoldEventsInMemoryEventHandler()

    service = CumulusService.CumulusService(
        ownAddress=address,
        channelListener=listener,
        channelFactory=channelFactory,
        eventHandler=eventHandler,
        callbackScheduler=scheduler,
        diagnosticsDir=None,
        config=config,
        viewFactory=self.sharedStateViewFactory
        )

    # startService takes a callback; nothing needs to happen in it here.
    service.startService(lambda: None)
    return service
def constructVDM(callbackScheduler,
                 vectorRamCacheBytes=None,
                 maxRamCacheBytes=None,
                 maxVectorChunkSize=None):
    """Create a VectorDataManager with memory limits applied and return it.

    callbackScheduler - passed through to the VectorDataManager constructor.
    vectorRamCacheBytes - defaults to the configured ``cumulusVectorRamCacheMB``
        converted to bytes.
    maxRamCacheBytes - defaults to the configured ``cumulusMaxRamCacheMB``
        converted to bytes.
    maxVectorChunkSize - defaults to the configured ``maxPageSizeInBytes``.
        It is capped at 1/32 of ``vectorRamCacheBytes``.
    """
    bytesPerMb = 1024 * 1024

    if vectorRamCacheBytes is None:
        vectorRamCacheBytes = Setup.config().cumulusVectorRamCacheMB * bytesPerMb

    if maxRamCacheBytes is None:
        maxRamCacheBytes = Setup.config().cumulusMaxRamCacheMB * bytesPerMb

    if maxVectorChunkSize is None:
        maxVectorChunkSize = Setup.config().maxPageSizeInBytes

    if maxVectorChunkSize > vectorRamCacheBytes / 32:
        # Arguments follow the order in which the message names them: chunk
        # size first, then memory size. They were previously swapped, so the
        # log line reported each value under the other's label.
        logging.info(
            "VDM constructor specified a chunk size of %s MB " +
            "and a memory size of %s MB. Reducing the chunk size because its too large",
            maxVectorChunkSize / 1024.0 / 1024.0,
            vectorRamCacheBytes / 1024.0 / 1024.0)

        maxVectorChunkSize = vectorRamCacheBytes / 32

    logging.info("Creating a VDM with %s MB of memory and %s max vector size",
                 vectorRamCacheBytes / 1024.0 / 1024.0,
                 maxVectorChunkSize / 1024.0 / 1024.0)

    VDM = FORANative.VectorDataManager(callbackScheduler, maxVectorChunkSize)
    VDM.setMemoryLimit(vectorRamCacheBytes, maxRamCacheBytes)

    return VDM
def startSharedState(self):
    """Start the shared state process via ``forever`` and wait for the launcher.

    The command is issued from ``self.sharedStatePath``. Output of the
    launcher is relayed to the log, it is waited on for at most 60 seconds,
    and then it is stopped.
    """
    fallbackCacheDir = os.path.join(Setup.config().fakeAwsBaseDir, 'ss_cache')
    cacheDir = Setup.config().getConfigValue("SHARED_STATE_CACHE", fallbackCacheDir)

    logging.info(
        "Starting shared state with cache dir '%s' and log file '%s'",
        cacheDir, self.sharedStateLogFile)

    def relayStdout(msg):
        logging.info("SHARED STATE OUT> %s", msg)

    def relayStderr(msg):
        logging.info("SHARED STATE ERR> %s", msg)

    foreverOptions = ['--killSignal', 'SIGTERM', '-l', self.sharedStateLogFile]
    mainlineOptions = ['--cacheDir', cacheDir, '--logging', 'info']
    command = (['forever'] + foreverOptions +
               ['start', '-c', 'python', self.sharedStateMainline] +
               mainlineOptions)

    with DirectoryScope.DirectoryScope(self.sharedStatePath):
        launcher = SubprocessRunner.SubprocessRunner(
            command, relayStdout, relayStderr, dict(os.environ))
        launcher.start()
        launcher.wait(60.0)
        launcher.stop()
def __init__(self, vdm, offlineCache):
    """Initialize bookkeeping state and start the worker threads.

    vdm - stored as ``self.vdm_``.
    offlineCache - stored as ``self.offlineCache_``.

    One worker thread running ``self.threadWorker`` is started per configured
    ``cumulusServiceThreadCount``; the threads are created last so that all
    state exists before any of them runs.
    """
    Stoppable.Stoppable.__init__(self)

    self.vdm_ = vdm
    self.offlineCache_ = offlineCache

    self.lock_ = threading.RLock()
    self.completable_ = Queue.Queue()
    self.dependencies_ = TwoWaySetMap.TwoWaySetMap()

    self.finishedValuesAndTimeElapsed_ = {}
    self.intermediates_ = {}
    self.computingContexts_ = {}
    self.computingContexts_t0_ = {}
    self.watchers_ = {}
    self.isSplit_ = set()
    self.contexts_ = []
    self.timesComputed = 0

    workerCount = Setup.config().cumulusServiceThreadCount

    self.inProcessDownloader = OutOfProcessDownloader.OutOfProcessDownloaderPool(
        workerCount,
        actuallyRunOutOfProcess=False
        )

    self.threads_ = []
    self.isActive = True

    # Start the worker threads last, once every attribute they may touch exists.
    for _ in range(workerCount):
        worker = ManagedThread.ManagedThread(target=self.threadWorker)
        worker.start()
        self.threads_.append(worker)
def __init__(self):
    """Initialize the simulator's scheduler, file locations and ports.

    File locations are derived from the directory one level above this
    module's directory; port numbers are read from ``Setup.config()``.
    """
    schedulerFactory = CallbackScheduler.createSimpleCallbackSchedulerFactory()
    self.callbackScheduler = schedulerFactory.createScheduler("Simulator", 1)

    # Everything below is located relative to the parent of this module's directory.
    moduleDir = os.path.dirname(__file__)
    self.uforaPath = os.path.abspath(os.path.join(moduleDir, '../'))

    self.sharedStatePath = os.path.join(self.uforaPath, 'distributed/SharedState')
    self.sharedStateMainline = os.path.join(self.sharedStatePath, 'sharedStateMainline.py')
    self.gatewayServiceMainline = os.path.join(self.uforaPath, 'scripts/init/ufora-gateway.py')

    self.webPath = os.path.join(self.uforaPath, 'web/relay')
    self.relayScript = os.path.join(self.webPath, 'server.coffee')

    config = Setup.config()
    self.relayPort = config.relayPort
    self.relayHttpsPort = config.relayHttpsPort
    self.sharedStatePort = config.sharedStatePort
    self.restApiPort = config.restApiPort
    self.subscribableWebObjectsPort = config.subscribableWebObjectsPort

    self.desirePublisher = None
    self._connectionManager = None
def generateTestConfigFileBody_(self):
    """Return the text of a test config file.

    The body holds the configured root data dir and base port, plus a FORA
    memory cap of "10000" on machines with at most 8 CPUs and "60000"
    otherwise.
    """
    maxMemMb = "60000" if multiprocessing.cpu_count() > 8 else "10000"

    template = ("ROOT_DATA_DIR = %s\n"
                "BASE_PORT = %s\n"
                "FORA_MAX_MEM_MB = %s\n")

    return template % (
        Setup.config().rootDataDir,
        Setup.config().basePort,
        maxMemMb
        )
def generateTestConfigFileBody_(self):
    """Build the body of the config file used for tests.

    It contains ROOT_DATA_DIR and BASE_PORT taken from ``Setup.config()`` and
    FORA_MAX_MEM_MB, which is "60000" when the machine reports more than 8
    CPUs and "10000" otherwise.
    """
    if multiprocessing.cpu_count() <= 8:
        foraMaxMemMb = "10000"
    else:
        foraMaxMemMb = "60000"

    values = (Setup.config().rootDataDir, Setup.config().basePort, foraMaxMemMb)

    return ("ROOT_DATA_DIR = %s\n"
            "BASE_PORT = %s\n"
            "FORA_MAX_MEM_MB = %s\n"
            ) % values
def runPythonUnitTests_(args, testFilter, testGroupName, testFiles):
    """Load, filter and run a group of Python unit tests through nose.

    args - parsed harness arguments; this function reads ``testHarnessVerbose``,
        ``list``, ``remainder``, ``copies``, ``random`` and ``pythreadcheck``.
    testFilter - optional callable applied to the full list of test cases.
    testGroupName - only used in the "Executing ..." banner.
    testFiles - passed to ``UnitTestCommon.loadTestCases``.

    With ``args.list`` the test ids are printed and the process exits with
    status 0. With ``args.pythreadcheck`` each test runs in its own suite and
    the result is whether any run returned True; otherwise the result of a
    single ``runPyTestSuite`` call over all tests is returned.
    """
    testArgs = ["dummy"]

    if args.testHarnessVerbose or args.list:
        testArgs.append('--nocaptureall')

    testArgs.append('--verbosity=0')

    if not args.list:
        print("Executing %s unit tests." % testGroupName)

    Setup.config().configureLoggingForUserProgram()

    parser = PythonTestArgumentParser()
    filterActions = parser.parse_args(args.remainder)

    bsaRootDir = os.path.split(ufora.__file__)[0]

    plugins = nose.plugins.manager.PluginManager([OutputCaptureNosePlugin()])
    config = nose.config.Config(plugins=plugins)
    config.configure(testArgs)

    # The whole set is loaded once per requested copy, so tests can be
    # scheduled several times in one run.
    testCasesToRun = []
    for _ in range(args.copies):
        testCases = UnitTestCommon.loadTestCases(config, testFiles, bsaRootDir, 'ufora')
        if filterActions:
            testCases = applyFilterActions(filterActions, testCases)

        testCasesToRun += testCases

    if testFilter is not None:
        testCasesToRun = testFilter(testCasesToRun)

    if args.list:
        for test in testCasesToRun:
            print(test.id())

        # Hard exit: listing never falls through to running anything.
        os._exit(0)

    if args.random:
        import random
        random.shuffle(testCasesToRun)

    if args.pythreadcheck:
        results = {}
        for test in testCasesToRun:
            results[test] = runPyTestSuite(config, None, unittest.TestSuite([test]), testArgs)

        return True in results.values()

    # (A dead "testFiles = '.'" assignment used to sit here; its value was
    # never read.)
    return runPyTestSuite(config, None, testCasesToRun, testArgs)
def runPythonUnitTests_(args, testFilter, testGroupName, testFiles):
    """Load, filter and run a group of Python unit tests through nose.

    args - parsed harness arguments; this function reads ``testHarnessVerbose``,
        ``list``, ``remainder``, ``copies``, ``random`` and ``pythreadcheck``.
    testFilter - optional callable applied to the full list of test cases.
    testGroupName - only used in the "Executing ..." banner.
    testFiles - passed to ``UnitTestCommon.loadTestCases``.

    With ``args.list`` the test ids are printed and the process exits with
    status 0. With ``args.pythreadcheck`` each test runs in its own suite and
    the result is whether any run returned True; otherwise the result of a
    single ``runPyTestSuite`` call over all tests is returned.
    """
    testArgs = ["dummy"]

    if args.testHarnessVerbose or args.list:
        testArgs.append('--nocaptureall')

    testArgs.append('--verbosity=0')

    if not args.list:
        print("Executing %s unit tests." % testGroupName)

    Setup.config().configureLoggingForUserProgram()

    parser = PythonTestArgumentParser()
    filterActions = parser.parse_args(args.remainder)

    bsaRootDir = os.path.split(ufora.__file__)[0]

    plugins = nose.plugins.manager.PluginManager([OutputCaptureNosePlugin()])
    config = nose.config.Config(plugins=plugins)
    config.configure(testArgs)

    # The whole set is loaded once per requested copy, so tests can be
    # scheduled several times in one run.
    testCasesToRun = []
    for _ in range(args.copies):
        testCases = UnitTestCommon.loadTestCases(config, testFiles, bsaRootDir, 'ufora')
        if filterActions:
            testCases = applyFilterActions(filterActions, testCases)

        testCasesToRun += testCases

    if testFilter is not None:
        testCasesToRun = testFilter(testCasesToRun)

    if args.list:
        for test in testCasesToRun:
            print(test.id())

        # Hard exit: listing never falls through to running anything.
        os._exit(0)

    if args.random:
        import random
        random.shuffle(testCasesToRun)

    if args.pythreadcheck:
        results = {}
        for test in testCasesToRun:
            results[test] = runPyTestSuite(config, None, unittest.TestSuite([test]), testArgs)

        return True in results.values()

    # (A dead "testFiles = '.'" assignment used to sit here; its value was
    # never read.)
    return runPyTestSuite(config, None, testCasesToRun, testArgs)
def createService(args): callbackSchedulerFactory = CallbackScheduler.createSimpleCallbackSchedulerFactory() callbackScheduler = callbackSchedulerFactory.createScheduler('ufora-worker', 1) channelListener = MultiChannelListener(callbackScheduler, [args.base_port, args.base_port + 1]) sharedStateViewFactory = ViewFactory.ViewFactory.TcpViewFactory( callbackSchedulerFactory.createScheduler('SharedState', 1), args.manager_address, int(args.manager_port) ) channelFactory = TcpChannelFactory.TcpStringChannelFactory(callbackScheduler) diagnostics_dir = os.getenv("UFORA_WORKER_DIAGNOSTICS_DIR") eventHandler = diagnostics_dir and createEventHandler( diagnostics_dir, callbackSchedulerFactory.createScheduler("ufora-worker-event-handler", 1) ) own_address = args.own_address or get_own_ip() print "Listening on", own_address, "ports:", args.base_port, "and", args.base_port+1 config = Setup.config() print "RAM cache of %d / %d MB and %d threads. Track tcmalloc: %s" % ( config.cumulusVectorRamCacheMB, config.cumulusMaxRamCacheMB, config.cumulusServiceThreadCount, config.cumulusTrackTcmalloc ) print "Ufora store at %s:%s" % (args.manager_address, args.manager_port) s3InterfaceFactory = ActualS3Interface.ActualS3InterfaceFactory() print "PythonIoTasks threads: %d. Out of process: %s" % ( config.externalDatasetLoaderServiceThreads, s3InterfaceFactory.isCompatibleWithOutOfProcessDownloadPool ) return CumulusService.CumulusService( own_address, channelListener, channelFactory, eventHandler, callbackScheduler, diagnostics_dir, Setup.config(), viewFactory=sharedStateViewFactory, s3InterfaceFactory=s3InterfaceFactory, objectStore=NullObjectStore.NullObjectStore() )
def __init__(self,
             offlineCacheFunction,
             newMemLimit,
             remoteEvaluator=None,
             newLoadRatio=.5,
             maxPageSizeInBytes=None,
             vdmOverride=None):
    """Set up the evaluator's VectorDataManager, offline cache and computation cache.

    offlineCacheFunction - called with the new VDM to produce an offline
        cache (may return None); not called when ``vdmOverride`` is given.
    newMemLimit - memory limit applied to a newly created VDM; the second
        limit passed alongside it is 1.25 times this value.
    remoteEvaluator - stored as ``self.remoteEvaluator_``.
    newLoadRatio - load ratio applied to a newly created VDM.
    maxPageSizeInBytes - defaults to the configured ``maxPageSizeInBytes``.
    vdmOverride - an existing VDM to use as-is instead of creating one.
    """
    if maxPageSizeInBytes is None:
        maxPageSizeInBytes = Setup.config().maxPageSizeInBytes

    if vdmOverride is None:
        schedulerFactory = CallbackSchedulerNative.createSimpleCallbackSchedulerFactory()
        self.vdm_ = FORANative.VectorDataManager(
            schedulerFactory.createScheduler("LocalEvaluator", 1),
            maxPageSizeInBytes
            )
        self.vdm_.setDropUnreferencedPagesWhenFull(True)

        self.offlineCache_ = offlineCacheFunction(self.vdm_)
        if self.offlineCache_ is not None:
            self.vdm_.setOfflineCache(self.offlineCache_)

        logging.info("LocalEvaluator Creating VDMC with %s MB", newMemLimit / 1024.0 / 1024.0)

        self.vdm_.setMemoryLimit(newMemLimit, int(newMemLimit * 1.25))
        self.vdm_.setLoadRatio(newLoadRatio)
    else:
        # A caller-supplied VDM is used untouched and gets no offline cache.
        self.vdm_ = vdmOverride
        self.offlineCache_ = None

    self.remoteEvaluator_ = remoteEvaluator
    self.cache_ = ComputationCache(self.vdm_, self.offlineCache_)
def setUpClass(cls):
    """Start the global cluster simulator with one worker and connect pyfora to it."""
    cls.config = Setup.config()
    cls.executor = None

    simulator = ClusterSimulation.Simulator.createGlobalSimulator()
    cls.simulation = simulator
    simulator.startService()
    simulator.getDesirePublisher().desireNumberOfWorkers(1)

    cls.ufora = pyfora.connect('http://localhost:30000')
def defaultLocalEvaluator(remoteEvaluator=None, vdmOverride=None):
    """Return a LocalEvaluator with no offline cache.

    The memory limit is the configured ``cumulusVectorRamCacheMB`` converted
    to bytes.
    """
    def noOfflineCache(vdm):
        return None

    memLimitBytes = Setup.config().cumulusVectorRamCacheMB * 1024 * 1024

    return LocalEvaluator(
        noOfflineCache,
        memLimitBytes,
        remoteEvaluator,
        vdmOverride=vdmOverride
        )
def test_teardown_simple(self):
    """Tear down a context once it reports a cache request."""
    vdm = FORANative.VectorDataManager(callbackScheduler, Setup.config().maxPageSizeInBytes)
    context = ExecutionContext.ExecutionContext(dataManager=vdm)

    # First run a trivial function to completion.
    trivialFun = FORA.extractImplValContainer(FORA.eval("fun(){nothing}"))
    context.evaluate(trivialFun, FORANative.symbol_Call)
    context.getFinishedResult()

    cachingFun = FORA.extractImplValContainer(
        FORA.eval("""fun() { let f = fun() { }; let v = [1, [3]]; cached(f()) }""")
        )
    context.evaluate(cachingFun, FORANative.symbol_Call)

    # Keep resuming until the context reports a cache request.
    while not context.isCacheRequest():
        context.resume()

    context.teardown(True)
def test_teardown(self):
    """Create a view on a harness, then tear down the view and the harness."""
    sharedStatePort = Setup.config().sharedStatePort
    harness = SharedStateTestHarness.SharedStateTestHarness(False, port=sharedStatePort)

    harness.newView().teardown()
    harness.teardown()
def test_teardown_simple(self):
    """Tear down a non-splitting context once it reports a cache request."""
    vdm = FORANative.VectorDataManager(callbackScheduler, Setup.config().maxPageSizeInBytes)
    context = ExecutionContext.ExecutionContext(
        dataManager=vdm,
        allowInternalSplitting=False
        )

    # First run a trivial function to completion.
    trivialFun = FORA.extractImplValContainer(FORA.eval("fun(){nothing}"))
    evaluate(context, trivialFun, FORANative.symbol_Call)
    context.getFinishedResult()

    cachingFun = FORA.extractImplValContainer(
        FORA.eval("""fun() { let f = fun() { }; let v = [1, [3]]; cached(f()) }""")
        )
    evaluate(context, cachingFun, FORANative.symbol_Call)

    # Keep computing until the context reports a cache request.
    while not context.isCacheRequest():
        context.compute()

    context.teardown(True)
def defaultLocalEvaluator(remoteEvaluator=None, vdmOverride=None):
    """Build a LocalEvaluator whose offline-cache factory always yields None.

    Its memory limit is ``cumulusVectorRamCacheMB`` from the config, expressed
    in bytes.
    """
    bytesPerMb = 1024 * 1024
    memLimit = Setup.config().cumulusVectorRamCacheMB * bytesPerMb
    offlineCacheFactory = lambda vdm: None

    return LocalEvaluator(
        offlineCacheFactory,
        memLimit,
        remoteEvaluator,
        vdmOverride=vdmOverride
        )
def test_extractPausedComputationDuringVectorLoad(self):
    """A computation paused on a vector load is extracted with exactly one frame."""
    self.runtime = Runtime.getMainRuntime()

    vdm = FORANative.VectorDataManager(callbackScheduler, Setup.config().maxPageSizeInBytes)
    context = ExecutionContext.ExecutionContext(
        dataManager=vdm,
        allowInterpreterTracing=False
        )

    # Produce a paged vector.
    makePagedVec = FORA.extractImplValContainer(FORA.eval("fun() { [1,2,3].paged }"))
    callSymbol = FORANative.ImplValContainer(FORANative.makeSymbol("Call"))
    context.evaluate(makePagedVec, callSymbol)
    pagedVec = context.getFinishedResult().asResult.result

    # Set up a GetItem access on element 0 of that vector.
    getItemSymbol = FORANative.ImplValContainer(FORANative.makeSymbol("GetItem"))
    zeroIndex = FORANative.ImplValContainer(0)
    context.placeInEvaluationState(
        FORANative.ImplValContainer((pagedVec, getItemSymbol, zeroIndex))
        )

    # Unload everything first, so that resuming is expected to end in a vector load.
    vdm.unloadAllPossible()
    context.resume()

    self.assertTrue(context.isVectorLoad())

    computation = context.extractPausedComputation()
    self.assertEqual(len(computation.frames),1)
def createViewFactory():
    """Return a TCP view factory for shared state on localhost at the configured port."""
    schedulerFactory = CallbackScheduler.createSimpleCallbackSchedulerFactory()
    scheduler = schedulerFactory.createScheduler('fora-interpreter', 1)

    return ViewFactory.ViewFactory.TcpViewFactory(
        scheduler,
        'localhost',
        Setup.config().sharedStatePort
        )
def test_refcountsInCompiledCode(self):
    """The stack trace of an interrupted deep recursion mentions "Vector" fewer than 10 times."""
    vdm = FORANative.VectorDataManager(callbackScheduler, Setup.config().maxPageSizeInBytes)

    context = ExecutionContext.ExecutionContext(
        dataManager = vdm,
        allowInterpreterTracing = True,
        blockUntilTracesAreCompiled = True
        )

    # The embedded program contains a '//' line comment, so it must keep its
    # line breaks: on a single line the comment would swallow the rest of
    # the program.
    text = """fun(){
    let f = fun(v, depth) {
        if (depth > 100)
            //this will trigger an interrupt since the data cannot exist in the VDM
            datasets.s3('','')
        else
            f(v, depth+1)
        }

    f([1,2,3,4,5], 0)
    }"""

    context.evaluate(
        FORA.extractImplValContainer(FORA.eval(text)),
        FORANative.symbol_Call
        )

    stacktraceText = context.extractCurrentTextStacktrace()

    self.assertTrue(stacktraceText.count("Vector") < 10)
def test_verifyThatExtractingPausedComputationsDoesntDuplicateLargeStrings(self):
    """Resuming an extracted computation uses less than twice the original's bytes."""
    text = """fun() { let s = ' ' while (size(s) < 1000000) s = s + s let f = fun(x) { if (x > 0) return f(x-1) + s[x]; `TriggerInterruptForTesting() } f(20) }"""

    vdm = FORANative.VectorDataManager(callbackScheduler, Setup.config().maxPageSizeInBytes)

    def newContext():
        return ExecutionContext.ExecutionContext(
            dataManager=vdm,
            allowInterpreterTracing=False
            )

    original = newContext()
    original.evaluate(
        FORA.extractImplValContainer(FORA.eval(text)),
        FORANative.symbol_Call
        )
    computation = original.extractPausedComputation()

    # Resume the extracted computation in a second context on the same VDM.
    resumed = newContext()
    resumed.resumePausedComputation(computation)

    self.assertTrue(
        resumed.totalBytesUsed < 2 * original.totalBytesUsed
        )
def createViewFactory():
    """Create a TcpViewFactory talking to localhost on the configured shared state port."""
    factory = CallbackScheduler.createSimpleCallbackSchedulerFactory()
    interpreterScheduler = factory.createScheduler('fora-interpreter', 1)
    port = Setup.config().sharedStatePort

    return ViewFactory.ViewFactory.TcpViewFactory(interpreterScheduler, 'localhost', port)
def test_extractPausedComputationDuringVectorLoad(self):
    """Extracting a computation that is waiting on a vector load yields one frame."""
    self.runtime = Runtime.getMainRuntime()

    vdm = FORANative.VectorDataManager(callbackScheduler,
                                       Setup.config().maxPageSizeInBytes)
    context = ExecutionContext.ExecutionContext(dataManager=vdm,
                                                allowInterpreterTracing=False)

    def symbol(name):
        return FORANative.ImplValContainer(FORANative.makeSymbol(name))

    # Produce a paged vector.
    context.evaluate(
        FORA.extractImplValContainer(FORA.eval("fun() { [1,2,3].paged }")),
        symbol("Call"))
    pagedVec = context.getFinishedResult().asResult.result

    # Set up a GetItem access on element 0 of that vector.
    getItemCall = (pagedVec, symbol("GetItem"), FORANative.ImplValContainer(0))
    context.placeInEvaluationState(FORANative.ImplValContainer(getItemCall))

    # Unload everything first, so that resuming is expected to end in a vector load.
    vdm.unloadAllPossible()
    context.resume()

    self.assertTrue(context.isVectorLoad())

    computation = context.extractPausedComputation()
    self.assertEqual(len(computation.frames), 1)
def createService(args): callbackSchedulerFactory = CallbackScheduler.createSimpleCallbackSchedulerFactory() callbackScheduler = callbackSchedulerFactory.createScheduler('ufora-worker', 1) channelListener = MultiChannelListener(callbackScheduler, [args.base_port, args.base_port + 1]) sharedStateViewFactory = ViewFactory.ViewFactory.TcpViewFactory( callbackSchedulerFactory.createScheduler('SharedState', 1), args.manager_address, int(args.manager_port) ) channelFactory = TcpChannelFactory.TcpStringChannelFactory(callbackScheduler) diagnostics_dir = os.getenv("UFORA_WORKER_DIAGNOSTICS_DIR") eventHandler = diagnostics_dir and createEventHandler( diagnostics_dir, callbackSchedulerFactory.createScheduler("ufora-worker-event-handler", 1) ) own_address = args.own_address or get_own_ip() print "Listening on", own_address, "ports:", args.base_port, "and", args.base_port+1 return CumulusService.CumulusService( own_address, channelListener, channelFactory, eventHandler, callbackScheduler, diagnostics_dir, Setup.config(), viewFactory=sharedStateViewFactory )
def __init__(self, relayHostname, relayHttpsPort=None, messageDelayInSeconds=None):
    """Initialize a PipeTransport.

    relayHostname - stored as ``self.relayHostname``.
    relayHttpsPort - port to use; a falsy value (None, 0) selects the
        configured ``relayHttpsPort`` instead.
    messageDelayInSeconds - if not None, then all messages will be delayed
        by this many seconds before being pumped into the receiving channel.
        This can simulate delays talking over the internet.
    """
    # Endpoint.
    self.relayHostname = relayHostname
    self.relayHttpsPort = relayHttpsPort or Setup.config().relayHttpsPort

    # Callbacks; none are registered yet.
    self.onMessageReceived = None
    self.onDisconnected = None

    # Connection state.
    self.isConnected = False
    self.isShuttingDown = False

    # Proxy process and its pipes.
    self.proxyProcess = None
    self.proxyStdIn = None
    self.proxyStdOut = None
    self.proxyStdErr = None

    # Threads and message pumping.
    self.inputLoopThread = None
    self.proxyOutputThread = None
    self.messagePumpThread = None
    self.messagePumpQueue = Queue.Queue()
    self.messageDelayInSeconds = messageDelayInSeconds

    logging.info("PipeTransport created for host %s:%s", self.relayHostname, self.relayHttpsPort)
def test_serialize_while_holding_interior_vector(self):
    """An interrupted context holding nested paged vectors can be serialized."""
    vdm = FORANative.VectorDataManager(callbackScheduler, Setup.config().maxPageSizeInBytes)
    context = ExecutionContext.ExecutionContext(
        dataManager=vdm,
        allowInterpreterTracing=False,
        allowInternalSplitting=False
        )

    program = FORA.eval(""" fun() { let v = [[1].paged].paged; let v2 = v[0] `TriggerInterruptForTesting() 1+2+3+v+v2 }""")
    evaluate(context, FORA.extractImplValContainer(program), FORANative.symbol_Call)

    self.assertTrue(context.isInterrupted())

    serialized = context.serialize()

    # Drop the reference to the context once it has been serialized.
    context = None
def _addWorker(self):
    """Start a new worker thread and register it in ``self.threads`` under a fresh UUID."""
    workerId = uuid.uuid4()
    workerArgs = (workerId, Setup.config().fakeAwsBaseDir)

    workerThread = threading.Thread(target=self._runWorker, args=workerArgs)

    # Register before starting, so the thread is tracked once it runs.
    self.threads[workerId] = workerThread
    workerThread.start()
def test_teardown_during_vector_load(self):
    """A context that is waiting on a vector load can be torn down."""
    vdm = FORANative.VectorDataManager(callbackScheduler, Setup.config().maxPageSizeInBytes)
    context = ExecutionContext.ExecutionContext(dataManager=vdm)

    # Build a function that closes over a paged vector.
    makeAccessor = FORA.extractImplValContainer(
        FORA.eval("fun() { let v = [1,2,3].paged; fun() { v[1] } }")
        )
    context.evaluate(makeAccessor, FORANative.symbol_Call)

    vdm.unloadAllPossible()

    pagedVecAccessFun = context.getFinishedResult().asResult.result
    context.teardown()

    # Calling the accessor after the unload is expected to end in a vector
    # load rather than an interrupt.
    context.evaluate(pagedVecAccessFun, FORANative.symbol_Call)

    self.assertFalse(context.isInterrupted())
    self.assertTrue(context.isVectorLoad())

    context.teardown()
def test_refcountsInCompiledCode(self):
    """The stack trace of an interrupted deep recursion mentions "Vector" fewer than 10 times."""
    vdm = FORANative.VectorDataManager(callbackScheduler,
                                       Setup.config().maxPageSizeInBytes)

    context = ExecutionContext.ExecutionContext(
        dataManager=vdm,
        allowInterpreterTracing=True,
        blockUntilTracesAreCompiled=True)

    # The embedded program contains a '//' line comment, so it must keep its
    # line breaks: on a single line the comment would swallow the rest of
    # the program.
    text = """fun(){
    let f = fun(v, depth) {
        if (depth > 100)
            //this will trigger an interrupt since the data cannot exist in the VDM
            datasets.s3('','')
        else
            f(v, depth+1)
        }

    f([1,2,3,4,5], 0)
    }"""

    context.evaluate(FORA.extractImplValContainer(FORA.eval(text)),
                     FORANative.symbol_Call)

    stacktraceText = context.extractCurrentTextStacktrace()

    self.assertTrue(stacktraceText.count("Vector") < 10)
def parseS3Dataset(s3InterfaceFactory, s3Dataset):
    """Log in to Amazon S3 and return ``(s3Interface, bucket, key)`` for a dataset.

    s3InterfaceFactory - called with no arguments for a default login, or
        with ``(accessKey, secretKey)`` when the dataset carries credentials.
    s3Dataset - an internal or external dataset description.

    Raises InvalidDatasetException when logging in with the dataset's own
    credentials fails, and DatasetLoadException when the dataset is neither
    internal nor external.
    """
    if s3Dataset.isInternal():
        # Use the internal login. This should have access only to our one
        # internal bucket.
        return (
            s3InterfaceFactory(),
            Setup.config().userDataS3Bucket,
            s3Dataset.asInternal.keyname
            )

    if not s3Dataset.isExternal():
        raise DatasetLoadException("Unknown dataset type")

    external = s3Dataset.asExternal

    if external.awsAccessKey == "":
        # No credentials supplied: fall back to the default login.
        interface = s3InterfaceFactory()
    else:
        try:
            interface = s3InterfaceFactory(
                external.awsAccessKey,
                external.awsSecretKey
                )
        except:
            raise InvalidDatasetException("Failed to log into S3 with given credentials")

    return (interface, external.bucket, external.key)
def test_resumingAfterCopyDataOutOfPages(self):
    """The paused computation has the same number of frames before and after
    values were copied out of vector pages and the computation resumed."""
    cycleBudget = 100000

    vdm = FORANative.VectorDataManager(callbackScheduler, Setup.config().maxPageSizeInBytes)
    context = ExecutionContext.ExecutionContext(dataManager=vdm)
    context.interruptAfterCycleCount(cycleBudget)

    text = """ fun() { let v = Vector.range(1000).paged; let ix1 = 0 let res = 0 while (true) { res = res + v[ix1] ix1 = (ix1 + 1) % size(v) } res }"""

    context.evaluate(FORA.extractImplValContainer(FORA.eval(text)),
                     FORANative.symbol_Call)

    framesBefore = context.extractPausedComputation()

    # Copy values out, unload and resume in bursts until the context ends up
    # waiting on a vector load.
    while not context.isVectorLoad():
        context.copyValuesOutOfVectorPages()
        vdm.unloadAllPossible()
        context.interruptAfterCycleCount(cycleBudget)
        context.resume()

    framesAfter = context.extractPausedComputation()

    self.assertTrue(len(framesBefore.frames) == len(framesAfter.frames))
def test_verifyThatExtractingPausedComputationsDoesntDuplicateLargeStrings(
        self):
    """A resumed copy of a paused computation uses less than twice the original's bytes."""
    text = """fun() { let s = ' ' while (size(s) < 1000000) s = s + s let f = fun(x) { if (x > 0) return f(x-1) + s[x]; `TriggerInterruptForTesting() } f(20) }"""

    vdm = FORANative.VectorDataManager(callbackScheduler,
                                       Setup.config().maxPageSizeInBytes)
    contextArgs = dict(dataManager=vdm, allowInterpreterTracing=False)

    firstContext = ExecutionContext.ExecutionContext(**contextArgs)
    firstContext.evaluate(FORA.extractImplValContainer(FORA.eval(text)),
                          FORANative.symbol_Call)
    computation = firstContext.extractPausedComputation()

    # Resume the extracted computation in a second context on the same VDM.
    secondContext = ExecutionContext.ExecutionContext(**contextArgs)
    secondContext.resumePausedComputation(computation)

    self.assertTrue(secondContext.totalBytesUsed < 2 * firstContext.totalBytesUsed)
def KeyspaceManager(randomSeed,
                    numManagers,
                    backupInterval=60*10,
                    pingInterval=20,
                    cachePathOverride=None,
                    maxOpenFiles=None,
                    maxLogFileSizeMb=10):
    """Create a native KeyspaceManager, backed by file storage unless the cache path is empty.

    cachePathOverride - storage directory; None selects the configured
        ``sharedStateCache``. An empty string means no storage at all.
    maxOpenFiles - None selects half of the RLIMIT_NOFILE soft limit, capped
        at 1000.
    maxLogFileSizeMb - passed through to the file storage.

    The remaining arguments are passed straight to the native constructor.
    """
    if cachePathOverride is None:
        cachePathOverride = Setup.config().sharedStateCache

    if maxOpenFiles is None:
        import resource
        softLimit = resource.getrlimit(resource.RLIMIT_NOFILE)[0]
        maxOpenFiles = min(softLimit / 2, 1000)

    storage = None
    if cachePathOverride != "":
        logging.info(
            "Creating FileStorage(cachePathOverride=%s, maxOpenFiles=%s, maxLogFileSizeMb=%s)",
            cachePathOverride,
            maxOpenFiles,
            maxLogFileSizeMb)
        storage = SharedStateNative.Storage.FileStorage(
            cachePathOverride,
            maxOpenFiles,
            maxLogFileSizeMb
            )

    return SharedStateNative.KeyspaceManager(
        randomSeed,
        numManagers,
        backupInterval,
        pingInterval,
        storage
        )
def test_serialize_while_holding_interior_vector(self):
    """An interrupted context holding nested paged vectors can be serialized."""
    vdm = FORANative.VectorDataManager(callbackScheduler, Setup.config().maxPageSizeInBytes)
    context = ExecutionContext.ExecutionContext(
        dataManager=vdm,
        allowInterpreterTracing=False
        )

    program = FORA.eval(""" fun() { let v = [[1].paged].paged; let v2 = v[0] `TriggerInterruptForTesting() 1+2+3+v+v2 }""")
    context.evaluate(
        FORA.extractImplValContainer(program),
        FORANative.symbol_Call
        )

    self.assertTrue(context.isInterrupted())

    serialized = context.serialize()

    # Drop the reference to the context once it has been serialized.
    context = None
def setUpClass(cls):
    """Start the global cluster simulator, request one worker, and open a
    pyfora connection stored on the class as `ufora`."""
    cls.config = Setup.config()
    cls.executor = None

    simulator = ClusterSimulation.Simulator.createGlobalSimulator()
    cls.simulation = simulator
    simulator.startService()

    publisher = simulator.getDesirePublisher()
    publisher.desireNumberOfWorkers(1)

    cls.ufora = pyfora.connect('http://localhost:30000')
def __call__(self): start, stop = self.lowOffset, self.highOffset logging.info("Starting extraction of %s, %s, [%s, %s]", self.bucketname, self.keyname, start, stop) t0 = time.time() totalThreads = Setup.config().externalDatasetLoaderThreadcount def downloadThread(ix): def downloader(): low = start + (stop - start) * ix / totalThreads high = start + (stop - start) * (ix + 1) / totalThreads tries = 0 while True: try: results[ix] = self.s3Interface.getKeyValueOverRange( self.bucketname, self.keyname, low, high) return except: if tries < 10: logging.warn( "Task %s had an exception:%s\nTries = %s. We will fail the " + "request when 'tries' gets to 10", self, traceback.format_exc(), tries) tries += 1 else: results[ix] = sys.exc_info() return return downloader results = [] threads = [] for ix in range(totalThreads): results.append(None) threads.append(threading.Thread(target=downloadThread(ix))) for t in threads: t.start() for t in threads: t.join() for r in results: if isinstance(r, tuple): exc_info = r raise exc_info[0], exc_info[1], exc_info[2] result = "".join(results) assert len(result) == stop - start logging.info("Actually extracting %s from s3 took %s", len(result) / 1024 / 1024.0, time.time() - t0) return result
def copyDataOutOfPagesTest(self, text, cycleCount, expectsToHaveCopies):
    """Run FORA `text` until interrupted after `cycleCount` cycles, then check
    whether copyValuesOutOfVectorPages() reports doing any work.

    When `expectsToHaveCopies` is true the first call must return a true
    value and the following call a false one; otherwise the single call must
    return a false value.
    """
    vdm = FORANative.VectorDataManager(
        callbackScheduler,
        Setup.config().maxPageSizeInBytes
        )

    context = ExecutionContext.ExecutionContext(
        dataManager=vdm,
        allowInterpreterTracing=False,
        allowInternalSplitting=False
        )

    settings = context.configuration
    settings.agressivelyValidateRefcountsAndPageReachability = True
    settings.releaseVectorHandlesImmediatelyAfterExecution = False

    callable_ = FORA.extractImplValContainer(FORA.eval(text))
    context.placeInEvaluationState(
        FORANative.ImplValContainer((callable_, FORANative.symbol_Call))
        )

    context.interruptAfterCycleCount(cycleCount)
    context.compute()

    if expectsToHaveCopies:
        self.assertTrue(context.copyValuesOutOfVectorPages())

    # Both paths end with a call that must report nothing left to copy.
    self.assertFalse(context.copyValuesOutOfVectorPages())
def test_teardown(self):
    """Create a harness and one view, then tear down the view followed by
    the harness."""
    sharedStatePort = Setup.config().sharedStatePort
    testHarness = SharedStateTestHarness.SharedStateTestHarness(
        False,
        port=sharedStatePort
        )

    testView = testHarness.newView()
    testView.teardown()

    testHarness.teardown()
def KeyspaceManager(randomSeed,
                    numManagers,
                    backupInterval=60 * 10,
                    pingInterval=20,
                    cachePathOverride=None,
                    maxOpenFiles=None,
                    maxLogFileSizeMb=10):
    """Create a SharedStateNative.KeyspaceManager with optional file-backed storage.

    randomSeed, numManagers, backupInterval, pingInterval - passed straight
        through to SharedStateNative.KeyspaceManager.
    cachePathOverride - where FileStorage keeps its data. None selects
        Setup.config().sharedStateCache; the empty string means no storage
        (storage=None is passed to the manager).
    maxOpenFiles - passed to FileStorage. None selects half of the
        RLIMIT_NOFILE soft limit, but never more than 1000.
    maxLogFileSizeMb - passed to FileStorage.
    """
    if cachePathOverride is None:
        cachePathOverride = Setup.config().sharedStateCache

    if maxOpenFiles is None:
        import resource
        softLimit = resource.getrlimit(resource.RLIMIT_NOFILE)[0]
        # RLIM_INFINITY (an unlimited soft limit) is -1 on Linux; dividing it
        # would produce a negative count, so use the cap in that case.
        limitIsUnbounded = softLimit == resource.RLIM_INFINITY or softLimit < 0
        if limitIsUnbounded:
            maxOpenFiles = 1000
        else:
            # `//` keeps the result integral under either division semantics.
            maxOpenFiles = min(softLimit // 2, 1000)

    storage = None
    if cachePathOverride != "":
        logging.info(
            "Creating FileStorage(cachePathOverride=%s, maxOpenFiles=%s, maxLogFileSizeMb=%s)",
            cachePathOverride, maxOpenFiles, maxLogFileSizeMb)
        storage = SharedStateNative.Storage.FileStorage(
            cachePathOverride, maxOpenFiles, maxLogFileSizeMb)

    return SharedStateNative.KeyspaceManager(randomSeed, numManagers,
                                             backupInterval, pingInterval,
                                             storage)
def __init__(self, relayHostname, relayHttpsPort=None, messageDelayInSeconds=None):
    """Initialize a PipeTransport.

    relayHostname - host name of the relay to connect to.

    relayHttpsPort - HTTPS port of the relay. When not given (or otherwise
        falsy), Setup.config().relayHttpsPort is used.

    messageDelayInSeconds - if not None, then all messages will be delayed
        by this many seconds before being pumped into the receiving
        channel. This can simulate delays talking over the internet.
    """
    # Where to connect.
    self.relayHostname = relayHostname
    self.relayHttpsPort = (
        relayHttpsPort if relayHttpsPort else Setup.config().relayHttpsPort
        )

    # Callbacks, unset until assigned by the user of the transport.
    self.onMessageReceived = None
    self.onDisconnected = None

    # Connection state.
    self.isConnected = False
    self.isShuttingDown = False

    # Proxy process and its pipes; nothing is launched here.
    self.proxyProcess = None
    self.proxyStdIn = None
    self.proxyStdOut = None
    self.proxyStdErr = None

    # Worker threads; none are started here.
    self.inputLoopThread = None
    self.proxyOutputThread = None
    self.messagePumpThread = None

    # Message pump configuration and queue.
    self.messageDelayInSeconds = messageDelayInSeconds
    self.messagePumpQueue = Queue.Queue()

    logging.info("PipeTransport created for host %s:%s",
                 self.relayHostname, self.relayHttpsPort)
def testDualConnectThrows(self):
    """A second SimpleServer on the same port must fail to start listening.

    Cleanup runs in a `finally` block so that a failed assertion (or a
    failure while waiting for the first server) still stops the servers and
    joins their threads instead of leaving listener threads running.
    """
    port = Setup.config().testPort

    server1 = SimpleServer.SimpleServer(port = port)
    thread1 = threading.Thread(target=suppressExceptionLogging(server1.runListenLoop))
    thread1.start()

    # Filled in only once the second server/thread actually exist, so the
    # cleanup below never touches something that was not created or started.
    server2 = None
    thread2 = None

    try:
        server1.blockUntilListening()

        server2 = SimpleServer.SimpleServer(port = port)
        secondThread = threading.Thread(target=suppressExceptionLogging(server2.runListenLoop))
        secondThread.start()
        thread2 = secondThread

        self.assertRaises(Exception, server2.blockUntilListening)
    finally:
        # Same teardown order as before: stop both servers, then join.
        server1.stop()
        if server2 is not None:
            server2.stop()

        thread1.join()
        if thread2 is not None:
            thread2.join()
def setUpClass(cls):
    """Configure logging (quieting the 'requests' logger), then create and
    start the global cluster simulator."""
    import logging

    requestsLogger = logging.getLogger('requests')
    requestsLogger.setLevel(logging.WARNING)
    logging.basicConfig(level=logging.DEBUG)

    cls.config = Setup.config()

    simulator = ClusterSimulation.Simulator.createGlobalSimulator()
    cls.simulation = simulator
    simulator.startService()
def setUpClass(cls):
    """Set up logging for the test class and bring up the global simulator.

    The 'requests' logger is limited to WARNING; the root logging
    configuration is requested at DEBUG level.
    """
    import logging

    # Quiet the 'requests' logger before the root configuration is applied.
    logging.getLogger('requests').setLevel(logging.WARNING)
    logging.basicConfig(level=logging.DEBUG)

    cls.config = Setup.config()

    globalSimulator = ClusterSimulation.Simulator.createGlobalSimulator()
    cls.simulation = globalSimulator
    globalSimulator.startService()
def setUpClass(cls):
    """Start the global cluster simulator with one desired worker and record
    a base directory on the class as `cur_dir`."""
    cls.config = Setup.config()
    cls.executor = None

    simulator = ClusterSimulation.Simulator.createGlobalSimulator()
    cls.simulation = simulator
    simulator.startService()
    simulator.getDesirePublisher().desireNumberOfWorkers(1)

    # NOTE(review): '__file__' is a quoted string literal here, not the
    # module's __file__ variable, so the path is resolved against the
    # current working directory. Confirm whether the module's own directory
    # was intended before changing it.
    resolvedPath = os.path.realpath('__file__')
    cls.cur_dir = os.path.dirname(resolvedPath)
def setUpClass(cls):
    """Start the global cluster simulator, ask for one worker, and then call
    cls.waitUntilConnected()."""
    cls.config = Setup.config()
    cls.executor = None

    simulator = ClusterSimulation.Simulator.createGlobalSimulator()
    cls.simulation = simulator
    simulator.startService()

    desirePublisher = simulator.getDesirePublisher()
    desirePublisher.desireNumberOfWorkers(1)

    cls.waitUntilConnected()
def setUp():
    """Remove every entry in the compiler disk cache directory, then call
    runSomeFora()."""
    cacheDir = Setup.config().compilerDiskCacheDir

    # The listing is taken once, up front, before anything is removed.
    for entryName in os.listdir(cacheDir):
        os.remove(os.path.join(cacheDir, entryName))

    runSomeFora()
def TcpViewFactory(callbackScheduler, address=None, port=None):
    """Return a ViewFactory whose channels come from a TcpMessageChannelFactory.

    callbackScheduler - passed to TcpMessageChannelFactory.
    address - required; ValueError is raised when it is None.
    port - defaults to Setup.config().sharedStatePort when None.
    """
    if address is None:
        raise ValueError("address cannot be None")

    # The configured port is looked up only when the caller gave none.
    resolvedPort = Setup.config().sharedStatePort if port is None else port

    return ViewFactory(
        TcpMessageChannelFactory(callbackScheduler, address, resolvedPort)
        )
def test_resumePausedComputationWithResult(self):
    """Resume a truncated paused computation that carries a pending result.

    A recursive FORA function is interrupted, its paused computation is
    extracted, and a new PausedComputation is built from the first five
    frames plus the value ([2], 0). The context resumes from that and must
    finish with 6 as the second element of its result.
    """
    self.runtime = Runtime.getMainRuntime()
    #self.dynamicOptimizer = self.runtime.dynamicOptimizer

    vdm = FORANative.VectorDataManager(
        callbackScheduler,
        Setup.config().maxPageSizeInBytes
        )

    context = ExecutionContext.ExecutionContext(
        dataManager = vdm,
        allowInterpreterTracing = False
        )

    text = (
        " let f = fun(v, ix) { if (ix > 0) { let (v2,res) = f(v,ix-1); "
        "return (v2, res + v2[0]) } `TriggerInterruptForTesting() "
        "return (v, 0) }; f([1], 10) "
    )

    context.evaluate(
        FORA.extractImplValContainer(FORA.eval("fun() { " + text + " }")),
        FORANative.ImplValContainer(FORANative.makeSymbol("Call"))
        )

    # Use the unittest assertion, not a bare `assert`, so the check still
    # runs when Python is started with -O.
    self.assertTrue(context.isInterrupted())

    pausedComp = context.extractPausedComputation()

    # Keep only the first five frames and supply the value to resume with.
    framesToUse = pausedComp.frames[0:5]

    pausedComp2 = FORANative.PausedComputation(
        framesToUse,
        FORA.extractImplValContainer(FORA.eval("([2], 0)", keepAsForaValue=True)),
        False
        )

    context.resumePausedComputation(pausedComp2)

    context.copyValuesOutOfVectorPages()
    context.pageLargeVectorHandles(0)

    context.resetInterruptState()
    context.resume()

    self.assertTrue(context.isFinished())

    result = context.getFinishedResult()

    # assertEqual reports the actual value on failure.
    self.assertEqual(result.asResult.result[1].pyval, 6)