def __init__(self):
    """Record the scheduler, source-tree paths, ports and helper pool.

    Paths are derived from the directory one level above this module;
    port numbers are copied from ``Setup.config()``.
    """
    schedulerFactory = CallbackScheduler.createSimpleCallbackSchedulerFactory()
    self.callbackScheduler = schedulerFactory.createScheduler("Simulator", 1)

    # Everything below is located relative to the parent of this file's directory.
    moduleDir = os.path.dirname(__file__)
    self.uforaPath = os.path.abspath(os.path.join(moduleDir, '../'))

    self.sharedStatePath = os.path.join(self.uforaPath, 'distributed/SharedState')
    self.sharedStateMainline = os.path.join(self.sharedStatePath,
                                            'sharedStateMainline.py')
    self.gatewayServiceMainline = os.path.join(self.uforaPath,
                                               'scripts/init/ufora-gateway.py')

    self.webPath = os.path.join(self.uforaPath, 'web/relay')
    self.relayScript = os.path.join(self.webPath, 'server.coffee')

    # Mirror each configured port onto an attribute of the same name.
    for portName in ('relayPort',
                     'relayHttpsPort',
                     'sharedStatePort',
                     'restApiPort',
                     'subscribableWebObjectsPort'):
        setattr(self, portName, getattr(Setup.config(), portName))

    # Create an OutOfProcessDownloader so we can execute commands like
    # 'forever' from there, instead of forking from the main process
    # (which can run out of memory).
    self.processPool = OutOfProcessDownloader.OutOfProcessDownloaderPool(1)

    self.desirePublisher = None
    self._connectionManager = None
def main(*args):
    """Serve one backend-gateway connection described on stdin.

    stdin carries a native unsigned int (``struct`` format ``'I'``) giving
    the payload length, followed by that many bytes of a pickled dict with
    the keys ``'socketFd'`` and ``'sharedStateAddress'``.  Every descriptor
    from 3 up to ``SC_OPEN_MAX`` other than the socket is closed before the
    request handler runs.

    Returns:
        0 once the handler has finished.

    Raises:
        Any exception from reading, unpickling or handling propagates to the
        caller after its traceback has been written to stderr.
    """
    Setup.config().configureLoggingForBackgroundProgram()
    try:
        headerSize = struct.calcsize('I')
        dataLen = struct.unpack('I', sys.stdin.read(headerSize))[0]
        data = sys.stdin.read(dataLen)

        # NOTE(review): pickle.loads can execute arbitrary code; this is only
        # acceptable if stdin is always written by a trusted parent process.
        connectionData = pickle.loads(data)
        socketFd = connectionData['socketFd']

        maxFD = os.sysconf("SC_OPEN_MAX")
        for fd in range(3, maxFD):
            if fd != socketFd:
                try:
                    os.close(fd)
                except OSError:
                    # Best effort: most descriptors in the range are not open.
                    pass

        handler = BackendGatewayRequestHandler(
            socketFd,
            connectionData['sharedStateAddress']
            )

        handler.handle()
    except:
        # Report the failure only when there is one; writing the traceback
        # from 'finally' emitted a spurious "None" on every clean exit.
        sys.stderr.write(traceback.format_exc())
        raise
    finally:
        sys.stderr.write("closing connection handler\n")
        sys.stderr.flush()
    return 0
def startSharedState(self):
    """Run the 'forever' command that starts the shared state mainline.

    The cache directory is the SHARED_STATE_CACHE config value, falling
    back to 'ss_cache' under ``fakeAwsBaseDir``.  The launcher is run inside
    a DirectoryScope for ``self.sharedStatePath``, waited on for up to
    60 seconds and then stopped.
    """
    config = Setup.config()
    cacheDir = config.getConfigValue(
        "SHARED_STATE_CACHE",
        os.path.join(Setup.config().fakeAwsBaseDir, 'ss_cache'))

    logging.info(
        "Starting shared state with cache dir '%s' and log file '%s'",
        cacheDir,
        self.sharedStateLogFile)

    with DirectoryScope.DirectoryScope(self.sharedStatePath):
        foreverOptions = ['--killSignal', 'SIGTERM', '-l', self.sharedStateLogFile]
        mainlineCommand = ['-c', 'python', self.sharedStateMainline,
                           '--cacheDir', cacheDir,
                           '--logging', 'info']
        args = ['forever'] + foreverOptions + ['start'] + mainlineCommand

        def onStdout(msg):
            logging.info("SHARED STATE OUT> %s", msg)

        def onStderr(msg):
            logging.info("SHARED STATE ERR> %s", msg)

        launcher = SubprocessRunner.SubprocessRunner(
            args, onStdout, onStderr, dict(os.environ))
        launcher.start()
        launcher.wait(60.0)
        launcher.stop()
def __init__(self, vdm, offlineCache):
    """Initialise bookkeeping state and start the worker threads.

    vdm - stored as ``self.vdm_``.
    offlineCache - stored as ``self.offlineCache_``.

    One ManagedThread running ``self.threadWorker`` is started per
    ``Setup.config().cumulusServiceThreadCount``.
    """
    Stoppable.Stoppable.__init__(self)

    self.vdm_ = vdm
    self.offlineCache_ = offlineCache

    self.dependencies_ = TwoWaySetMap.TwoWaySetMap()
    self.finishedValuesAndTimeElapsed_ = {}
    self.intermediates_ = {}
    self.lock_ = threading.RLock()
    self.completable_ = Queue.Queue()
    self.timesComputed = 0
    self.computingContexts_ = {}
    self.computingContexts_t0_ = {}
    self.isSplit_ = set()
    self.watchers_ = {}
    self.contexts_ = []

    downloaderPool = OutOfProcessDownloader.OutOfProcessDownloaderPool(
        Setup.config().cumulusServiceThreadCount,
        actuallyRunOutOfProcess=False)
    self.inProcessDownloader = downloaderPool

    self.threads_ = []
    self.isActive = True

    # Set up the primary cache object, and set its worker threads going.
    workerCount = Setup.config().cumulusServiceThreadCount
    for _ in range(workerCount):
        worker = ManagedThread.ManagedThread(target=self.threadWorker)
        worker.start()
        self.threads_.append(worker)
def startSharedState(self):
    """Run the 'forever' command that starts the shared state mainline.

    The cache directory is the SHARED_STATE_CACHE config value, falling
    back to 'ss_cache' under ``fakeAwsBaseDir``.  The launcher is run inside
    a DirectoryScope for ``self.sharedStatePath``, waited on for up to
    60 seconds and then stopped.
    """
    config = Setup.config()
    cacheDir = config.getConfigValue(
        "SHARED_STATE_CACHE",
        os.path.join(Setup.config().fakeAwsBaseDir, 'ss_cache'))

    logging.info(
        "Starting shared state with cache dir '%s' and log file '%s'",
        cacheDir,
        self.sharedStateLogFile)

    with DirectoryScope.DirectoryScope(self.sharedStatePath):
        args = [
            'forever',
            '--killSignal', 'SIGTERM',
            '-l', self.sharedStateLogFile,
            'start',
            '-c', 'python', self.sharedStateMainline,
            '--cacheDir', cacheDir,
            '--logging', 'info',
            ]

        def forwardStdout(line):
            logging.info("SHARED STATE OUT> %s", line)

        def forwardStderr(line):
            logging.info("SHARED STATE ERR> %s", line)

        environment = dict(os.environ)
        runner = SubprocessRunner.SubprocessRunner(
            args, forwardStdout, forwardStderr, environment)
        runner.start()
        runner.wait(60.0)
        runner.stop()
def createServiceAndServiceThread(self):
    """Create and start a CumulusService using this object's overrides.

    The object returned by ``Setup.config()`` is modified in place: the RAM
    cache overrides are divided by 1024 twice before being stored in the
    ``...MB`` settings, tcmalloc tracking is switched off, and the disk
    cache subdirectory gets a fresh UUID.

    Returns the started service.
    """
    config = Setup.config()

    config.cumulusMaxRamCacheMB = self.cumulusMaxRamCacheSizeOverride / 1024 / 1024
    config.cumulusVectorRamCacheMB = self.cumulusVectorRamCacheSizeOverride / 1024 / 1024
    config.cumulusTrackTcmalloc = False
    config.cumulusServiceThreadCount = self.cumulusThreadCountOverride
    config.cumulusDiskCacheStorageSubdirectory = str(uuid.uuid4())

    ownAddress = str(uuid.uuid4())
    callbackScheduler = self.callbackSchedulerFactory.createScheduler(
        "InMemoryClusterChild", 1)

    listenPorts = [Setup.config().cumulusControlPort,
                   Setup.config().cumulusDataPort]
    channelListener = self.createMultiChannelListener(
        callbackScheduler, listenPorts, ownAddress)

    serviceArgs = dict(
        ownAddress=ownAddress,
        channelListener=channelListener,
        channelFactory=self.channelManager.createChannelFactory(),
        eventHandler=CumulusNative.CumulusWorkerHoldEventsInMemoryEventHandler(),
        callbackScheduler=callbackScheduler,
        diagnosticsDir=None,
        config=config,
        viewFactory=self.sharedStateViewFactory)
    service = CumulusService.CumulusService(**serviceArgs)

    service.startService(lambda: None)
    return service
def __init__(self, callbackScheduler, cachePathOverride=None, port=None):
    """Set up the shared state socket server, keyspace manager and threads.

    callbackScheduler - stored as ``self.callbackScheduler``.
    cachePathOverride - cache directory to use; when None the value of
        ``Setup.config().sharedStateCache`` is used.  A non-empty path that
        does not exist yet is created.
    port - port handed to the socket server; when None the value of
        ``Setup.config().sharedStatePort`` is used.

    The threads are created here but not started.
    """
    self.callbackScheduler = callbackScheduler

    # Honour an explicit port; previously the argument was always replaced
    # by the configured port, which made the parameter a no-op.
    if port is None:
        port = Setup.config().sharedStatePort

    logging.info("Initializing SharedStateService with port = %s", port)

    if cachePathOverride is not None:
        self.cachePath = cachePathOverride
    else:
        self.cachePath = Setup.config().sharedStateCache

    if self.cachePath != '' and not os.path.exists(self.cachePath):
        os.makedirs(self.cachePath)

    CloudService.Service.__init__(self)
    self.socketServer = SimpleServer.SimpleServer(port)
    self.keyspaceManager = KeyspaceManager(
        0,
        1,
        pingInterval=120,
        cachePathOverride=cachePathOverride
        )

    self.socketServer._onConnect = self.onConnect
    self.socketServerThread = ManagedThread.ManagedThread(target=self.socketServer.start)
    self.logfilePruneThread = ManagedThread.ManagedThread(target=self.logFilePruner)

    self.stoppedFlag = threading.Event()
def constructVDM(
        callbackScheduler,
        vectorRamCacheBytes = None,
        maxRamCacheBytes = None,
        maxVectorChunkSize = None
        ):
    """Create a VectorDataManager with its memory limits applied.

    callbackScheduler - passed through to the VectorDataManager.
    vectorRamCacheBytes - defaults to ``cumulusVectorRamCacheMB`` from the
        config, converted to bytes.
    maxRamCacheBytes - defaults to ``cumulusMaxRamCacheMB`` from the config,
        converted to bytes.
    maxVectorChunkSize - defaults to ``maxPageSizeInBytes`` from the config;
        capped at 1/32 of ``vectorRamCacheBytes``.

    Returns the configured VectorDataManager.
    """
    if vectorRamCacheBytes is None:
        vectorRamCacheBytes = Setup.config().cumulusVectorRamCacheMB * 1024 * 1024

    if maxRamCacheBytes is None:
        maxRamCacheBytes = Setup.config().cumulusMaxRamCacheMB * 1024 * 1024

    if maxVectorChunkSize is None:
        maxVectorChunkSize = Setup.config().maxPageSizeInBytes

        if maxVectorChunkSize > vectorRamCacheBytes / 32:
            # Arguments follow the message order: chunk size first, then
            # memory size (they were previously swapped).
            logging.info(
                "VDM constructor specified a chunk size of %s MB " +
                "and a memory size of %s MB. Reducing the chunk size because its too large",
                maxVectorChunkSize / 1024.0 / 1024.0,
                vectorRamCacheBytes / 1024.0 / 1024.0
                )

            maxVectorChunkSize = vectorRamCacheBytes / 32

    logging.info("Creating a VDM with %s MB of memory and %s max vector size",
        vectorRamCacheBytes / 1024.0 / 1024.0,
        maxVectorChunkSize / 1024.0 / 1024.0
        )

    VDM = FORANative.VectorDataManager(callbackScheduler, maxVectorChunkSize)
    VDM.setMemoryLimit(vectorRamCacheBytes, maxRamCacheBytes)

    return VDM
def UserFacingMainline(main, argv, modulesToInitialize=None, parser=None):
    """Parse arguments, initialise modules, run ``main`` and exit the process.

    main - callable invoked with the parsed arguments; a return value of
        None is treated as exit status 0.
    argv - full argument vector; ``argv[0]`` is skipped when parsing.
    modulesToInitialize - forwarded to ``initializeModules``.
    parser - argument parser; ``Setup.defaultParser()`` when None.

    This function does not return: it ends with ``os._exit``.
    """
    argumentParser = Setup.defaultParser() if parser is None else parser

    setup = Setup.defaultSetup()

    parsedArguments = argumentParser.parse_args(argv[1:])
    setup.processArgs(parsedArguments)

    setup.config.configureLoggingForUserProgram()

    with Setup.PushSetup(setup):
        initializeModules(modulesToInitialize)

        exitStatus = main(parsedArguments)
        if exitStatus is None:
            exitStatus = 0

        # os._exit skips normal interpreter shutdown, so flush the standard
        # streams by hand first.
        for stream in (sys.stdout, sys.stderr):
            stream.flush()

        os._exit(exitStatus)
def __init__(self, vdm, offlineCache):
    """Initialise bookkeeping state and start the worker threads.

    vdm - stored as ``self.vdm_``.
    offlineCache - stored as ``self.offlineCache_``.

    One ManagedThread running ``self.threadWorker`` is started per
    ``Setup.config().cumulusServiceThreadCount``.
    """
    Stoppable.Stoppable.__init__(self)

    self.vdm_ = vdm
    self.offlineCache_ = offlineCache

    self.dependencies_ = TwoWaySetMap.TwoWaySetMap()
    self.finishedValues_ = {}
    self.intermediates_ = {}
    self.lock_ = threading.RLock()
    self.completable_ = Queue.Queue()
    self.timesComputed = 0
    self.computingContexts_ = {}
    self.computingContexts_t0_ = {}
    self.isSplit_ = set()
    self.watchers_ = {}
    self.contexts_ = []

    downloaderPool = OutOfProcessDownloader.OutOfProcessDownloaderPool(
        Setup.config().cumulusServiceThreadCount,
        actuallyRunOutOfProcess=False)
    self.inProcessDownloader = downloaderPool

    self.threads_ = []
    self.isActive = True

    # Set up the primary cache object, and set its worker threads going.
    workerCount = Setup.config().cumulusServiceThreadCount
    for _ in range(workerCount):
        worker = ManagedThread.ManagedThread(target=self.threadWorker)
        worker.start()
        self.threads_.append(worker)
def constructVDM(callbackScheduler,
                 vectorRamCacheBytes=None,
                 maxRamCacheBytes=None,
                 maxVectorChunkSize=None):
    """Create a VectorDataManager with its memory limits applied.

    callbackScheduler - passed through to the VectorDataManager.
    vectorRamCacheBytes - defaults to ``cumulusVectorRamCacheMB`` from the
        config, converted to bytes.
    maxRamCacheBytes - defaults to ``cumulusMaxRamCacheMB`` from the config,
        converted to bytes.
    maxVectorChunkSize - defaults to ``maxPageSizeInBytes`` from the config;
        capped at 1/32 of ``vectorRamCacheBytes``.

    Returns the configured VectorDataManager.
    """
    if vectorRamCacheBytes is None:
        vectorRamCacheBytes = Setup.config(
        ).cumulusVectorRamCacheMB * 1024 * 1024

    if maxRamCacheBytes is None:
        maxRamCacheBytes = Setup.config().cumulusMaxRamCacheMB * 1024 * 1024

    if maxVectorChunkSize is None:
        maxVectorChunkSize = Setup.config().maxPageSizeInBytes

        if maxVectorChunkSize > vectorRamCacheBytes / 32:
            # Arguments follow the message order: chunk size first, then
            # memory size (they were previously swapped).
            logging.info(
                "VDM constructor specified a chunk size of %s MB " +
                "and a memory size of %s MB. Reducing the chunk size because its too large",
                maxVectorChunkSize / 1024.0 / 1024.0,
                vectorRamCacheBytes / 1024.0 / 1024.0)

            maxVectorChunkSize = vectorRamCacheBytes / 32

    logging.info("Creating a VDM with %s MB of memory and %s max vector size",
                 vectorRamCacheBytes / 1024.0 / 1024.0,
                 maxVectorChunkSize / 1024.0 / 1024.0)

    VDM = FORANative.VectorDataManager(callbackScheduler, maxVectorChunkSize)
    VDM.setMemoryLimit(vectorRamCacheBytes, maxRamCacheBytes)

    return VDM
def __init__(self):
    """Record the scheduler, source-tree paths and ports for the simulator.

    Paths are derived from the directory one level above this module;
    port numbers are copied from ``Setup.config()``.
    """
    schedulerFactory = CallbackScheduler.createSimpleCallbackSchedulerFactory()
    self.callbackScheduler = schedulerFactory.createScheduler("Simulator", 1)

    # Everything below is located relative to the parent of this file's directory.
    moduleDir = os.path.dirname(__file__)
    self.uforaPath = os.path.abspath(os.path.join(moduleDir, '../'))

    self.sharedStatePath = os.path.join(self.uforaPath, 'distributed/SharedState')
    self.sharedStateMainline = os.path.join(self.sharedStatePath,
                                            'sharedStateMainline.py')
    self.gatewayServiceMainline = os.path.join(self.uforaPath,
                                               'scripts/init/ufora-gateway.py')

    self.webPath = os.path.join(self.uforaPath, 'web/relay')
    self.relayScript = os.path.join(self.webPath, 'server.coffee')

    # Mirror each configured port onto an attribute of the same name.
    for portName in ('relayPort',
                     'relayHttpsPort',
                     'sharedStatePort',
                     'restApiPort',
                     'subscribableWebObjectsPort'):
        setattr(self, portName, getattr(Setup.config(), portName))

    self.desirePublisher = None
    self._connectionManager = None
def __init__(self):
    """Record the scheduler, source-tree paths, ports and helper pool.

    Paths are derived from the directory one level above this module;
    port numbers are copied from ``Setup.config()``.
    """
    factory = CallbackScheduler.createSimpleCallbackSchedulerFactory()
    self.callbackScheduler = factory.createScheduler("Simulator", 1)

    here = os.path.dirname(__file__)
    self.uforaPath = os.path.abspath(os.path.join(here, '../'))

    def underUfora(relativePath):
        # Resolve a path relative to the source-tree root computed above.
        return os.path.join(self.uforaPath, relativePath)

    self.sharedStatePath = underUfora('distributed/SharedState')
    self.sharedStateMainline = os.path.join(self.sharedStatePath,
                                            'sharedStateMainline.py')
    self.gatewayServiceMainline = underUfora('scripts/init/ufora-gateway.py')

    self.webPath = underUfora('web/relay')
    self.relayScript = os.path.join(self.webPath, 'server.coffee')

    self.relayPort = Setup.config().relayPort
    self.relayHttpsPort = Setup.config().relayHttpsPort
    self.sharedStatePort = Setup.config().sharedStatePort
    self.restApiPort = Setup.config().restApiPort
    self.subscribableWebObjectsPort = Setup.config().subscribableWebObjectsPort

    # Create an OutOfProcessDownloader so we can execute commands like
    # 'forever' from there, instead of forking from the main process
    # (which can run out of memory).
    self.processPool = OutOfProcessDownloader.OutOfProcessDownloaderPool(1)

    self.desirePublisher = None
    self._connectionManager = None
def createServiceAndServiceThread(self):
    """Create and start a CumulusService using this object's overrides.

    The object returned by ``Setup.config()`` is modified in place with the
    RAM cache and thread count overrides, and the disk cache subdirectory
    gets a fresh UUID.

    Returns the started service.
    """
    config = Setup.config()

    config.cumulusMaxRamCacheMB = self.cumulusMaxRamCacheSizeOverride
    config.cumulusVectorRamCacheMB = self.cumulusVectorRamCacheSizeOverride
    config.cumulusServiceThreadCount = self.cumulusThreadCountOverride
    config.cumulusDiskCacheStorageSubdirectory = str(uuid.uuid4())

    ownAddress = str(uuid.uuid4())
    callbackScheduler = self.callbackSchedulerFactory.createScheduler(
        "InMemoryClusterChild", 1)

    listenPorts = [Setup.config().cumulusControlPort,
                   Setup.config().cumulusDataPort]
    channelListener = self.createMultiChannelListener(
        callbackScheduler, listenPorts, ownAddress)

    serviceArgs = dict(
        ownAddress=ownAddress,
        channelListener=channelListener,
        channelFactory=self.channelManager.createChannelFactory(),
        eventHandler=CumulusNative.CumulusWorkerHoldEventsInMemoryEventHandler(),
        callbackScheduler=callbackScheduler,
        diagnosticsDir=None,
        config=config,
        viewFactory=self.sharedStateViewFactory)
    service = CumulusService.CumulusService(**serviceArgs)

    service.startService(lambda: None)
    return service
def startService(self):
    """Stop any earlier simulation processes and start a fresh set.

    Order: stop relay, gateway and shared state; kill process groups
    holding the configured port range; create the simulation directory;
    start shared state, the gateway and the relay; verify shared state;
    create the worker-process publisher.

    Any failure after shared state has been started is logged, the relay
    logs are dumped, and the exception is re-raised.
    """
    self.stopRelay()
    self.stopGatewayService()
    self.stopSharedState()

    firstPort = Setup.config().basePort
    endPort = Setup.config().basePort + Setup.config().numPorts
    KillProcessHoldingPort.killProcessGroupHoldingPorts(firstPort, endPort)

    self.createSimulationDirectory()
    self.startSharedState()

    try:
        self.startGatewayService()

        logging.info('Starting relay')
        with DirectoryScope.DirectoryScope(self.webPath):
            self.startRelayProcess(self.relayScript)

        logging.info("verifying that shared state is running")
        self.verifySharedStateRunning()

        workerScript = os.path.join(self.uforaPath, 'scripts/init/ufora-worker.py')
        self.desirePublisher = WorkerProcesses(workerScript)
    except:
        failureText = traceback.format_exc()
        logging.error(
            "Couldn't start ClusterSimulation service. Exception=\n%s",
            failureText)
        self.dumpRelayLogs()
        raise
def generateTestConfigFileBody_(self):
    """Return the text of a test config file.

    The body has three ``NAME = value`` lines: ROOT_DATA_DIR and BASE_PORT
    come from ``Setup.config()``; FORA_MAX_MEM_MB is "10000" on machines
    reporting at most 8 CPUs and "60000" otherwise.
    """
    template = (
        "ROOT_DATA_DIR = %s\n"
        "BASE_PORT = %s\n"
        "FORA_MAX_MEM_MB = %s\n"
        )

    rootDataDir = Setup.config().rootDataDir
    basePort = Setup.config().basePort

    if multiprocessing.cpu_count() <= 8:
        maxMemMb = "10000"
    else:
        maxMemMb = "60000"

    return template % (rootDataDir, basePort, maxMemMb)
def runPythonUnitTests_(args, testFilter, testGroupName, testFiles):
    """Load, filter and run a group of Python unit tests through nose.

    args - parsed harness options; this function reads testHarnessVerbose,
        list, remainder, copies, random and pythreadcheck.
    testFilter - optional callable applied to the full list of loaded test
        cases; skipped when None.
    testGroupName - label used in the "Executing ..." message.
    testFiles - passed to UnitTestCommon.loadTestCases.

    Returns the result of runPyTestSuite, or, with args.pythreadcheck, whether
    any individual per-test run returned True.  With args.list the test ids
    are printed and the process exits via os._exit(0).
    """
    testArgs = ["dummy"]

    if args.testHarnessVerbose or args.list:
        testArgs.append('--nocaptureall')

    testArgs.append('--verbosity=0')

    if not args.list:
        print "Executing %s unit tests." % testGroupName

    Setup.config().configureLoggingForUserProgram()

    # The remaining command-line words are interpreted as filter actions.
    parser = PythonTestArgumentParser()
    filterActions = parser.parse_args(args.remainder)

    bsaRootDir = os.path.split(ufora.__file__)[0]

    testCasesToRun = []

    plugins = nose.plugins.manager.PluginManager([OutputCaptureNosePlugin()])

    config = nose.config.Config(plugins=plugins)
    config.configure(testArgs)

    # Load the test cases once per requested copy, so each test appears
    # args.copies times in the final list.
    for i in range(args.copies):
        testCases = UnitTestCommon.loadTestCases(config, testFiles, bsaRootDir, 'ufora')
        if filterActions:
            testCases = applyFilterActions(filterActions, testCases)

        testCasesToRun += testCases

    if testFilter is not None:
        testCasesToRun = testFilter(testCasesToRun)

    if args.list:
        for test in testCasesToRun:
            print test.id()

        os._exit(0)

    if args.random:
        import random
        random.shuffle(testCasesToRun)

    if args.pythreadcheck:
        # Run every test in its own suite and collect the individual results.
        results = {}
        for test in testCasesToRun:
            results[test] = runPyTestSuite(config, None, unittest.TestSuite([test]), testArgs)

        return True in results.values()
    else:
        # NOTE(review): this rebinding of testFiles is not read afterwards.
        testFiles = '.'
        return runPyTestSuite(config, None, testCasesToRun, testArgs)
def executeTestAsMain():
    """Run a hand-picked subset of ComputedGraphTest under the default setup.

    Edit ``selectedTests`` to choose which test methods are run.
    """
    with Setup.PushSetup(Setup.defaultSetup()):
        # Alternative selections used previously:
        #   ['testKeys', 'testMutableDefaults', 'testProperty',
        #    'testChangeMutable', 'testLocationAccess', 'testFunction',
        #    'testInitializer', 'testCached', 'testNotCached',
        #    'testRootProperty']
        #   ['testProperty'], ['testOrphan'], ['testLocKey2'],
        #   ['testKeys'], ['testCreate']
        selectedTests = ['testTemp']

        testCases = map(ComputedGraphTest, selectedTests)
        suite = unittest.TestSuite(testCases)

        runner = unittest.TextTestRunner()
        runner.run(suite)
def createService(args): callbackSchedulerFactory = CallbackScheduler.createSimpleCallbackSchedulerFactory() callbackScheduler = callbackSchedulerFactory.createScheduler('ufora-worker', 1) channelListener = MultiChannelListener(callbackScheduler, [args.base_port, args.base_port + 1]) sharedStateViewFactory = ViewFactory.ViewFactory.TcpViewFactory( callbackSchedulerFactory.createScheduler('SharedState', 1), args.manager_address, int(args.manager_port) ) channelFactory = TcpChannelFactory.TcpStringChannelFactory(callbackScheduler) diagnostics_dir = os.getenv("UFORA_WORKER_DIAGNOSTICS_DIR") eventHandler = diagnostics_dir and createEventHandler( diagnostics_dir, callbackSchedulerFactory.createScheduler("ufora-worker-event-handler", 1) ) own_address = args.own_address or get_own_ip() print "Listening on", own_address, "ports:", args.base_port, "and", args.base_port+1 config = Setup.config() print "RAM cache of %d / %d MB and %d threads. Track tcmalloc: %s" % ( config.cumulusVectorRamCacheMB, config.cumulusMaxRamCacheMB, config.cumulusServiceThreadCount, config.cumulusTrackTcmalloc ) print "Ufora store at %s:%s" % (args.manager_address, args.manager_port) s3InterfaceFactory = ActualS3Interface.ActualS3InterfaceFactory() print "PythonIoTasks threads: %d. Out of process: %s" % ( config.externalDatasetLoaderServiceThreads, s3InterfaceFactory.isCompatibleWithOutOfProcessDownloadPool ) return CumulusService.CumulusService( own_address, channelListener, channelFactory, eventHandler, callbackScheduler, diagnostics_dir, Setup.config(), viewFactory=sharedStateViewFactory, s3InterfaceFactory=s3InterfaceFactory, objectStore=NullObjectStore.NullObjectStore() )
def createService(args): callbackSchedulerFactory = CallbackScheduler.createSimpleCallbackSchedulerFactory() callbackScheduler = callbackSchedulerFactory.createScheduler('ufora-worker', 1) channelListener = MultiChannelListener(callbackScheduler, [args.base_port, args.base_port + 1]) sharedStateViewFactory = ViewFactory.ViewFactory.TcpViewFactory( callbackSchedulerFactory.createScheduler('SharedState', 1), args.manager_address, int(args.manager_port) ) channelFactory = TcpChannelFactory.TcpStringChannelFactory(callbackScheduler) diagnostics_dir = os.getenv("UFORA_WORKER_DIAGNOSTICS_DIR") eventHandler = diagnostics_dir and createEventHandler( diagnostics_dir, callbackSchedulerFactory.createScheduler("ufora-worker-event-handler", 1) ) own_address = args.own_address or get_own_ip() print "Listening on", own_address, "ports:", args.base_port, "and", args.base_port+1 return CumulusService.CumulusService( own_address, channelListener, channelFactory, eventHandler, callbackScheduler, diagnostics_dir, Setup.config(), viewFactory=sharedStateViewFactory )
def __init__(self, relayHostname, relayHttpsPort=None, messageDelayInSeconds=None):
    """Initialize a PipeTransport.

    relayHostname - host recorded as ``self.relayHostname``.
    relayHttpsPort - port to use; any falsy value selects
        ``Setup.config().relayHttpsPort`` instead.
    messageDelayInSeconds - if not None, then all messages will be delayed
        by this many seconds before being pumped into the receiving channel.
        This can simulate delays talking over the internet.
    """
    # Callbacks; unset until assigned.
    self.onMessageReceived = None
    self.onDisconnected = None

    # Connection state.
    self.isShuttingDown = False
    self.isConnected = False
    self.relayHostname = relayHostname
    self.relayHttpsPort = (
        relayHttpsPort if relayHttpsPort else Setup.config().relayHttpsPort)

    # Proxy process and its pipes.
    self.proxyProcess = None
    self.proxyStdIn = None
    self.proxyStdOut = None
    self.proxyStdErr = None

    # Threads and message pump.
    self.inputLoopThread = None
    self.proxyOutputThread = None
    self.messageDelayInSeconds = messageDelayInSeconds
    self.messagePumpThread = None
    self.messagePumpQueue = Queue.Queue()

    logging.info("PipeTransport created for host %s:%s",
                 self.relayHostname,
                 self.relayHttpsPort)
def test_resumingAfterCopyDataOutOfPages(self):
    """Check that the paused frame count is unchanged after copying values
    out of vector pages and resuming until a vector load is reached."""
    vdm = FORANative.VectorDataManager(callbackScheduler, Setup.config().maxPageSizeInBytes)
    context = ExecutionContext.ExecutionContext(dataManager=vdm)

    # Interrupt the (infinite) FORA loop below after a fixed cycle count.
    context.interruptAfterCycleCount(100000)

    text = """ fun() { let v = Vector.range(1000).paged; let ix1 = 0 let res = 0 while (true) { res = res + v[ix1] ix1 = (ix1 + 1) % size(v) } res }"""

    context.evaluate(FORA.extractImplValContainer(FORA.eval(text)), FORANative.symbol_Call)
    paused1 = context.extractPausedComputation()

    # Repeatedly copy values out, unload pages and resume until the context
    # reports a vector load.
    while not context.isVectorLoad():
        context.copyValuesOutOfVectorPages()
        vdm.unloadAllPossible()
        context.interruptAfterCycleCount(100000)
        context.resume()

    paused2 = context.extractPausedComputation()

    self.assertTrue(len(paused1.frames) == len(paused2.frames))
def initialize(setupObjectToUse = None, reimport = False):
    """Import the builtin module and cache it in module-level globals.

    setupObjectToUse - defaults to ``Setup.currentSetup()`` when None.
        NOTE(review): the value is not read again inside this function.
    reimport - when False and the builtins are already loaded, do nothing.

    Sets ``_builtinPath``, ``_builtinModuleImplVal`` and
    ``_builtinModuleMembers``.  An import failure prints its traceback and
    is re-raised.
    """
    global _builtinModuleMembers, _builtinModuleImplVal, _builtinPath

    if _builtinModuleMembers is not None and not reimport:
        return

    if setupObjectToUse is None:
        setupObjectToUse = Setup.currentSetup()

    try:
        _builtinPath = os.path.abspath(os.path.join(_curDir, "..","builtin"))

        def pathToCodeDefinitionStrings(path):
            return ["Builtins", os.path.relpath(path, os.path.join(_builtinPath, ".."))]

        _builtinModuleImplVal = importModuleFromPath(
            _builtinPath,
            allowPrivate = True,
            pathToCodeDefinitionStrings = pathToCodeDefinitionStrings
            )
    except Exception:
        import traceback
        traceback.print_exc()
        # A bare raise keeps the original traceback; 'raise e' would restart
        # it at this line under Python 2.
        raise

    _builtinModuleMembers = _builtinModuleImplVal.objectMembers

    logging.info("Initialized ModuleImporter with builtin hash of '%s'", hash(_builtinModuleImplVal))
def __init__(self, offlineCacheFunction, newMemLimit, remoteEvaluator=None, newLoadRatio=.5, maxPageSizeInBytes=None, vdmOverride=None):
    """Set up the evaluator's VectorDataManager and computation cache.

    offlineCacheFunction - called with the newly created VDM to obtain an
        offline cache (may return None); not called when vdmOverride is given.
    newMemLimit - memory limit applied to a newly created VDM; the second
        limit passed to setMemoryLimit is 1.25 times this value.
    remoteEvaluator - stored as ``self.remoteEvaluator_``.
    newLoadRatio - load ratio applied to a newly created VDM.
    maxPageSizeInBytes - defaults to ``Setup.config().maxPageSizeInBytes``.
    vdmOverride - an existing VDM to use instead of creating one; in that
        case no offline cache is used.
    """
    if maxPageSizeInBytes is None:
        maxPageSizeInBytes = Setup.config().maxPageSizeInBytes

    if vdmOverride is not None:
        self.vdm_ = vdmOverride
        self.offlineCache_ = None
    else:
        self.vdm_ = FORANative.VectorDataManager(
            CallbackSchedulerNative.createSimpleCallbackSchedulerFactory().
                createScheduler("LocalEvaluator", 1),
            maxPageSizeInBytes)

        self.vdm_.setDropUnreferencedPagesWhenFull(True)

        self.offlineCache_ = offlineCacheFunction(self.vdm_)

        if self.offlineCache_ is not None:
            self.vdm_.setOfflineCache(self.offlineCache_)

        logging.info("LocalEvaluator Creating VDMC with %s MB", newMemLimit / 1024.0 / 1024.0)

        self.vdm_.setMemoryLimit(newMemLimit, int(newMemLimit * 1.25))
        self.vdm_.setLoadRatio(newLoadRatio)

    self.remoteEvaluator_ = remoteEvaluator

    self.cache_ = ComputationCache(self.vdm_, self.offlineCache_)
def KeyspaceManager(randomSeed,
                    numManagers,
                    backupInterval=60*10,
                    pingInterval=20,
                    cachePathOverride=None,
                    maxOpenFiles=None,
                    maxLogFileSizeMb=10):
    """Create a native KeyspaceManager, file-backed unless the cache path is ''.

    cachePathOverride - storage directory; when None,
        ``Setup.config().sharedStateCache`` is used.  An empty string means
        no storage object is created.
    maxOpenFiles - when None, half of the RLIMIT_NOFILE soft limit, capped
        at 1000.
    maxLogFileSizeMb - passed to the file storage.

    The remaining arguments are forwarded to the native constructor.
    """
    if cachePathOverride is None:
        cachePathOverride = Setup.config().sharedStateCache

    if maxOpenFiles is None:
        import resource
        softLimit = resource.getrlimit(resource.RLIMIT_NOFILE)[0]
        maxOpenFiles = min(softLimit / 2, 1000)

    storage = None
    if cachePathOverride != "":
        logging.info(
            "Creating FileStorage(cachePathOverride=%s, maxOpenFiles=%s, maxLogFileSizeMb=%s)",
            cachePathOverride,
            maxOpenFiles,
            maxLogFileSizeMb)
        storage = SharedStateNative.Storage.FileStorage(
            cachePathOverride, maxOpenFiles, maxLogFileSizeMb)

    return SharedStateNative.KeyspaceManager(
        randomSeed, numManagers, backupInterval, pingInterval, storage)
def test_refcountsInCompiledCode(self):
    """The text stacktrace after evaluation must mention "Vector" fewer
    than ten times."""
    pageSize = Setup.config().maxPageSizeInBytes
    vdm = FORANative.VectorDataManager(callbackScheduler, pageSize)

    context = ExecutionContext.ExecutionContext(
        dataManager=vdm,
        allowInterpreterTracing=True,
        blockUntilTracesAreCompiled=True)

    text = """fun(){ let f = fun(v, depth) { if (depth > 100) //this will trigger an interrupt since the data cannot exist in the VDM datasets.s3('','') else f(v, depth+1) } f([1,2,3,4,5], 0) }"""

    toCall = FORA.extractImplValContainer(FORA.eval(text))
    context.evaluate(toCall, FORANative.symbol_Call)

    stacktraceText = context.extractCurrentTextStacktrace()
    vectorMentions = stacktraceText.count("Vector")

    self.assertTrue(vectorMentions < 10)
def setUpClass(cls):
    """Start the global cluster simulator with one worker and connect to it.

    Stores the config, simulator and pyfora connection on the class.
    """
    cls.config = Setup.config()
    cls.executor = None

    cls.simulation = ClusterSimulation.Simulator.createGlobalSimulator()
    cls.simulation.startService()

    publisher = cls.simulation.getDesirePublisher()
    publisher.desireNumberOfWorkers(1)

    cls.ufora = pyfora.connect('http://localhost:30000')
def test_extractPausedComputationDuringVectorLoad(self):
    """A computation paused on a vector load can be extracted and has
    exactly one frame."""
    self.runtime = Runtime.getMainRuntime()
    #self.dynamicOptimizer = self.runtime.dynamicOptimizer

    pageSize = Setup.config().maxPageSizeInBytes
    vdm = FORANative.VectorDataManager(callbackScheduler, pageSize)

    context = ExecutionContext.ExecutionContext(
        dataManager=vdm,
        allowInterpreterTracing=False)

    # Produce a paged vector to index into.
    makeVector = FORA.extractImplValContainer(FORA.eval("fun() { [1,2,3].paged }"))
    callSymbol = FORANative.ImplValContainer(FORANative.makeSymbol("Call"))
    context.evaluate(makeVector, callSymbol)

    pagedVec = context.getFinishedResult().asResult.result

    # Set up pagedVec`(`GetItem, 0) as the next thing to evaluate.
    getItemCall = FORANative.ImplValContainer(
        (pagedVec,
         FORANative.ImplValContainer(FORANative.makeSymbol("GetItem")),
         FORANative.ImplValContainer(0)))
    context.placeInEvaluationState(getItemCall)

    vdm.unloadAllPossible()

    context.resume()

    self.assertTrue(context.isVectorLoad())

    computation = context.extractPausedComputation()

    self.assertEqual(len(computation.frames), 1)
def test_serialize_while_holding_interior_vector(self):
    """An interrupted context holding a nested paged vector can be
    serialized."""
    pageSize = Setup.config().maxPageSizeInBytes
    vdm = FORANative.VectorDataManager(callbackScheduler, pageSize)

    context = ExecutionContext.ExecutionContext(
        dataManager=vdm,
        allowInterpreterTracing=False)

    program = FORA.eval(""" fun() { let v = [[1].paged].paged; let v2 = v[0] `TriggerInterruptForTesting() 1+2+3+v+v2 }""")
    context.evaluate(FORA.extractImplValContainer(program), FORANative.symbol_Call)

    self.assertTrue(context.isInterrupted())

    serialized = context.serialize()

    # Drop the context explicitly after serializing.
    context = None
def test_verifyThatExtractingPausedComputationsDoesntDuplicateLargeStrings(self):
    """Resuming an extracted paused computation in a second context must use
    less than twice the bytes of the first context."""
    text = """fun() { let s = ' ' while (size(s) < 1000000) s = s + s let f = fun(x) { if (x > 0) return f(x-1) + s[x]; `TriggerInterruptForTesting() } f(20) }"""

    pageSize = Setup.config().maxPageSizeInBytes
    vdm = FORANative.VectorDataManager(callbackScheduler, pageSize)

    context = ExecutionContext.ExecutionContext(
        dataManager=vdm,
        allowInterpreterTracing=False)

    toCall = FORA.extractImplValContainer(FORA.eval(text))
    context.evaluate(toCall, FORANative.symbol_Call)

    computation = context.extractPausedComputation()

    context2 = ExecutionContext.ExecutionContext(
        dataManager=vdm,
        allowInterpreterTracing=False)

    context2.resumePausedComputation(computation)

    self.assertTrue(context2.totalBytesUsed < 2 * context.totalBytesUsed)
def test_refcountsInCompiledCode(self):
    """The text stacktrace after evaluation must mention "Vector" fewer
    than ten times."""
    pageSize = Setup.config().maxPageSizeInBytes
    vdm = FORANative.VectorDataManager(callbackScheduler, pageSize)

    contextOptions = dict(
        dataManager=vdm,
        allowInterpreterTracing=True,
        blockUntilTracesAreCompiled=True)
    context = ExecutionContext.ExecutionContext(**contextOptions)

    text = """fun(){ let f = fun(v, depth) { if (depth > 100) //this will trigger an interrupt since the data cannot exist in the VDM datasets.s3('','') else f(v, depth+1) } f([1,2,3,4,5], 0) }"""

    toCall = FORA.extractImplValContainer(FORA.eval(text))
    context.evaluate(toCall, FORANative.symbol_Call)

    stacktraceText = context.extractCurrentTextStacktrace()
    vectorMentions = stacktraceText.count("Vector")

    self.assertTrue(vectorMentions < 10)
def createViewFactory():
    """Return a TCP view factory for localhost on the configured shared
    state port, using a 'fora-interpreter' scheduler."""
    schedulerFactory = CallbackScheduler.createSimpleCallbackSchedulerFactory()
    scheduler = schedulerFactory.createScheduler('fora-interpreter', 1)
    port = Setup.config().sharedStatePort

    return ViewFactory.ViewFactory.TcpViewFactory(scheduler, 'localhost', port)
def _addWorker(self):
    """Start a new worker thread and register it under a fresh UUID.

    The thread runs ``self._runWorker`` with the worker id and the
    configured ``fakeAwsBaseDir``.
    """
    worker_id = uuid.uuid4()
    workerArgs = (worker_id, Setup.config().fakeAwsBaseDir)

    thread = threading.Thread(target=self._runWorker, args=workerArgs)

    # Register before starting so the thread is tracked from the outset.
    self.threads[worker_id] = thread
    thread.start()
def test_teardown_simple(self):
    """A context sitting on a cache request can be torn down."""
    pageSize = Setup.config().maxPageSizeInBytes
    vdm = FORANative.VectorDataManager(callbackScheduler, pageSize)
    context = ExecutionContext.ExecutionContext(dataManager=vdm)

    # Run a trivial function to completion first.
    trivial = FORA.extractImplValContainer(FORA.eval("fun(){nothing}"))
    context.evaluate(trivial, FORANative.symbol_Call)

    context.getFinishedResult()

    toEval = FORA.extractImplValContainer(
        FORA.eval("""fun() { let f = fun() { }; let v = [1, [3]]; cached(f()) }"""))

    context.evaluate(toEval, FORANative.symbol_Call)

    # Keep resuming until the context reports the cache request.
    while not context.isCacheRequest():
        context.resume()

    context.teardown(True)
def createViewFactory():
    """Return a TCP view factory for localhost on the configured shared
    state port, using a 'fora-interpreter' scheduler."""
    factory = CallbackScheduler.createSimpleCallbackSchedulerFactory()
    interpreterScheduler = factory.createScheduler('fora-interpreter', 1)

    return ViewFactory.ViewFactory.TcpViewFactory(
        interpreterScheduler,
        'localhost',
        Setup.config().sharedStatePort)
def parseS3Dataset(s3InterfaceFactory, s3Dataset):
    """Log in to amazon S3 and return an appropriate s3Interface and a bucket/keypair

    s3InterfaceFactory - callable returning an S3 interface; called with no
        arguments for the default login, or with (accessKey, secretKey).
    s3Dataset - dataset descriptor answering isInternal() / isExternal().

    Returns a tuple (interface, bucket, key).

    Raises:
        InvalidDatasetException - the factory failed with the supplied
            credentials.
        DatasetLoadException - the dataset is neither internal nor external.
    """
    if s3Dataset.isInternal():
        # Use the internal login. This should have access only to our one
        # internal bucket.
        return (
            s3InterfaceFactory(),
            Setup.config().userDataS3Bucket,
            s3Dataset.asInternal.keyname
            )

    if s3Dataset.isExternal():
        asE = s3Dataset.asExternal

        if asE.awsAccessKey != "":
            try:
                interface = s3InterfaceFactory(
                    asE.awsAccessKey,
                    asE.awsSecretKey
                    )
            except Exception:
                # Only ordinary errors are reported as bad credentials;
                # KeyboardInterrupt and SystemExit pass through unchanged.
                raise InvalidDatasetException("Failed to log into S3 with given credentials")
        else:
            interface = s3InterfaceFactory()

        return (interface, asE.bucket, asE.key)

    raise DatasetLoadException("Unknown dataset type")
def defaultLocalEvaluator(remoteEvaluator=None, vdmOverride=None):
    """Build a LocalEvaluator with no offline cache.

    The memory limit is ``cumulusVectorRamCacheMB`` from the config,
    converted to bytes.  ``remoteEvaluator`` and ``vdmOverride`` are passed
    through unchanged.
    """
    def noOfflineCache(vdm):
        return None

    memoryLimitBytes = Setup.config().cumulusVectorRamCacheMB * 1024 * 1024

    return LocalEvaluator(
        noOfflineCache,
        memoryLimitBytes,
        remoteEvaluator,
        vdmOverride=vdmOverride)
def test_teardown(self):
    """A view and then its harness can be torn down without error."""
    port = Setup.config().sharedStatePort
    harness = SharedStateTestHarness.SharedStateTestHarness(False, port=port)

    # Keep the view referenced until the harness has been torn down too.
    view = harness.newView()
    view.teardown()

    harness.teardown()
def initialize(setupObjectToUse=None, reimport=False):
    """Import the builtin module and cache it in module-level globals.

    setupObjectToUse - defaults to ``Setup.currentSetup()`` when None.
        NOTE(review): the value is not read again inside this function.
    reimport - when False and the builtins are already loaded, do nothing.

    Sets ``_builtinPath``, ``_builtinModuleImplVal`` and
    ``_builtinModuleMembers``.  An import failure prints its traceback and
    is re-raised.
    """
    global _builtinModuleMembers, _builtinModuleImplVal, _builtinPath

    if _builtinModuleMembers is not None and not reimport:
        return

    if setupObjectToUse is None:
        setupObjectToUse = Setup.currentSetup()

    try:
        _builtinPath = os.path.abspath(os.path.join(_curDir, "..", "builtin"))

        def pathToCodeDefinitionStrings(path):
            return [
                "Builtins",
                os.path.relpath(path, os.path.join(_builtinPath, ".."))
            ]

        _builtinModuleImplVal = importModuleFromPath(
            _builtinPath,
            allowPrivate=True,
            pathToCodeDefinitionStrings=pathToCodeDefinitionStrings)
    except Exception:
        import traceback
        traceback.print_exc()
        # A bare raise keeps the original traceback; 'raise e' would restart
        # it at this line under Python 2.
        raise

    _builtinModuleMembers = _builtinModuleImplVal.objectMembers

    logging.info("Builtin module hash: %s", hash(_builtinModuleImplVal))
def test_verifyThatExtractingPausedComputationsDoesntDuplicateLargeStrings(
        self):
    """Resuming an extracted paused computation in a second context must use
    less than twice the bytes of the first context."""
    text = """fun() { let s = ' ' while (size(s) < 1000000) s = s + s let f = fun(x) { if (x > 0) return f(x-1) + s[x]; `TriggerInterruptForTesting() } f(20) }"""

    vdm = FORANative.VectorDataManager(
        callbackScheduler,
        Setup.config().maxPageSizeInBytes)

    contextOptions = dict(dataManager=vdm, allowInterpreterTracing=False)

    context = ExecutionContext.ExecutionContext(**contextOptions)
    context.evaluate(
        FORA.extractImplValContainer(FORA.eval(text)),
        FORANative.symbol_Call)

    computation = context.extractPausedComputation()

    context2 = ExecutionContext.ExecutionContext(**contextOptions)
    context2.resumePausedComputation(computation)

    self.assertTrue(context2.totalBytesUsed < 2 * context.totalBytesUsed)
def main(args): print "ufora-worker starting" setup = defaultSetup() with Setup.PushSetup(setup): setup.config.configureLoggingForBackgroundProgram() worker = createService(args) worker.startService(None) def signal_handler(sig, _): signal_name = '(unknown)' if sig == signal.SIGINT: signal_name = 'SIGINT' elif sig == signal.SIGTERM: signal_name = 'SIGTERM' print 'Received ', signal_name, 'signal. Exiting.' worker.stopService() sys.exit(0) signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGTERM, signal_handler) print "Press Ctrl+C to exit." signal.pause()
def test_extractPausedComputationDuringVectorLoad(self):
    """A computation paused on a vector load can be extracted and has
    exactly one frame."""
    self.runtime = Runtime.getMainRuntime()
    #self.dynamicOptimizer = self.runtime.dynamicOptimizer

    vdm = FORANative.VectorDataManager(
        callbackScheduler,
        Setup.config().maxPageSizeInBytes)

    context = ExecutionContext.ExecutionContext(
        dataManager=vdm,
        allowInterpreterTracing=False)

    # Produce a paged vector to index into.
    makeVector = FORA.extractImplValContainer(FORA.eval("fun() { [1,2,3].paged }"))
    callSymbol = FORANative.ImplValContainer(FORANative.makeSymbol("Call"))
    context.evaluate(makeVector, callSymbol)

    pagedVec = context.getFinishedResult().asResult.result

    # Set up pagedVec`(`GetItem, 0) as the next thing to evaluate.
    getItemCall = FORANative.ImplValContainer(
        (pagedVec,
         FORANative.ImplValContainer(FORANative.makeSymbol("GetItem")),
         FORANative.ImplValContainer(0)))
    context.placeInEvaluationState(getItemCall)

    vdm.unloadAllPossible()

    context.resume()

    self.assertTrue(context.isVectorLoad())

    computation = context.extractPausedComputation()

    self.assertEqual(len(computation.frames), 1)
def test_teardown_during_vector_load(self):
    """A context blocked on a vector load can be torn down."""
    pageSize = Setup.config().maxPageSizeInBytes
    vdm = FORANative.VectorDataManager(callbackScheduler, pageSize)
    context = ExecutionContext.ExecutionContext(dataManager=vdm)

    # Build a closure that indexes into a paged vector.
    makeAccessor = FORA.extractImplValContainer(
        FORA.eval("fun() { let v = [1,2,3].paged; fun() { v[1] } }"))
    context.evaluate(makeAccessor, FORANative.symbol_Call)

    vdm.unloadAllPossible()

    pagedVecAccessFun = context.getFinishedResult().asResult.result

    context.teardown()

    # Calling the closure now has to load the vector that was just unloaded.
    context.evaluate(pagedVecAccessFun, FORANative.symbol_Call)

    self.assertFalse(context.isInterrupted())
    self.assertTrue(context.isVectorLoad())

    context.teardown()
def test_teardown_simple(self):
    """Tear down a context that is stopped on a cache request.

    Runs a trivial computation to completion, then starts one that issues a
    `cached(...)` call, steps the context until it reports a cache request,
    and finally calls `teardown(True)`.
    """
    vdm = FORANative.VectorDataManager(
        callbackScheduler,
        Setup.config().maxPageSizeInBytes
        )
    context = ExecutionContext.ExecutionContext(
        dataManager=vdm,
        allowInternalSplitting=False
        )

    trivialFun = FORA.extractImplValContainer(FORA.eval("fun(){nothing}"))
    evaluate(context, trivialFun, FORANative.symbol_Call)
    context.getFinishedResult()

    cachingFunText = """fun() { let f = fun() { }; let v = [1, [3]]; cached(f()) }"""
    toEval = FORA.extractImplValContainer(FORA.eval(cachingFunText))
    evaluate(context, toEval, FORANative.symbol_Call)

    # Keep computing until the context stops on the cached() call.
    while not context.isCacheRequest():
        context.compute()

    context.teardown(True)
def test_teardown(self):
    """A view and its harness can be torn down right after creation."""
    sharedStatePort = Setup.config().sharedStatePort
    harness = SharedStateTestHarness.SharedStateTestHarness(
        False,
        port=sharedStatePort
        )

    # Tear down the view before the harness that created it.
    view = harness.newView()
    view.teardown()

    harness.teardown()
def __init__(self, relayHostname, relayHttpsPort = None, messageDelayInSeconds = None):
    """Initialize a PipeTransport.

    relayHostname - host name of the relay, stored on the instance.

    relayHttpsPort - relay HTTPS port. When falsy (None or 0), the value of
        Setup.config().relayHttpsPort is used instead.

    messageDelayInSeconds - if not None, then all messages will be delayed
        by this many seconds before being pumped into the receiving channel.
        This can simulate delays talking over the internet.
    """
    # Where to connect.
    self.relayHostname = relayHostname
    # `or` keeps the original truthiness test and only consults the
    # configuration when no usable port was passed in.
    self.relayHttpsPort = relayHttpsPort or Setup.config().relayHttpsPort

    # Callbacks; both start unset.
    self.onMessageReceived = None
    self.onDisconnected = None

    # Connection / lifecycle flags.
    self.isConnected = False
    self.isShuttingDown = False

    # Proxy process and its pipes; nothing is started here.
    self.proxyProcess = None
    self.proxyStdIn = None
    self.proxyStdOut = None
    self.proxyStdErr = None

    # Worker threads; all start unset.
    self.inputLoopThread = None
    self.proxyOutputThread = None
    self.messagePumpThread = None

    # Message pump state.
    self.messageDelayInSeconds = messageDelayInSeconds
    self.messagePumpQueue = Queue.Queue()

    logging.info(
        "PipeTransport created for host %s:%s",
        self.relayHostname,
        self.relayHttpsPort
        )
def __call__(self): start, stop = self.lowOffset, self.highOffset logging.info("Starting extraction of %s, %s, [%s, %s]", self.bucketname, self.keyname, start, stop) t0 = time.time() totalThreads = Setup.config().externalDatasetLoaderThreadcount def downloadThread(ix): def downloader(): low = start + (stop - start) * ix / totalThreads high = start + (stop - start) * (ix + 1) / totalThreads tries = 0 while True: try: results[ix] = self.s3Interface.getKeyValueOverRange( self.bucketname, self.keyname, low, high) return except: if tries < 10: logging.warn( "Task %s had an exception:%s\nTries = %s. We will fail the " + "request when 'tries' gets to 10", self, traceback.format_exc(), tries) tries += 1 else: results[ix] = sys.exc_info() return return downloader results = [] threads = [] for ix in range(totalThreads): results.append(None) threads.append(threading.Thread(target=downloadThread(ix))) for t in threads: t.start() for t in threads: t.join() for r in results: if isinstance(r, tuple): exc_info = r raise exc_info[0], exc_info[1], exc_info[2] result = "".join(results) assert len(result) == stop - start logging.info("Actually extracting %s from s3 took %s", len(result) / 1024 / 1024.0, time.time() - t0) return result
def KeyspaceManager(randomSeed,
                    numManagers,
                    backupInterval=60 * 10,
                    pingInterval=20,
                    cachePathOverride=None,
                    maxOpenFiles=None,
                    maxLogFileSizeMb=10):
    """Build a SharedStateNative.KeyspaceManager, optionally file-backed.

    cachePathOverride - cache directory for the file storage. None means
        "use Setup.config().sharedStateCache"; an empty string means no
        storage at all (None is passed to the native manager).

    maxOpenFiles - open-file budget for the storage. None means half of the
        RLIMIT_NOFILE value read from `resource.getrlimit`, capped at 1000.

    maxLogFileSizeMb - forwarded to the FileStorage constructor.

    The remaining arguments are passed straight through to
    SharedStateNative.KeyspaceManager.
    """
    if cachePathOverride is None:
        cachePathOverride = Setup.config().sharedStateCache

    if maxOpenFiles is None:
        import resource
        softLimit = resource.getrlimit(resource.RLIMIT_NOFILE)[0]
        maxOpenFiles = min(softLimit / 2, 1000)

    storage = None
    if cachePathOverride != "":
        logging.info(
            "Creating FileStorage(cachePathOverride=%s, maxOpenFiles=%s, maxLogFileSizeMb=%s)",
            cachePathOverride,
            maxOpenFiles,
            maxLogFileSizeMb
            )
        storage = SharedStateNative.Storage.FileStorage(
            cachePathOverride,
            maxOpenFiles,
            maxLogFileSizeMb
            )

    return SharedStateNative.KeyspaceManager(
        randomSeed,
        numManagers,
        backupInterval,
        pingInterval,
        storage
        )
def testDualConnectThrows(self):
    """A second server on an already-bound port must fail to start listening.

    Starts one SimpleServer on the configured test port, waits until it is
    listening, then starts a second server on the same port and asserts that
    waiting for it to listen raises.

    Cleanup runs in a `finally` block so that a failed assertion (or any
    other error) still stops the servers and joins the listener threads,
    instead of leaving non-daemon threads running.
    """
    server1 = SimpleServer.SimpleServer(port = Setup.config().testPort)
    thread1 = threading.Thread(target=suppressExceptionLogging(server1.runListenLoop))
    thread1.start()

    # Only set once the second server/thread actually exist, so the cleanup
    # below never touches something that was not created.
    server2 = None
    thread2 = None

    try:
        server1.blockUntilListening()

        server2 = SimpleServer.SimpleServer(port = Setup.config().testPort)
        listener2 = threading.Thread(target=suppressExceptionLogging(server2.runListenLoop))
        listener2.start()
        thread2 = listener2

        self.assertRaises(Exception, server2.blockUntilListening)
    finally:
        # Same shutdown order as before: stop both servers, then join.
        server1.stop()
        if server2 is not None:
            server2.stop()

        thread1.join()
        if thread2 is not None:
            thread2.join()
def setUpClass(cls):
    """Start a global cluster simulation with one desired worker.

    Stores the active configuration, the running simulator and the directory
    containing this module on the class for use by the tests.
    """
    cls.config = Setup.config()
    cls.executor = None
    cls.simulation = ClusterSimulation.Simulator.createGlobalSimulator()
    cls.simulation.startService()
    cls.simulation.getDesirePublisher().desireNumberOfWorkers(1)

    # Use the module's __file__, not the string literal '__file__': the
    # literal resolves relative to the current working directory, so
    # cur_dir would depend on where the tests were launched from.
    cls.cur_dir = os.path.dirname(os.path.realpath(__file__))
def setUpClass(cls):
    """Start a global cluster simulation with one worker and wait for it.

    Stores the active configuration and the running simulator on the class,
    requests a single worker, then blocks in `cls.waitUntilConnected()`.
    """
    cls.config = Setup.config()
    cls.executor = None

    simulator = ClusterSimulation.Simulator.createGlobalSimulator()
    cls.simulation = simulator
    simulator.startService()

    desirePublisher = simulator.getDesirePublisher()
    desirePublisher.desireNumberOfWorkers(1)

    cls.waitUntilConnected()
def setUpClass(cls):
    """Configure logging and start a global cluster simulation.

    Sets the 'requests' logger to WARNING, calls `logging.basicConfig` with
    level DEBUG, then creates and starts the simulator.
    """
    import logging

    requestsLogger = logging.getLogger('requests')
    requestsLogger.setLevel(logging.WARNING)
    logging.basicConfig(level=logging.DEBUG)

    cls.config = Setup.config()

    simulator = ClusterSimulation.Simulator.createGlobalSimulator()
    cls.simulation = simulator
    simulator.startService()
def setUp():
    """Empty the compiler disk cache directory, then run some FORA code.

    Every entry listed in `Setup.config().compilerDiskCacheDir` is removed
    with `os.remove` before `runSomeFora()` is called.
    """
    cacheDir = Setup.config().compilerDiskCacheDir

    # Snapshot the listing first, then delete each entry found.
    staleEntries = os.listdir(cacheDir)
    for entryName in staleEntries:
        os.remove(os.path.join(cacheDir, entryName))

    runSomeFora()