def start(args):
    """Bootstrap an actor system on this node unless one is already running.

    :param args: Object exposing the attributes ``node_ip`` and ``coordinator_ip``; they are passed to
                 ``actor.bootstrap_actor_system`` as ``local_ip`` and ``coordinator_ip`` respectively.
    :raises exceptions.RallyError: If ``actor.actor_system_already_running()`` reports a running actor system.
    """
    if actor.actor_system_already_running():
        raise exceptions.RallyError("An actor system appears to be already running.")

    node_ip = args.node_ip
    coordinator_ip = args.coordinator_ip
    actor.bootstrap_actor_system(local_ip=node_ip, coordinator_ip=coordinator_ip)

    success_message = "Successfully started actor system on node [%s] with coordinator node IP [%s]." % (node_ip, coordinator_ip)
    console.info(success_message)
def with_actor_system(runnable, cfg):
    """Run ``runnable(cfg)`` with an actor system available and clean up afterwards.

    If an actor system is already running locally it is joined and left running afterwards. Otherwise a new one is
    bootstrapped (preferring a local-only setup) and shut down again once ``runnable`` has returned or raised.

    :param runnable: Callable that is invoked with ``cfg`` as its only argument.
    :param cfg: Configuration object. The property ``remote.benchmarking.supported`` is added to its ``system``
                section and is only true when an actor system was already running before this call.
    :raises RuntimeError: Re-raised from bootstrapping unless it signals that no external socket address could be
                          determined; in that case the offline actor system is used instead.
    """
    already_running = actor.actor_system_already_running()
    logger.info("Actor system already running locally? [%s]", already_running)
    try:
        actors = actor.bootstrap_actor_system(try_join=already_running, prefer_local_only=not already_running)
        # We can only support remote benchmarks if we have a dedicated daemon that is not only bound to 127.0.0.1
        cfg.add(config.Scope.application, "system", "remote.benchmarking.supported", already_running)
    except RuntimeError as e:
        logger.exception("Could not bootstrap actor system.")
        if str(e) == "Unable to determine valid external socket address.":
            console.warn("Could not determine a socket address. Are you running without any network? Switching to degraded mode.",
                         logger=logger)
            actor.use_offline_actor_system()
            actors = actor.bootstrap_actor_system(try_join=True)
        else:
            raise
    try:
        runnable(cfg)
    finally:
        # We only shutdown the actor system if it was not already running before
        if not already_running:
            shutdown_complete = False
            times_interrupted = 0
            # A first Ctrl+C during shutdown triggers one more attempt, a second one gives up.
            while not shutdown_complete and times_interrupted < 2:
                try:
                    logger.info("Attempting to shutdown internal actor system.")
                    actors.shutdown()
                    # note that this check will only evaluate to True for a TCP-based actor system.
                    timeout = 15
                    still_running = actor.actor_system_already_running()
                    while still_running and timeout > 0:
                        logger.info("Actor system is still running. Waiting...")
                        time.sleep(1)
                        timeout -= 1
                        still_running = actor.actor_system_already_running()
                    # Judge by the last liveness probe instead of the remaining time budget: otherwise an actor
                    # system that terminates during the final second would be reported as timed out.
                    if not still_running:
                        shutdown_complete = True
                        logger.info("Shutdown completed.")
                    else:
                        logger.warning("Shutdown timed out. Actor system is still running.")
                        break
                except KeyboardInterrupt:
                    times_interrupted += 1
                    logger.warning("User interrupted shutdown of internal actor system.")
                    console.info("Please wait a moment for Rally's internal components to shutdown.")
            if not shutdown_complete and times_interrupted > 0:
                logger.warning("Terminating after user has interrupted actor system shutdown explicitly for [%d] times.",
                               times_interrupted)
                console.println("")
                console.warn("Terminating now at the risk of leaving child processes behind.")
                console.println("")
                console.warn("The next race may fail due to an unclean shutdown.")
                console.println("")
                console.println(SKULL)
                console.println("")
            elif not shutdown_complete:
                console.warn("Could not terminate all internal processes within timeout. Please check and force-terminate all Rally processes.")
def start(args):
    """Bootstrap an actor system on this node unless one is already running.

    The root logger is configured to the ``ERROR`` level before bootstrapping (see the inline comment for the reason).

    :param args: Object exposing the attributes ``node_ip`` and ``coordinator_ip``; they are passed to
                 ``actor.bootstrap_actor_system`` as ``local_ip`` and ``coordinator_ip`` respectively.
    :raises exceptions.RallyError: If ``actor.actor_system_already_running()`` reports a running actor system.
    """
    system_is_up = actor.actor_system_already_running()
    if system_is_up:
        raise exceptions.RallyError("An actor system appears to be already running.")

    # Thespian emits the following warning on startup (at least on Mac OS X):
    #
    # WARNING:root:Unable to get address info for address 103.1.168.192.in-addr.arpa (AddressFamily.AF_INET,\
    # SocketKind.SOCK_DGRAM, 17, 0): <class 'socket.gaierror'> [Errno 8] nodename nor servname provided, or not known
    #
    # Hence we suppress warnings and only show errors.
    logging.basicConfig(level=logging.ERROR)

    node_ip = args.node_ip
    coordinator_ip = args.coordinator_ip
    actor.bootstrap_actor_system(local_ip=node_ip, coordinator_ip=coordinator_ip)

    console.info("Successfully started actor system on node [%s] with coordinator node IP [%s]." % (node_ip, coordinator_ip))
def stop(raise_errors=True):
    """Shut down the running actor system and block until it is no longer detected.

    :param raise_errors: If ``True`` (default), an error during shutdown is re-raised after it has been printed, and
                         a missing actor system is reported on the console followed by ``sys.exit(1)``. If ``False``,
                         a shutdown error is only printed and a missing actor system is ignored silently.
    """
    if not actor.actor_system_already_running():
        if raise_errors:
            console.error("Could not shut down actor system: Actor system is not running.")
            sys.exit(1)
        return

    # noinspection PyBroadException
    try:
        # Thespian emits the following warning on startup (at least on Mac OS X):
        #
        # WARNING:root:Unable to get address info for address 103.1.168.192.in-addr.arpa (AddressFamily.AF_INET,\
        # SocketKind.SOCK_DGRAM, 17, 0): <class 'socket.gaierror'> [Errno 8] nodename nor servname provided, or not known
        #
        # Hence we suppress warnings and only show errors.
        logging.basicConfig(level=logging.ERROR)
        actor.bootstrap_actor_system(try_join=True).shutdown()
        # Print one progress dot per second for as long as the actor system is still detected.
        console.info("Shutting down actor system.", end="", flush=True)
        while actor.actor_system_already_running():
            console.println(".", end="", flush=True)
            time.sleep(1)
        console.println(" [OK]")
    except BaseException:
        console.error("Could not shut down actor system.")
        if raise_errors:
            # re-raise so that the user gets to see the actual error
            raise
def setup(self):
    """Join the actor system, create the mechanic actor and ask it to start the engine.

    On success the system meta-info is stored on the metrics store, the race is recorded in the race store and a
    summary line is printed to the console.

    :raises exceptions.RallyError: If the mechanic answers with ``mechanic.Failure`` (carrying its message) or with
                                   anything other than ``mechanic.EngineStarted``.
    """
    # an actor system must already be running at this point, so we only join it
    self.actor_system = actor.bootstrap_actor_system(try_join=True)
    self.mechanic = self.actor_system.createActor(
        mechanic.MechanicActor,
        targetActorRequirements={"coordinator": True},
        globalName="/rally/mechanic/coordinator",
    )
    logger.info("Asking mechanic to start the engine.")
    start_request = mechanic.StartEngine(
        self.cfg,
        self.metrics_store.open_context,
        self.sources,
        self.build,
        self.distribution,
        self.external,
        self.docker,
    )
    result = self.actor_system.ask(self.mechanic, start_request)

    # Deal with both failure modes first; everything below is the success path.
    if not isinstance(result, mechanic.EngineStarted):
        if isinstance(result, mechanic.Failure):
            logger.info("Starting engine has failed. Reason [%s]." % result.message)
            raise exceptions.RallyError(result.message)
        raise exceptions.RallyError("Mechanic has not started engine but instead [%s]. Terminating race without result." % str(result))

    logger.info("Mechanic has started engine successfully.")
    self.metrics_store.meta_info = result.system_meta_info
    cluster = result.cluster_meta_info
    self.race_store.store_race(self.track, cluster.hosts, cluster.revision, cluster.distribution_version)

    challenge = self.track.find_challenge_or_default(self.cfg.opts("track", "challenge.name"))
    car = self.cfg.opts("mechanic", "car.name")
    console.info("Racing on track [%s], challenge [%s] and car [%s]" % (self.track, challenge, car))
    # blank line purely for visual separation
    console.println("")
def setup(self):
    """Join the actor system, create the mechanic actor and ask it to start the engine.

    On success the system meta-info is stored on the metrics store and the cluster meta-info on the race. If the
    configuration has no ``distribution.version`` in the ``mechanic`` section yet, the version reported by the
    cluster is added and track and challenge are loaded again.

    :raises exceptions.RallyError: If the mechanic answers with ``mechanic.Failure`` (carrying its message) or with
                                   anything other than ``mechanic.EngineStarted``.
    """
    # an actor system must already be running at this point, so we only join it
    self.actor_system = actor.bootstrap_actor_system(try_join=True)
    self.mechanic = self.actor_system.createActor(
        mechanic.MechanicActor,
        targetActorRequirements={"coordinator": True},
        globalName="/rally/mechanic/coordinator",
    )
    logger.info("Asking mechanic to start the engine.")
    # This can only work accurately if the user has already specified the correct version!
    cluster_settings = self.race.challenge.cluster_settings
    start_request = mechanic.StartEngine(
        self.cfg,
        self.metrics_store.open_context,
        cluster_settings,
        self.sources,
        self.build,
        self.distribution,
        self.external,
        self.docker,
    )
    result = self.actor_system.ask(self.mechanic, start_request)

    # Deal with both failure modes first; everything below is the success path.
    if not isinstance(result, mechanic.EngineStarted):
        if isinstance(result, mechanic.Failure):
            logger.info("Starting engine has failed. Reason [%s]." % result.message)
            raise exceptions.RallyError(result.message)
        raise exceptions.RallyError("Mechanic has not started engine but instead [%s]. Terminating race without result." % str(result))

    logger.info("Mechanic has started engine successfully.")
    self.metrics_store.meta_info = result.system_meta_info
    cluster = result.cluster_meta_info
    self.race.cluster = cluster

    version_known = self.cfg.exists("mechanic", "distribution.version")
    if not version_known:
        self.cfg.add(config.Scope.benchmark, "mechanic", "distribution.version", cluster.distribution_version)
        logger.info("Reloading track based for distribution version [%s]" % cluster.distribution_version)
        reloaded_track = self._load_track()
        self.race.track = reloaded_track
        self.race.challenge = self._find_challenge(reloaded_track)

    summary = "Racing on track [%s], challenge [%s] and car [%s]\n" % (self.race.track_name, self.race.challenge_name, self.race.car)
    console.info(summary)
def race(cfg, sources=False, distribution=False, external=False, docker=False):
    """Run a benchmark by sending a ``Setup`` message to a newly created ``BenchmarkActor`` and waiting for the answer.

    The benchmark actor is always told to exit at the end, regardless of the outcome.

    :param cfg: Configuration object, forwarded in the ``Setup`` message.
    :param sources: Forwarded unchanged in the ``Setup`` message.
    :param distribution: Forwarded unchanged in the ``Setup`` message.
    :param external: Forwarded unchanged in the ``Setup`` message.
    :param docker: Forwarded unchanged in the ``Setup`` message.
    :raises exceptions.RallyError: If the actor answers with ``actor.BenchmarkFailure`` or with an unexpected result.
    :raises exceptions.UserInterrupted: If a ``KeyboardInterrupt`` occurs while waiting for the answer.
    """
    log = logging.getLogger(__name__)
    # an actor system must already be running at this point, so we only join it
    system = actor.bootstrap_actor_system(try_join=True)
    coordinator = system.createActor(BenchmarkActor, targetActorRequirements={"coordinator": True})
    try:
        setup_message = Setup(cfg, sources, distribution, external, docker)
        outcome = system.ask(coordinator, setup_message)
        if isinstance(outcome, Success):
            log.info("Benchmark has finished successfully.")
        elif isinstance(outcome, actor.BenchmarkCancelled):
            # may happen if one of the load generators has detected that the user has cancelled the benchmark.
            log.info("User has cancelled the benchmark (detected by actor).")
        elif not isinstance(outcome, actor.BenchmarkFailure):
            raise exceptions.RallyError("Got an unexpected result during benchmarking: [%s]." % str(outcome))
        else:
            log.error("A benchmark failure has occurred")
            raise exceptions.RallyError(outcome.message, outcome.cause)
    except KeyboardInterrupt:
        log.info("User has cancelled the benchmark (detected by race control).")
        # Notify the coordinator so it can handle this state properly. This is done blocking (ask instead of tell)
        # so this message cannot race with the actor exit request sent below.
        system.ask(coordinator, actor.BenchmarkCancelled())
        raise exceptions.UserInterrupted("User has cancelled the benchmark (detected by race control).") from None
    finally:
        log.info("Telling benchmark actor to exit.")
        system.tell(coordinator, thespian.actors.ActorExitRequest())
def stop(raise_errors=True):
    """Shut down the running actor system and block until it is no longer detected.

    :param raise_errors: If ``True`` (default), an error during shutdown is re-raised after it has been printed, and
                         a missing actor system is reported on the console followed by ``sys.exit(1)``. If ``False``,
                         a shutdown error is only printed and a missing actor system is ignored silently.
    """
    system_is_up = actor.actor_system_already_running()
    if not system_is_up:
        if raise_errors:
            console.error("Could not shut down actor system: Actor system is not running.")
            sys.exit(1)
        return

    try:
        # Thespian emits the following warning on startup (at least on Mac OS X):
        #
        # WARNING:root:Unable to get address info for address 103.1.168.192.in-addr.arpa (AddressFamily.AF_INET,\
        # SocketKind.SOCK_DGRAM, 17, 0): <class 'socket.gaierror'> [Errno 8] nodename nor servname provided, or not known
        #
        # Hence we suppress warnings and only show errors.
        logging.basicConfig(level=logging.ERROR)
        joined_system = actor.bootstrap_actor_system(try_join=True)
        joined_system.shutdown()
        # Print one progress dot per second for as long as the actor system is still detected.
        console.info("Shutting down actor system.", end="", flush=True)
        while actor.actor_system_already_running():
            console.println(".", end="", flush=True)
            time.sleep(1)
        console.println(" [OK]")
    except BaseException:
        console.error("Could not shut down actor system.")
        if raise_errors:
            # re-raise so that the user gets to see the actual error
            raise
def list_facts(cfg):
    """Gather facts from a single target host via a ``FactsActor`` and print them as JSON.

    :param cfg: Configuration object; the target hosts are read from the (optional) ``hosts`` property in its
                ``facts`` section.
    :raises exceptions.SystemSetupError: If no target host or more than one target host is configured.
    :raises exceptions.RallyError: If the actor answers with anything other than ``Facts``.
    """
    console.info("This is an experimental command and subject to change.")
    # The property is read as optional so that we can provide a custom error message below.
    target_hosts = cfg.opts("facts", "hosts", mandatory=False)
    if not target_hosts:
        raise exceptions.SystemSetupError("Please define a target host with --target-hosts")
    if len(target_hosts) > 1:
        # Wrap the operand in a one-element tuple: a bare tuple of hosts would otherwise be unpacked as individual
        # format arguments and raise a TypeError instead of the intended error.
        raise exceptions.SystemSetupError("Only one target host is supported at the moment but you provided %s" % (target_hosts,))

    # at this point an actor system has to run and we should only join
    actor_system = actor.bootstrap_actor_system(try_join=True)
    # The actor requirement "ip" is set to the single configured target host.
    facts_actor = actor_system.createActor(FactsActor, targetActorRequirements={"ip": target_hosts[0]})
    result = actor_system.ask(facts_actor, GatherFacts())
    if isinstance(result, Facts):
        console.println(json.dumps(result.facts, indent="  "))
    else:
        raise exceptions.RallyError("Could not gather facts: [%s]." % str(result))
def race(cfg, sources=False, build=False, distribution=False, external=False, docker=False):
    """Run a benchmark by sending a ``Setup`` message to a newly created ``BenchmarkActor`` and waiting for the answer.

    A ``KeyboardInterrupt`` while waiting is logged and reported to the benchmark actor but not re-raised. The
    benchmark actor is always told to exit at the end, regardless of the outcome.

    :param cfg: Configuration object, forwarded in the ``Setup`` message.
    :param sources: Forwarded unchanged in the ``Setup`` message.
    :param build: Forwarded unchanged in the ``Setup`` message.
    :param distribution: Forwarded unchanged in the ``Setup`` message.
    :param external: Forwarded unchanged in the ``Setup`` message.
    :param docker: Forwarded unchanged in the ``Setup`` message.
    :raises exceptions.RallyError: If the actor answers with ``actor.BenchmarkFailure`` or with an unexpected result.
    """
    # an actor system must already be running at this point, so we only join it
    system = actor.bootstrap_actor_system(try_join=True)
    coordinator = system.createActor(BenchmarkActor, targetActorRequirements={"coordinator": True})
    try:
        setup_message = Setup(cfg, sources, build, distribution, external, docker)
        outcome = system.ask(coordinator, setup_message)
        if isinstance(outcome, Success):
            logger.info("Benchmark has finished successfully.")
        elif isinstance(outcome, actor.BenchmarkCancelled):
            # may happen if one of the load generators has detected that the user has cancelled the benchmark.
            logger.info("User has cancelled the benchmark (detected by actor).")
        elif not isinstance(outcome, actor.BenchmarkFailure):
            raise exceptions.RallyError("Got an unexpected result during benchmarking: [%s]." % str(outcome))
        else:
            logger.error("A benchmark failure has occurred")
            raise exceptions.RallyError(outcome.message, outcome.cause)
    except KeyboardInterrupt:
        logger.info("User has cancelled the benchmark (detected by race control).")
        # Notify the coordinator so it can handle this state properly. This is done blocking (ask instead of tell)
        # so this message cannot race with the actor exit request sent below.
        system.ask(coordinator, actor.BenchmarkCancelled())
    finally:
        logger.info("Telling benchmark actor to exit.")
        system.tell(coordinator, thespian.actors.ActorExitRequest())
def with_actor_system(runnable, cfg):
    """Run ``runnable(cfg)`` with an actor system available and clean up afterwards.

    If an actor system is already running locally it is joined and left running afterwards. Otherwise a new one is
    bootstrapped (preferring a local-only setup) and shut down again once ``runnable`` has returned or raised.

    :param runnable: Callable that is invoked with ``cfg`` as its only argument.
    :param cfg: Configuration object. The property ``remote.benchmarking.supported`` is added to its ``system``
                section and is only true when an actor system was already running before this call.
    :raises exceptions.UserInterrupted: If the user interrupts bootstrapping, or interrupts the shutdown of the
                                        internal actor system before it has completed.
    :raises Exception: Any other bootstrap error is re-raised unless it signals that no external socket address could
                       be determined; in that case the offline actor system is used instead.
    """
    logger = logging.getLogger(__name__)
    already_running = actor.actor_system_already_running()
    logger.info("Actor system already running locally? [%s]", str(already_running))
    try:
        actors = actor.bootstrap_actor_system(try_join=already_running, prefer_local_only=not already_running)
        # We can only support remote benchmarks if we have a dedicated daemon that is not only bound to 127.0.0.1
        cfg.add(config.Scope.application, "system", "remote.benchmarking.supported", already_running)
    # This happens when the admin process could not be started, e.g. because it could not open a socket.
    except thespian.actors.InvalidActorAddress:
        logger.info("Falling back to offline actor system.")
        actor.use_offline_actor_system()
        actors = actor.bootstrap_actor_system(try_join=True)
    except KeyboardInterrupt:
        raise exceptions.UserInterrupted("User has cancelled the benchmark (detected whilst bootstrapping actor system).") from None
    except Exception as e:
        logger.exception("Could not bootstrap actor system.")
        if str(e) == "Unable to determine valid external socket address.":
            console.warn("Could not determine a socket address. Are you running without any network? Switching to degraded mode.",
                         logger=logger)
            logger.info("Falling back to offline actor system.")
            actor.use_offline_actor_system()
            actors = actor.bootstrap_actor_system(try_join=True)
        else:
            raise
    try:
        runnable(cfg)
    finally:
        # We only shutdown the actor system if it was not already running before
        if not already_running:
            shutdown_complete = False
            times_interrupted = 0
            # A first Ctrl+C during shutdown triggers one more attempt, a second one gives up.
            while not shutdown_complete and times_interrupted < 2:
                try:
                    # give some time for any outstanding messages to be delivered to the actor system
                    time.sleep(3)
                    logger.info("Attempting to shutdown internal actor system.")
                    actors.shutdown()
                    # note that this check will only evaluate to True for a TCP-based actor system.
                    timeout = 15
                    still_running = actor.actor_system_already_running()
                    while still_running and timeout > 0:
                        logger.info("Actor system is still running. Waiting...")
                        time.sleep(1)
                        timeout -= 1
                        still_running = actor.actor_system_already_running()
                    # Judge by the last liveness probe instead of the remaining time budget: otherwise an actor
                    # system that terminates during the final second would be reported as timed out.
                    if not still_running:
                        shutdown_complete = True
                        logger.info("Shutdown completed.")
                    else:
                        logger.warning("Shutdown timed out. Actor system is still running.")
                        break
                except KeyboardInterrupt:
                    times_interrupted += 1
                    logger.warning("User interrupted shutdown of internal actor system.")
                    console.info("Please wait a moment for Rally's internal components to shutdown.")
            if not shutdown_complete and times_interrupted > 0:
                logger.warning("Terminating after user has interrupted actor system shutdown explicitly for [%d] times.",
                               times_interrupted)
                console.println("")
                console.warn("Terminating now at the risk of leaving child processes behind.")
                console.println("")
                console.warn("The next race may fail due to an unclean shutdown.")
                console.println("")
                console.println(SKULL)
                console.println("")
                raise exceptions.UserInterrupted(
                    f"User has cancelled the benchmark (shutdown not complete as user interrupted "
                    f"{times_interrupted} times).") from None
            elif not shutdown_complete:
                console.warn("Could not terminate all internal processes within timeout. Please check and force-terminate all Rally processes.")