Beispiel #1
0
def start(args):
    """Bootstrap an actor system on this node and report success on the console.

    :param args: object providing the attributes ``node_ip`` and ``coordinator_ip``.
    :raises exceptions.RallyError: if ``actor.actor_system_already_running()`` reports
        that an actor system is already running.
    """
    system_is_up = actor.actor_system_already_running()
    if system_is_up:
        raise exceptions.RallyError("An actor system appears to be already running.")

    actor.bootstrap_actor_system(
        local_ip=args.node_ip,
        coordinator_ip=args.coordinator_ip,
    )

    success_template = "Successfully started actor system on node [%s] with coordinator node IP [%s]."
    console.info(success_template % (args.node_ip, args.coordinator_ip))
Beispiel #2
0
def with_actor_system(runnable, cfg):
    """Run ``runnable(cfg)`` with a bootstrapped actor system and clean up afterwards.

    If an actor system is already running locally it is joined and left running
    afterwards. Otherwise a new one is bootstrapped and shut down again once
    ``runnable`` has returned or raised.

    During shutdown the function polls once per second (at most 15 times) until
    the actor system is no longer detected. A ``KeyboardInterrupt`` during shutdown
    triggers one more shutdown attempt; after the second interrupt the function
    gives up and prints a warning.

    :param runnable: callable that is invoked with ``cfg`` as its only argument.
    :param cfg: configuration object; the key ``remote.benchmarking.supported`` is
        added to its ``system`` section when bootstrapping succeeds.
    :raises RuntimeError: if bootstrapping fails for any reason other than a missing
        external socket address.
    """
    already_running = actor.actor_system_already_running()
    logger.info("Actor system already running locally? [%s]", already_running)
    try:
        actors = actor.bootstrap_actor_system(try_join=already_running, prefer_local_only=not already_running)
        # We can only support remote benchmarks if we have a dedicated daemon that is not only bound to 127.0.0.1
        cfg.add(config.Scope.application, "system", "remote.benchmarking.supported", already_running)
    except RuntimeError as e:
        logger.exception("Could not bootstrap actor system.")
        if str(e) == "Unable to determine valid external socket address.":
            console.warn("Could not determine a socket address. Are you running without any network? Switching to degraded mode.",
                         logger=logger)
            actor.use_offline_actor_system()
            actors = actor.bootstrap_actor_system(try_join=True)
        else:
            raise
    try:
        runnable(cfg)
    finally:
        # We only shutdown the actor system if it was not already running before
        if not already_running:
            shutdown_complete = False
            times_interrupted = 0
            while not shutdown_complete and times_interrupted < 2:
                try:
                    logger.info("Attempting to shutdown internal actor system.")
                    actors.shutdown()
                    # note that this check will only evaluate to True for a TCP-based actor system.
                    timeout = 15
                    still_running = actor.actor_system_already_running()
                    while still_running and timeout > 0:
                        logger.info("Actor system is still running. Waiting...")
                        time.sleep(1)
                        timeout -= 1
                        still_running = actor.actor_system_already_running()
                    # Decide based on the last observed state instead of the remaining budget: the actor
                    # system may have terminated during the very last wait, which is still a success.
                    if not still_running:
                        shutdown_complete = True
                        logger.info("Shutdown completed.")
                    else:
                        logger.warning("Shutdown timed out. Actor system is still running.")
                        break
                except KeyboardInterrupt:
                    times_interrupted += 1
                    logger.warning("User interrupted shutdown of internal actor system.")
                    console.info("Please wait a moment for Rally's internal components to shutdown.")
            if not shutdown_complete and times_interrupted > 0:
                logger.warning("Terminating after user has interrupted actor system shutdown explicitly for [%d] times.", times_interrupted)
                console.println("")
                console.warn("Terminating now at the risk of leaving child processes behind.")
                console.println("")
                console.warn("The next race may fail due to an unclean shutdown.")
                console.println("")
                console.println(SKULL)
                console.println("")
            elif not shutdown_complete:
                console.warn("Could not terminate all internal processes within timeout. Please check and force-terminate all Rally processes.")
Beispiel #3
0
def with_actor_system(runnable, cfg):
    """Execute ``runnable(cfg)`` while an actor system is available.

    Joins the local actor system if one is already running; otherwise bootstraps a
    new one and shuts it down after ``runnable`` has finished (normally or with an
    exception). If bootstrapping fails because no external socket address could be
    determined, the function switches to the offline actor system instead.

    Shutdown waits for the actor system to disappear, polling once per second for
    at most 15 seconds. The user may interrupt shutdown once (it is then retried);
    a second interrupt ends the attempts with a warning on the console.

    :param runnable: callable taking ``cfg`` as its single argument.
    :param cfg: configuration object that receives the ``system`` setting
        ``remote.benchmarking.supported`` on a successful bootstrap.
    :raises RuntimeError: re-raised if bootstrapping fails with any message other
        than "Unable to determine valid external socket address.".
    """
    already_running = actor.actor_system_already_running()
    logger.info("Actor system already running locally? [%s]", already_running)
    try:
        actors = actor.bootstrap_actor_system(try_join=already_running, prefer_local_only=not already_running)
        # We can only support remote benchmarks if we have a dedicated daemon that is not only bound to 127.0.0.1
        cfg.add(config.Scope.application, "system", "remote.benchmarking.supported", already_running)
    except RuntimeError as e:
        logger.exception("Could not bootstrap actor system.")
        if str(e) == "Unable to determine valid external socket address.":
            console.warn("Could not determine a socket address. Are you running without any network? Switching to degraded mode.",
                         logger=logger)
            actor.use_offline_actor_system()
            actors = actor.bootstrap_actor_system(try_join=True)
        else:
            raise
    try:
        runnable(cfg)
    finally:
        # We only shutdown the actor system if it was not already running before
        if not already_running:
            shutdown_complete = False
            times_interrupted = 0
            while not shutdown_complete and times_interrupted < 2:
                try:
                    logger.info("Attempting to shutdown internal actor system.")
                    actors.shutdown()
                    # note that this check will only evaluate to True for a TCP-based actor system.
                    timeout = 15
                    still_running = actor.actor_system_already_running()
                    while still_running and timeout > 0:
                        logger.info("Actor system is still running. Waiting...")
                        time.sleep(1)
                        timeout -= 1
                        still_running = actor.actor_system_already_running()
                    # The outcome is determined by whether the system is still detected, not by the
                    # remaining time budget; otherwise a termination during the final wait would be
                    # misreported as a timeout.
                    if still_running:
                        logger.warning("Shutdown timed out. Actor system is still running.")
                        break
                    shutdown_complete = True
                    logger.info("Shutdown completed.")
                except KeyboardInterrupt:
                    times_interrupted += 1
                    logger.warning("User interrupted shutdown of internal actor system.")
                    console.info("Please wait a moment for Rally's internal components to shutdown.")
            if not shutdown_complete and times_interrupted > 0:
                logger.warning("Terminating after user has interrupted actor system shutdown explicitly for [%d] times.", times_interrupted)
                console.println("")
                console.warn("Terminating now at the risk of leaving child processes behind.")
                console.println("")
                console.warn("The next race may fail due to an unclean shutdown.")
                console.println("")
                console.println(SKULL)
                console.println("")
            elif not shutdown_complete:
                console.warn("Could not terminate all internal processes within timeout. Please check and force-terminate all Rally processes.")
Beispiel #4
0
def start(args):
    """Bootstrap an actor system on this node and confirm it on the console.

    :param args: object providing the attributes ``node_ip`` and ``coordinator_ip``.
    :raises exceptions.RallyError: if an actor system is already detected as running.
    """
    system_is_up = actor.actor_system_already_running()
    if system_is_up:
        raise exceptions.RallyError("An actor system appears to be already running.")

    # Thespian emits a warning like the following on startup (at least on Mac OS X):
    #
    # WARNING:root:Unable to get address info for address 103.1.168.192.in-addr.arpa (AddressFamily.AF_INET,\
    # SocketKind.SOCK_DGRAM, 17, 0): <class 'socket.gaierror'> [Errno 8] nodename nor servname provided, or not known
    #
    # To keep the output clean, only errors are shown and warnings are suppressed.
    logging.basicConfig(level=logging.ERROR)

    actor.bootstrap_actor_system(
        local_ip=args.node_ip,
        coordinator_ip=args.coordinator_ip,
    )

    success_template = "Successfully started actor system on node [%s] with coordinator node IP [%s]."
    console.info(success_template % (args.node_ip, args.coordinator_ip))
Beispiel #5
0
def start(args):
    """Start the actor system for this node unless one is already running.

    :param args: object exposing ``node_ip`` (passed as ``local_ip``) and
        ``coordinator_ip``.
    :raises exceptions.RallyError: when an actor system appears to be running already.
    """
    if actor.actor_system_already_running():
        already_running_message = "An actor system appears to be already running."
        raise exceptions.RallyError(already_running_message)

    # On startup Thespian logs the following warning (at least on Mac OS X):
    #
    # WARNING:root:Unable to get address info for address 103.1.168.192.in-addr.arpa (AddressFamily.AF_INET,\
    # SocketKind.SOCK_DGRAM, 17, 0): <class 'socket.gaierror'> [Errno 8] nodename nor servname provided, or not known
    #
    # Hence the root logger is limited to errors so that this warning is not shown.
    logging.basicConfig(level=logging.ERROR)

    bootstrap_options = {
        "local_ip": args.node_ip,
        "coordinator_ip": args.coordinator_ip,
    }
    actor.bootstrap_actor_system(**bootstrap_options)

    addresses = (args.node_ip, args.coordinator_ip)
    console.info("Successfully started actor system on node [%s] with coordinator node IP [%s]." % addresses)
Beispiel #6
0
def stop(raise_errors=True):
    """Shut down the locally running actor system and wait until it is gone.

    :param raise_errors: if true, an error during shutdown is re-raised after it has
        been reported, and a missing actor system is reported and ends the process
        via ``sys.exit(1)``. If false, a shutdown error is only reported on the
        console and a missing actor system is ignored.
    """
    if not actor.actor_system_already_running():
        # Nothing to stop; this is only an error if the caller asked for strict behavior.
        if raise_errors:
            console.error(
                "Could not shut down actor system: Actor system is not running.")
            sys.exit(1)
        return

    # noinspection PyBroadException
    try:
        # Thespian emits a warning like the following on startup (at least on Mac OS X):
        #
        # WARNING:root:Unable to get address info for address 103.1.168.192.in-addr.arpa (AddressFamily.AF_INET,\
        # SocketKind.SOCK_DGRAM, 17, 0): <class 'socket.gaierror'> [Errno 8] nodename nor servname provided, or not known
        #
        # To keep the output clean, only errors are shown and warnings are suppressed.
        logging.basicConfig(level=logging.ERROR)
        system = actor.bootstrap_actor_system(try_join=True)
        system.shutdown()
        # Await termination: poll once per second and print one progress dot per poll.
        console.info("Shutting down actor system.", end="", flush=True)
        while actor.actor_system_already_running():
            console.println(".", end="", flush=True)
            time.sleep(1)
        console.println(" [OK]")
    except BaseException:
        console.error("Could not shut down actor system.")
        if raise_errors:
            # raise again so user can see the error
            raise
Beispiel #7
0
 def setup(self):
     """Join the running actor system and ask the mechanic to start the engine.

     On success the system meta info is attached to the metrics store, the race is
     stored in the race store and a summary line is printed. If the mechanic
     answers with a failure or with any other unexpected message, the race is
     aborted.

     :raises exceptions.RallyError: if the mechanic replies with ``mechanic.Failure``
         or with anything other than ``mechanic.EngineStarted``.
     """
     # at this point an actor system has to run and we should only join
     self.actor_system = actor.bootstrap_actor_system(try_join=True)
     self.mechanic = self.actor_system.createActor(
         mechanic.MechanicActor,
         targetActorRequirements={"coordinator": True},
         globalName="/rally/mechanic/coordinator",
     )
     logger.info("Asking mechanic to start the engine.")
     start_engine = mechanic.StartEngine(
         self.cfg,
         self.metrics_store.open_context,
         self.sources,
         self.build,
         self.distribution,
         self.external,
         self.docker,
     )
     result = self.actor_system.ask(self.mechanic, start_engine)

     # Handle both unsuccessful outcomes first; a successful start falls through.
     if not isinstance(result, mechanic.EngineStarted):
         if isinstance(result, mechanic.Failure):
             logger.info("Starting engine has failed. Reason [%s]." % result.message)
             raise exceptions.RallyError(result.message)
         raise exceptions.RallyError(
             "Mechanic has not started engine but instead [%s]. Terminating race without result." % str(result))

     logger.info("Mechanic has started engine successfully.")
     self.metrics_store.meta_info = result.system_meta_info
     cluster = result.cluster_meta_info
     self.race_store.store_race(self.track, cluster.hosts, cluster.revision, cluster.distribution_version)

     challenge = self.track.find_challenge_or_default(self.cfg.opts("track", "challenge.name"))
     car_name = self.cfg.opts("mechanic", "car.name")
     console.info("Racing on track [%s], challenge [%s] and car [%s]" % (self.track, challenge, car_name))
     # just ensure it is optically separated
     console.println("")
Beispiel #8
0
    def setup(self):
        """Join the running actor system and ask the mechanic to start the engine.

        On success the system meta info is attached to the metrics store and the
        cluster meta info to the race. If the configuration has no
        ``distribution.version`` in the ``mechanic`` section yet, the version
        reported by the cluster is stored there and the track and challenge are
        reloaded. Finally a summary line is printed.

        :raises exceptions.RallyError: if the mechanic replies with ``mechanic.Failure``
            or with anything other than ``mechanic.EngineStarted``.
        """
        # at this point an actor system has to run and we should only join
        self.actor_system = actor.bootstrap_actor_system(try_join=True)
        self.mechanic = self.actor_system.createActor(
            mechanic.MechanicActor,
            targetActorRequirements={"coordinator": True},
            globalName="/rally/mechanic/coordinator",
        )
        logger.info("Asking mechanic to start the engine.")
        # This can only work accurately if the user has already specified the correct version!
        cluster_settings = self.race.challenge.cluster_settings
        start_engine = mechanic.StartEngine(
            self.cfg,
            self.metrics_store.open_context,
            cluster_settings,
            self.sources,
            self.build,
            self.distribution,
            self.external,
            self.docker,
        )
        result = self.actor_system.ask(self.mechanic, start_engine)

        # Handle both unsuccessful outcomes first; a successful start falls through.
        if not isinstance(result, mechanic.EngineStarted):
            if isinstance(result, mechanic.Failure):
                logger.info("Starting engine has failed. Reason [%s]." % result.message)
                raise exceptions.RallyError(result.message)
            raise exceptions.RallyError(
                "Mechanic has not started engine but instead [%s]. Terminating race without result." % str(result))

        logger.info("Mechanic has started engine successfully.")
        self.metrics_store.meta_info = result.system_meta_info
        cluster = result.cluster_meta_info
        self.race.cluster = cluster

        version_is_configured = self.cfg.exists("mechanic", "distribution.version")
        if not version_is_configured:
            # Adopt the version reported by the cluster and reload track and challenge with it.
            self.cfg.add(config.Scope.benchmark, "mechanic", "distribution.version", cluster.distribution_version)
            logger.info("Reloading track based for distribution version [%s]" % cluster.distribution_version)
            reloaded_track = self._load_track()
            self.race.track = reloaded_track
            self.race.challenge = self._find_challenge(reloaded_track)

        summary = (self.race.track_name, self.race.challenge_name, self.race.car)
        console.info("Racing on track [%s], challenge [%s] and car [%s]\n" % summary)
Beispiel #9
0
def race(cfg, sources=False, distribution=False, external=False, docker=False):
    """Run a benchmark through a ``BenchmarkActor`` on the already running actor system.

    A ``Setup`` message is sent to the benchmark actor and the reply is evaluated.
    In every case the benchmark actor is told to exit afterwards.

    :param cfg: configuration object that is forwarded in the ``Setup`` message.
    :param sources: forwarded unchanged in the ``Setup`` message.
    :param distribution: forwarded unchanged in the ``Setup`` message.
    :param external: forwarded unchanged in the ``Setup`` message.
    :param docker: forwarded unchanged in the ``Setup`` message.
    :raises exceptions.RallyError: if the reply is an ``actor.BenchmarkFailure`` or an
        unexpected message.
    :raises exceptions.UserInterrupted: if a ``KeyboardInterrupt`` occurs while waiting
        for the benchmark.
    """
    log = logging.getLogger(__name__)
    cancelled_message = "User has cancelled the benchmark (detected by race control)."

    # at this point an actor system has to run and we should only join
    system = actor.bootstrap_actor_system(try_join=True)
    coordinator = system.createActor(BenchmarkActor, targetActorRequirements={"coordinator": True})
    try:
        setup_request = Setup(cfg, sources, distribution, external, docker)
        result = system.ask(coordinator, setup_request)
        if isinstance(result, Success):
            log.info("Benchmark has finished successfully.")
        # may happen if one of the load generators has detected that the user has cancelled the benchmark.
        elif isinstance(result, actor.BenchmarkCancelled):
            log.info("User has cancelled the benchmark (detected by actor).")
        elif isinstance(result, actor.BenchmarkFailure):
            log.error("A benchmark failure has occurred")
            raise exceptions.RallyError(result.message, result.cause)
        else:
            raise exceptions.RallyError("Got an unexpected result during benchmarking: [%s]." % str(result))
    except KeyboardInterrupt:
        log.info(cancelled_message)
        # Notify the coordinator so it can handle this state properly. This is done with a blocking call so
        # that this message cannot race with the actor exit request that is sent below.
        system.ask(coordinator, actor.BenchmarkCancelled())
        raise exceptions.UserInterrupted(cancelled_message) from None
    finally:
        log.info("Telling benchmark actor to exit.")
        system.tell(coordinator, thespian.actors.ActorExitRequest())
Beispiel #10
0
def stop(raise_errors=True):
    """Stop the locally running actor system and block until it has terminated.

    :param raise_errors: controls strictness. When true, an exception raised during
        shutdown is reported and re-raised, and calling this function without a
        running actor system prints an error and exits the process with status 1.
        When false, shutdown errors are only reported and a missing actor system
        is silently accepted.
    """
    system_is_up = actor.actor_system_already_running()
    if not system_is_up:
        if not raise_errors:
            return
        console.error("Could not shut down actor system: Actor system is not running.")
        sys.exit(1)

    try:
        # On startup Thespian logs the following warning (at least on Mac OS X):
        #
        # WARNING:root:Unable to get address info for address 103.1.168.192.in-addr.arpa (AddressFamily.AF_INET,\
        # SocketKind.SOCK_DGRAM, 17, 0): <class 'socket.gaierror'> [Errno 8] nodename nor servname provided, or not known
        #
        # Hence the root logger is limited to errors so that this warning is not shown.
        logging.basicConfig(level=logging.ERROR)
        joined_system = actor.bootstrap_actor_system(try_join=True)
        joined_system.shutdown()
        # Wait for termination, printing one dot per one-second poll.
        console.info("Shutting down actor system.", end="", flush=True)
        while actor.actor_system_already_running():
            console.println(".", end="", flush=True)
            time.sleep(1)
        console.println(" [OK]")
    except BaseException:
        console.error("Could not shut down actor system.")
        if raise_errors:
            # propagate so that the user can see the actual error
            raise
Beispiel #11
0
def list_facts(cfg):
    """Gather facts from a single target host and print them as indented JSON.

    :param cfg: configuration object; the option ``hosts`` in section ``facts`` must
        contain exactly one target host.
    :raises exceptions.SystemSetupError: if no target host or more than one target
        host is configured.
    :raises exceptions.RallyError: if the facts actor does not reply with ``Facts``.
    """
    console.info("This is an experimental command and subject to change.")

    # The option is read as optional so that a custom error message can be provided.
    target_hosts = cfg.opts("facts", "hosts", mandatory=False)
    if not target_hosts:
        raise exceptions.SystemSetupError("Please define a target host with --target-hosts")
    if len(target_hosts) > 1:
        too_many_hosts = "Only one target host is supported at the moment but you provided %s"
        raise exceptions.SystemSetupError(too_many_hosts % target_hosts)

    # at this point an actor system has to run and we should only join
    system = actor.bootstrap_actor_system(try_join=True)
    requirements = {"ip": target_hosts[0]}
    facts_actor = system.createActor(FactsActor, targetActorRequirements=requirements)
    result = system.ask(facts_actor, GatherFacts())

    if not isinstance(result, Facts):
        raise exceptions.RallyError("Could not gather facts: [%s]." % str(result))
    console.println(json.dumps(result.facts, indent="  "))
Beispiel #12
0
def race(cfg, sources=False, build=False, distribution=False, external=False, docker=False):
    """Run a benchmark through a ``BenchmarkActor`` on the already running actor system.

    A ``Setup`` message is sent to the benchmark actor and the reply is evaluated.
    A ``KeyboardInterrupt`` is not propagated: the benchmark actor is informed about
    the cancellation and the function returns normally. In every case the benchmark
    actor is told to exit afterwards.

    :param cfg: configuration object that is forwarded in the ``Setup`` message.
    :param sources: forwarded unchanged in the ``Setup`` message.
    :param build: forwarded unchanged in the ``Setup`` message.
    :param distribution: forwarded unchanged in the ``Setup`` message.
    :param external: forwarded unchanged in the ``Setup`` message.
    :param docker: forwarded unchanged in the ``Setup`` message.
    :raises exceptions.RallyError: if the reply is an ``actor.BenchmarkFailure`` or an
        unexpected message.
    """
    # at this point an actor system has to run and we should only join
    system = actor.bootstrap_actor_system(try_join=True)
    coordinator = system.createActor(
        BenchmarkActor,
        targetActorRequirements={"coordinator": True},
    )
    try:
        setup_request = Setup(cfg, sources, build, distribution, external, docker)
        result = system.ask(coordinator, setup_request)
        if isinstance(result, Success):
            logger.info("Benchmark has finished successfully.")
        # may happen if one of the load generators has detected that the user has cancelled the benchmark.
        elif isinstance(result, actor.BenchmarkCancelled):
            logger.info("User has cancelled the benchmark (detected by actor).")
        elif isinstance(result, actor.BenchmarkFailure):
            logger.error("A benchmark failure has occurred")
            raise exceptions.RallyError(result.message, result.cause)
        else:
            unexpected = "Got an unexpected result during benchmarking: [%s]."
            raise exceptions.RallyError(unexpected % str(result))
    except KeyboardInterrupt:
        logger.info("User has cancelled the benchmark (detected by race control).")
        # Notify the coordinator so it can handle this state properly. This is done with a blocking call so
        # that this message cannot race with the actor exit request that is sent below.
        system.ask(coordinator, actor.BenchmarkCancelled())
    finally:
        logger.info("Telling benchmark actor to exit.")
        system.tell(coordinator, thespian.actors.ActorExitRequest())
Beispiel #13
0
def with_actor_system(runnable, cfg):
    """Run ``runnable(cfg)`` with a bootstrapped actor system and clean up afterwards.

    If an actor system is already running locally it is joined and left running
    afterwards. Otherwise a new one is bootstrapped and shut down again once
    ``runnable`` has returned or raised. If bootstrapping fails with
    ``thespian.actors.InvalidActorAddress`` or because no external socket address
    could be determined, the offline actor system is used instead.

    Each shutdown attempt first sleeps three seconds, then requests shutdown and
    polls once per second (at most 15 times) until the actor system is no longer
    detected. A ``KeyboardInterrupt`` during shutdown triggers one more attempt;
    after the second interrupt the function gives up.

    :param runnable: callable that is invoked with ``cfg`` as its only argument.
    :param cfg: configuration object; the key ``remote.benchmarking.supported`` is
        added to its ``system`` section when bootstrapping succeeds.
    :raises exceptions.UserInterrupted: if the user interrupts bootstrapping, or
        interrupts shutdown so that it could not be completed.
    :raises Exception: any other bootstrap error is re-raised unchanged.
    """
    logger = logging.getLogger(__name__)
    already_running = actor.actor_system_already_running()
    logger.info("Actor system already running locally? [%s]", already_running)
    try:
        actors = actor.bootstrap_actor_system(
            try_join=already_running, prefer_local_only=not already_running)
        # We can only support remote benchmarks if we have a dedicated daemon that is not only bound to 127.0.0.1
        cfg.add(config.Scope.application, "system",
                "remote.benchmarking.supported", already_running)
    # This happens when the admin process could not be started, e.g. because it could not open a socket.
    except thespian.actors.InvalidActorAddress:
        logger.info("Falling back to offline actor system.")
        actor.use_offline_actor_system()
        actors = actor.bootstrap_actor_system(try_join=True)
    except KeyboardInterrupt:
        raise exceptions.UserInterrupted(
            "User has cancelled the benchmark (detected whilst bootstrapping actor system)."
        ) from None
    except Exception as e:
        logger.exception("Could not bootstrap actor system.")
        if str(e) == "Unable to determine valid external socket address.":
            console.warn(
                "Could not determine a socket address. Are you running without any network? Switching to degraded mode.",
                logger=logger)
            logger.info("Falling back to offline actor system.")
            actor.use_offline_actor_system()
            actors = actor.bootstrap_actor_system(try_join=True)
        else:
            raise
    try:
        runnable(cfg)
    finally:
        # We only shutdown the actor system if it was not already running before
        if not already_running:
            shutdown_complete = False
            times_interrupted = 0
            while not shutdown_complete and times_interrupted < 2:
                try:
                    # give some time for any outstanding messages to be delivered to the actor system
                    time.sleep(3)
                    logger.info(
                        "Attempting to shutdown internal actor system.")
                    actors.shutdown()
                    # note that this check will only evaluate to True for a TCP-based actor system.
                    timeout = 15
                    still_running = actor.actor_system_already_running()
                    while still_running and timeout > 0:
                        logger.info(
                            "Actor system is still running. Waiting...")
                        time.sleep(1)
                        timeout -= 1
                        still_running = actor.actor_system_already_running()
                    # Decide based on the last observed state instead of the remaining budget: the actor
                    # system may have terminated during the very last wait, which is still a success.
                    if not still_running:
                        shutdown_complete = True
                        logger.info("Shutdown completed.")
                    else:
                        logger.warning(
                            "Shutdown timed out. Actor system is still running."
                        )
                        break
                except KeyboardInterrupt:
                    times_interrupted += 1
                    logger.warning(
                        "User interrupted shutdown of internal actor system.")
                    console.info(
                        "Please wait a moment for Rally's internal components to shutdown."
                    )
            if not shutdown_complete and times_interrupted > 0:
                logger.warning(
                    "Terminating after user has interrupted actor system shutdown explicitly for [%d] times.",
                    times_interrupted)
                console.println("")
                console.warn(
                    "Terminating now at the risk of leaving child processes behind."
                )
                console.println("")
                console.warn(
                    "The next race may fail due to an unclean shutdown.")
                console.println("")
                console.println(SKULL)
                console.println("")
                raise exceptions.UserInterrupted(
                    f"User has cancelled the benchmark (shutdown not complete as user interrupted "
                    f"{times_interrupted} times).") from None
            elif not shutdown_complete:
                console.warn(
                    "Could not terminate all internal processes within timeout. Please check and force-terminate all Rally processes."
                )