Ejemplo n.º 1
0
    def start_benchmark(self, msg, sender):
        """
        Prepares the benchmark and starts one load generator per client.

        The method creates the Elasticsearch client, opens an in-memory metrics store, calls ``setup_index`` for the
        selected challenge, derives the client allocations from the challenge's schedule and finally sends a
        ``StartLoadGenerator`` message to every newly created ``LoadGenerator`` actor.

        :param msg: The start message. Its ``config``, ``track`` and ``metrics_meta_info`` attributes are read.
        :param sender: The actor that sent ``msg``. It is remembered in ``self.start_sender``.
        """
        logger.info("Benchmark is about to start.")
        self.start_sender = sender
        cfg = msg.config
        self.config = cfg
        self.quiet = cfg.opts("system", "quiet.mode", mandatory=False, default_value=False)

        hosts = cfg.opts("client", "hosts")
        client_options = cfg.opts("client", "options")
        self.es = client.EsClientFactory(hosts, client_options).create()

        self.metrics_store = metrics.InMemoryMetricsStore(config=cfg, meta_info=msg.metrics_meta_info)
        invocation = cfg.opts("meta", "time.start")
        track_name = cfg.opts("system", "track")
        challenge_name = cfg.opts("benchmarks", "challenge")
        car_name = cfg.opts("benchmarks", "car")
        self.metrics_store.open(invocation, track_name, challenge_name, car_name)

        current_track = msg.track
        selected_challenge = select_challenge(cfg, current_track)
        setup_index(self.es, current_track, selected_challenge)

        schedule_allocator = Allocator(selected_challenge.schedule)
        self.allocations = schedule_allocator.allocations
        self.number_of_steps = len(schedule_allocator.join_points) - 1
        self.ops_per_join_point = schedule_allocator.operations_per_joinpoint

        allocation_summary = (self.number_of_steps, len(self.allocations), self.allocations)
        logger.info("Benchmark consists of [%d] steps executed by (at most) [%d] clients as specified by the allocation matrix:\n%s"
                    % allocation_summary)

        # create all load generators first and only then start them
        for _ in range(schedule_allocator.clients):
            self.drivers.append(self.createActor(LoadGenerator))
        for client_id, driver in enumerate(self.drivers):
            start_message = StartLoadGenerator(client_id, cfg, current_track.indices, self.allocations[client_id])
            self.send(driver, start_message)

        self.update_progress_message()
        wakeup_interval = datetime.timedelta(seconds=Driver.WAKEUP_INTERVAL_SECONDS)
        self.wakeupAfter(wakeup_interval)
Ejemplo n.º 2
0
 def setUp(self):
     """Create a fresh config and an in-memory metrics store that uses ``StaticClock`` as its clock."""
     self.cfg = config.Config()
     # (section, key, value) triples that are registered in application scope
     application_settings = (
         ("system", "env.name", "unittest"),
         ("track", "params", {}),
     )
     for section, key, value in application_settings:
         self.cfg.add(config.Scope.application, section, key, value)
     self.metrics_store = metrics.InMemoryMetricsStore(self.cfg, clock=StaticClock)
Ejemplo n.º 3
0
    def test_calculate_simple_index_stats(self):
        """Checks the throughput, latency and service time that ``reporter.Stats`` reports for a single index task."""
        cfg = config.Config()
        cfg.add(config.Scope.application, "system", "env.name", "unittest")

        store = metrics.InMemoryMetricsStore(config=cfg, clear=True)
        store.open(datetime.datetime.now(), "test", "unittest", "unittest_car")

        def record(name, value, unit, **extra):
            # every sample in this test belongs to the "index" operation
            store.put_value_cluster_level(name, value, unit=unit, operation="index", operation_type=track.OperationType.Index, **extra)

        for docs_per_second in (500, 1000, 2000):
            record("throughput", docs_per_second, "docs/s")

        # each timing metric gets one warmup sample first; the expected percentiles below do not contain the warmup values
        timing_samples = (
            ("latency", 2800, (200, 220, 225)),
            ("service_time", 250, (190, 200, 215)),
        )
        for metric_name, warmup_value, measured_values in timing_samples:
            record(metric_name, warmup_value, "ms", sample_type=metrics.SampleType.Warmup)
            for measured_value in measured_values:
                record(metric_name, measured_value, "ms")

        index_operation = track.Operation(name="index", operation_type=track.OperationType.Index, granularity_unit="docs/s")
        index_task = track.Task(operation=index_operation)
        unittest_challenge = track.Challenge(name="unittest", description="", index_settings=None, schedule=[index_task])

        stats = reporter.Stats(store, unittest_challenge)
        index_metrics = stats.op_metrics["index"]

        self.assertEqual((500, 1000, 2000, "docs/s"), index_metrics["throughput"])
        self.assertEqual(collections.OrderedDict([(50.0, 220), (100, 225)]), index_metrics["latency"])
        self.assertEqual(collections.OrderedDict([(50.0, 200), (100, 215)]), index_metrics["service_time"])
Ejemplo n.º 4
0
    def on_start_engine(self, msg, sender):
        """
        Creates one ``NodeMechanicActor`` per target host and sends each of them its start message.

        All mechanic actors are created first; the start messages are only sent after every actor has been created.

        :param msg: The start message. Its ``cfg``, ``open_metrics_context`` and ``external`` attributes are read and
                    ``msg.for_nodes(...)`` is used to derive the per-mechanic start messages.
        :param sender: The actor that sent ``msg``. It is remembered in ``self.race_control``.
        :raises exceptions.LaunchError: If the configured list of client hosts is empty.
        :raises exceptions.SystemSetupError: If a host other than 127.0.0.1 is targeted but the config option
                                             "remote.benchmarking.supported" (section "system") is falsy.
        """
        logger.info("Received signal from race control to start engine.")
        self.race_control = sender
        self.cfg = msg.cfg
        self.metrics_store = metrics.InMemoryMetricsStore(self.cfg)
        self.metrics_store.open(ctx=msg.open_metrics_context)

        # In our startup procedure we first create all mechanics. Only if this succeeds we'll continue.
        mechanics_and_start_message = []
        hosts = self.cfg.opts("client", "hosts")
        if len(hosts) == 0:
            raise exceptions.LaunchError("No target hosts are configured.")

        if msg.external:
            logger.info("Cluster will not be provisioned by Rally.")
            # just create one actor for this special case and run it on the coordinator node (i.e. here)
            m = self.createActor(NodeMechanicActor,
                                 #globalName="/rally/mechanic/worker/external",
                                 targetActorRequirements={"coordinator": True})
            self.children.append(m)
            mechanics_and_start_message.append((m, msg.for_nodes(ip=hosts)))
        else:
            logger.info("Cluster consisting of %s will be provisioned by Rally." % hosts)
            all_ips_and_ports = to_ip_port(hosts)
            all_node_ips = extract_all_node_ips(all_ips_and_ports)
            # one mechanic actor is created per (ip, port) key returned by nodes_by_host
            for ip_port, nodes in nodes_by_host(all_ips_and_ports).items():
                ip, port = ip_port
                if ip == "127.0.0.1":
                    # the local host uses the same actor requirement as the external case above (coordinator node)
                    m = self.createActor(NodeMechanicActor,
                                         #globalName="/rally/mechanic/worker/localhost",
                                         targetActorRequirements={"coordinator": True})
                    self.children.append(m)
                    mechanics_and_start_message.append((m, msg.for_nodes(all_node_ips, ip, port, nodes)))
                else:
                    # NOTE(review): both messages below interpolate the complete host list, not just the current ip
                    if self.cfg.opts("system", "remote.benchmarking.supported"):
                        logger.info("Benchmarking against %s with external Rally daemon." % hosts)
                    else:
                        logger.error("User tried to benchmark against %s but no external Rally daemon has been started." % hosts)
                        raise exceptions.SystemSetupError("To benchmark remote hosts (e.g. %s) you need to start the Rally daemon "
                                                          "on each machine including this one." % ip)
                    already_running = actor.actor_system_already_running(ip=ip)
                    logger.info("Actor system on [%s] already running? [%s]" % (ip, str(already_running)))
                    if not already_running:
                        console.println("Waiting for Rally daemon on [%s] " % ip, end="", flush=True)
                    # poll every three seconds until an actor system is detected on the host; this loop has no timeout
                    while not actor.actor_system_already_running(ip=ip):
                        console.println(".", end="", flush=True)
                        time.sleep(3)
                    # only finish the progress line if it has been started above
                    if not already_running:
                        console.println(" [OK]")
                    # request that the actor is created in an actor system that matches the requirement {"ip": ip}
                    m = self.createActor(NodeMechanicActor,
                                         #globalName="/rally/mechanic/worker/%s" % ip,
                                         targetActorRequirements={"ip": ip})
                    mechanics_and_start_message.append((m, msg.for_nodes(all_node_ips, ip, port, nodes)))
                    self.children.append(m)
        self.status = "starting"
        self.received_responses = []
        # all mechanics have been created successfully, now trigger the actual startup
        for mechanic_actor, start_message in mechanics_and_start_message:
            self.send(mechanic_actor, start_message)
Ejemplo n.º 5
0
    def start_benchmark(self, msg, sender):
        """
        Prepares the track and the target cluster and starts one load generator per client.

        The method calls ``track.prepare_track``, creates the Elasticsearch client, opens an in-memory metrics store, calls
        ``setup_index`` for every index of the track, calls ``wait_for_status`` with the configured cluster health and then
        sends a ``StartLoadGenerator`` message to every newly created ``LoadGenerator`` actor.

        :param msg: The start message. Its ``config``, ``track``, ``metrics_meta_info`` and ``lap`` attributes are read.
        :param sender: The actor that sent ``msg``. It is remembered in ``self.start_sender``.
        """
        self.start_sender = sender
        cfg = msg.config
        self.config = cfg
        benchmark_track = msg.track

        logger.info("Preparing track")
        # TODO #71: Reconsider this in case we distribute drivers. *For now* the driver will only be on a single machine, so we're safe.
        track.prepare_track(benchmark_track, cfg)

        logger.info("Benchmark is about to start.")
        self.quiet = cfg.opts("system", "quiet.mode", mandatory=False, default_value=False)

        hosts = cfg.opts("client", "hosts")
        client_options = cfg.opts("client", "options")
        self.es = client.EsClientFactory(hosts, client_options).create()

        self.metrics_store = metrics.InMemoryMetricsStore(config=cfg, meta_info=msg.metrics_meta_info, lap=msg.lap)
        invocation = cfg.opts("meta", "time.start")
        required_health = cfg.opts("benchmarks", "cluster.health")
        track_name = cfg.opts("benchmarks", "track")
        challenge_name = cfg.opts("benchmarks", "challenge")
        car_name = cfg.opts("benchmarks", "car")
        self.metrics_store.open(invocation, track_name, challenge_name, car_name)

        selected_challenge = select_challenge(cfg, benchmark_track)
        distribution_version = cfg.opts("source", "distribution.version")
        for track_index in benchmark_track.indices:
            setup_index(self.es, track_index, selected_challenge.index_settings)
        wait_for_status(self.es, distribution_version, required_health)

        schedule_allocator = Allocator(selected_challenge.schedule)
        self.allocations = schedule_allocator.allocations
        self.number_of_steps = len(schedule_allocator.join_points) - 1
        self.ops_per_join_point = schedule_allocator.operations_per_joinpoint

        allocation_summary = (self.number_of_steps, len(self.allocations), self.allocations)
        logger.info("Benchmark consists of [%d] steps executed by (at most) [%d] clients as specified by the allocation matrix:\n%s"
                    % allocation_summary)

        # create all load generators first and only then start them
        for _ in range(schedule_allocator.clients):
            self.drivers.append(self.createActor(LoadGenerator))
        for client_id, driver in enumerate(self.drivers):
            start_message = StartLoadGenerator(client_id, cfg, benchmark_track, self.allocations[client_id])
            self.send(driver, start_message)

        self.update_progress_message()
        wakeup_interval = datetime.timedelta(seconds=Driver.WAKEUP_INTERVAL_SECONDS)
        self.wakeupAfter(wakeup_interval)
Ejemplo n.º 6
0
    def test_externalize_and_bulk_add(self):
        """Checks that a record exported with ``to_externalizable`` can be imported into a new store with ``bulk_add``."""
        trial = (EsMetricsTests.TRIAL_TIMESTAMP, "test", "append-no-conflicts", "defaults")
        self.metrics_store.open(*trial, create=True)
        self.metrics_store.lap = 1
        self.metrics_store.put_count_cluster_level("final_index_size", 1000, "GB")
        self.assertEqual(1, len(self.metrics_store.DOCS))

        # export the single record before the first store is closed and discarded
        externalized = self.metrics_store.to_externalizable()
        self.metrics_store.close()
        del self.metrics_store

        # a newly created store starts out empty ...
        self.metrics_store = metrics.InMemoryMetricsStore(self.cfg, clock=StaticClock)
        self.assertEqual(0, len(self.metrics_store.DOCS))

        # ... and contains the exported record after the bulk import
        self.metrics_store.bulk_add(externalized)
        self.assertEqual(1, len(self.metrics_store.DOCS))
        self.assertEqual(1000, self.metrics_store.get_one("final_index_size"))
Ejemplo n.º 7
0
    def receiveMessage(self, msg, sender):
        """
        Handles all messages that are sent to this actor by dispatching on the type of ``msg``.

        Handled message types are ``StartNodes``, ``ApplyMetricsMetaInfo``, ``ResetRelativeTime``, ``OnBenchmarkStart``,
        ``OnBenchmarkStop``, ``StopNodes`` and ``thespian.actors.ActorExitRequest``. Messages of any other type are ignored
        (apart from the debug log statement). Any exception that is raised while a message is processed is logged and
        reported back to ``sender`` as ``actor.BenchmarkFailure`` instead of being propagated.

        :param msg: The received message.
        :param sender: The actor that sent ``msg``. Responses and failures are sent to it.
        """
        # at the moment, we implement all message handling blocking. This is not ideal but simple to get started with. Besides, the caller
        # needs to block anyway. The only reason we implement mechanic as an actor is to distribute them.
        # noinspection PyBroadException
        try:
            logger.debug(
                "NodeMechanicActor#receiveMessage(msg = [%s] sender = [%s])" %
                (str(type(msg)), str(sender)))
            if isinstance(msg, StartNodes):
                # remember the host; it is used later in the log message of the ResetRelativeTime branch
                self.host = msg.ip
                if msg.external:
                    logger.info(
                        "Connecting to externally provisioned nodes on [%s]." %
                        msg.ip)
                else:
                    logger.info("Starting node(s) %s on [%s]." %
                                (msg.node_ids, msg.ip))

                # Load node-specific configuration
                self.config = config.auto_load_local_config(
                    msg.cfg,
                    additional_sections=[
                        # only copy the relevant bits
                        "track",
                        "mechanic",
                        "client",
                        # allow metrics store to extract race meta-data
                        "race",
                        "source"
                    ])
                # set root path (normally done by the main entry point)
                self.config.add(config.Scope.application, "node", "rally.root",
                                paths.rally_root())
                # provisioning settings are only added for nodes that are not externally provisioned
                if not msg.external:
                    self.config.add(config.Scope.benchmark, "provisioning",
                                    "node.ip", msg.ip)
                    # we need to override the port with the value that the user has specified instead of using the default value (39200)
                    self.config.add(config.Scope.benchmark, "provisioning",
                                    "node.http.port", msg.port)
                    self.config.add(config.Scope.benchmark, "provisioning",
                                    "node.ids", msg.node_ids)

                self.metrics_store = metrics.InMemoryMetricsStore(self.config)
                self.metrics_store.open(ctx=msg.open_metrics_context)
                # avoid follow-up errors in case we receive an unexpected ActorExitRequest due to an early failure in a parent actor.
                self.metrics_store.lap = 0

                self.mechanic = create(self.config, self.metrics_store,
                                       msg.all_node_ips, msg.cluster_settings,
                                       msg.sources, msg.build,
                                       msg.distribution, msg.external,
                                       msg.docker)
                nodes = self.mechanic.start_engine()
                # from now on an ActorExitRequest will stop the engine (see the last branch below)
                self.running = True
                self.send(
                    sender,
                    NodesStarted([NodeMetaInfo(node) for node in nodes],
                                 self.metrics_store.meta_info))
            elif isinstance(msg, ApplyMetricsMetaInfo):
                self.metrics_store.merge_meta_info(msg.meta_info)
                self.send(sender, MetricsMetaInfoApplied())
            elif isinstance(msg, ResetRelativeTime):
                # note that no response is sent to the sender for this message type
                logger.info(
                    "Resetting relative time of system metrics store on host [%s]."
                    % self.host)
                self.metrics_store.reset_relative_time()
            elif isinstance(msg, OnBenchmarkStart):
                self.metrics_store.lap = msg.lap
                self.mechanic.on_benchmark_start()
                self.send(sender, BenchmarkStarted())
            elif isinstance(msg, OnBenchmarkStop):
                self.mechanic.on_benchmark_stop()
                # clear metrics store data to not send duplicate system metrics data
                self.send(
                    sender,
                    BenchmarkStopped(
                        self.metrics_store.to_externalizable(clear=True)))
            elif isinstance(msg, StopNodes):
                logger.info("Stopping nodes %s." % self.mechanic.nodes)
                self.mechanic.stop_engine()
                self.send(sender,
                          NodesStopped(self.metrics_store.to_externalizable()))
                # clear all state as the mechanic might get reused later
                self.running = False
                self.config = None
                self.mechanic = None
                self.metrics_store = None
            elif isinstance(msg, thespian.actors.ActorExitRequest):
                # only stop the engine if it has been started and has not been stopped yet
                if self.running:
                    logger.info("Stopping nodes %s (due to ActorExitRequest)" %
                                self.mechanic.nodes)
                    self.mechanic.stop_engine()
                    self.running = False
        except BaseException:
            # mark as not running so a subsequent ActorExitRequest does not attempt to stop the engine
            self.running = False
            logger.exception("Cannot process message [%s]" % msg)
            # avoid "can't pickle traceback objects"
            import traceback
            # only ex_value is used below; the traceback is sent as a formatted string instead of the traceback object
            ex_type, ex_value, ex_traceback = sys.exc_info()
            self.send(sender,
                      actor.BenchmarkFailure(ex_value, traceback.format_exc()))
Ejemplo n.º 8
0
    def receiveMessage(self, msg, sender):
        """
        Handles all messages that are sent to this actor by dispatching on the type of ``msg``.

        Handled message types are ``StartEngine``, ``OnBenchmarkStart``, ``OnBenchmarkStop`` and ``StopEngine``. Messages of
        any other type are ignored (apart from the debug log statement). Any exception that is raised while a message is
        processed is logged and reported back to ``sender`` as ``Failure`` instead of being propagated.

        :param msg: The received message.
        :param sender: The actor that sent ``msg``. Responses and failures are sent to it.
        """
        # at the moment, we implement all message handling blocking. This is not ideal but simple to get started with. Besides, the caller
        # needs to block anyway. The only reason we implement mechanic as an actor is to distribute them.
        # noinspection PyBroadException
        try:
            logger.debug(
                "NodeMechanicActor#receiveMessage(msg = [%s] sender = [%s])" %
                (str(type(msg)), str(sender)))
            if isinstance(msg, StartEngine):
                logger.info("Starting engine")
                # Load node-specific configuration
                self.config = config.Config(config_name=msg.cfg.name)
                self.config.load_config()
                self.config.add(config.Scope.application, "node", "rally.root",
                                paths.rally_root())
                # copy only the necessary configuration sections
                self.config.add_all(msg.cfg, "system")
                self.config.add_all(msg.cfg, "client")
                self.config.add_all(msg.cfg, "track")
                self.config.add_all(msg.cfg, "mechanic")
                if msg.port is not None:
                    # we need to override the port with the value that the user has specified instead of using the default value (39200)
                    self.config.add(config.Scope.benchmark, "provisioning",
                                    "node.http.port", msg.port)

                self.metrics_store = metrics.InMemoryMetricsStore(self.config)
                self.metrics_store.open(ctx=msg.open_metrics_context)

                self.mechanic = create(self.config, self.metrics_store,
                                       self.single_machine, msg.sources,
                                       msg.build, msg.distribution,
                                       msg.external, msg.docker)
                cluster = self.mechanic.start_engine()
                # respond with meta-data about the started cluster and the meta-info of the metrics store
                self.send(
                    sender,
                    EngineStarted(
                        ClusterMetaInfo(cluster.hosts, cluster.source_revision,
                                        cluster.distribution_version),
                        self.metrics_store.meta_info))
            elif isinstance(msg, OnBenchmarkStart):
                self.metrics_store.lap = msg.lap
                self.mechanic.on_benchmark_start()
                self.send(sender, Success())
            elif isinstance(msg, OnBenchmarkStop):
                self.mechanic.on_benchmark_stop()
                # clear metrics store data to not send duplicate system metrics data
                self.send(
                    sender,
                    BenchmarkStopped(
                        self.metrics_store.to_externalizable(clear=True)))
            elif isinstance(msg, StopEngine):
                logger.info("Stopping engine")
                self.mechanic.stop_engine()
                self.send(
                    sender,
                    EngineStopped(self.metrics_store.to_externalizable()))
                # clear all state as the mechanic might get reused later
                self.config = None
                self.mechanic = None
                self.metrics_store = None
        except BaseException:
            logger.exception("Cannot process message [%s]" % msg)
            # avoid "can't pickle traceback objects"
            import traceback
            # only ex_value is used below; the traceback is sent as a formatted string instead of the traceback object
            ex_type, ex_value, ex_traceback = sys.exc_info()
            self.send(sender, Failure(ex_value, traceback.format_exc()))
Ejemplo n.º 9
0
    def start_benchmark(self, msg, sender):
        """
        Prepares the track and the target cluster and starts one load generator per client.

        The method calls ``track.prepare_track``, creates the Elasticsearch client, opens an in-memory metrics store, calls
        ``setup_template`` for every template and ``setup_index`` for every index of the track, calls ``wait_for_status``
        with the configured cluster health and then sends a ``StartLoadGenerator`` message to every newly created
        ``LoadGenerator`` actor.

        :param msg: The start message. Its ``config``, ``track``, ``metrics_meta_info`` and ``lap`` attributes are read.
        :param sender: The actor that sent ``msg``. It is remembered in ``self.start_sender``.
        """
        self.start_sender = sender
        cfg = msg.config
        self.config = cfg
        benchmark_track = msg.track
        self.track = benchmark_track

        track_name = benchmark_track.name
        requested_challenge = cfg.opts("track", "challenge.name")
        challenge_name = benchmark_track.find_challenge_or_default(requested_challenge).name
        car_name = cfg.opts("mechanic", "car.name")

        logger.info("Preparing track [%s]" % track_name)
        # TODO #71: Reconsider this in case we distribute drivers. *For now* the driver will only be on a single machine, so we're safe.
        track.prepare_track(benchmark_track, cfg)

        logger.info("Benchmark for track [%s], challenge [%s] and car [%s] is about to start."
                    % (track_name, challenge_name, car_name))
        self.quiet = cfg.opts("system", "quiet.mode", mandatory=False, default_value=False)

        hosts = cfg.opts("client", "hosts")
        client_options = cfg.opts("client", "options")
        self.es = client.EsClientFactory(hosts, client_options).create()

        self.metrics_store = metrics.InMemoryMetricsStore(cfg=cfg, meta_info=msg.metrics_meta_info, lap=msg.lap)
        invocation = cfg.opts("system", "time.start")
        required_health = cfg.opts("benchmarks", "cluster.health")
        self.metrics_store.open(invocation, track_name, challenge_name, car_name)

        self.challenge = select_challenge(cfg, benchmark_track)
        for index_template in benchmark_track.templates:
            setup_template(self.es, index_template)

        for track_index in benchmark_track.indices:
            setup_index(self.es, track_index, self.challenge.index_settings)
        wait_for_status(self.es, required_health)

        schedule_allocator = Allocator(self.challenge.schedule)
        self.allocations = schedule_allocator.allocations
        self.number_of_steps = len(schedule_allocator.join_points) - 1
        self.ops_per_join_point = schedule_allocator.operations_per_joinpoint

        allocation_summary = (self.number_of_steps, len(self.allocations), self.allocations)
        logger.info("Benchmark consists of [%d] steps executed by (at most) [%d] clients as specified by the allocation matrix:\n%s"
                    % allocation_summary)

        # create all load generators first (each with its own global name) and only then start them
        for client_id in range(schedule_allocator.clients):
            worker_name = "/rally/driver/worker/%s" % str(client_id)
            worker = self.createActor(LoadGenerator, globalName=worker_name, targetActorRequirements={"coordinator": True})
            self.drivers.append(worker)
        for client_id, driver in enumerate(self.drivers):
            logger.info("Starting load generator [%d]." % client_id)
            start_message = StartLoadGenerator(client_id, cfg, benchmark_track, self.allocations[client_id])
            self.send(driver, start_message)

        self.update_progress_message()
        wakeup_interval = datetime.timedelta(seconds=Driver.WAKEUP_INTERVAL_SECONDS)
        self.wakeupAfter(wakeup_interval)