def start_benchmark(self, msg, sender):
    """
    Start the benchmark: set up the metrics store and the target cluster, then
    spawn one LoadGenerator actor per client as prescribed by the allocation matrix.

    :param msg: the start message; carries the config, the track and metrics meta-info.
    :param sender: the actor that requested the benchmark start (kept for later replies).
    """
    logger.info("Benchmark is about to start.")
    self.start_sender = sender
    self.config = msg.config
    self.quiet = msg.config.opts("system", "quiet.mode", mandatory=False, default_value=False)
    self.es = client.EsClientFactory(msg.config.opts("client", "hosts"),
                                     msg.config.opts("client", "options")).create()
    self.metrics_store = metrics.InMemoryMetricsStore(config=self.config, meta_info=msg.metrics_meta_info)
    invocation = self.config.opts("meta", "time.start")
    track_name = self.config.opts("system", "track")
    challenge_name = self.config.opts("benchmarks", "challenge")
    selected_car_name = self.config.opts("benchmarks", "car")
    self.metrics_store.open(invocation, track_name, challenge_name, selected_car_name)

    # NOTE: renamed from "track" to "current_track" so the local does not shadow
    # the track module (sibling variants of this method use the same convention).
    current_track = msg.track
    challenge = select_challenge(self.config, current_track)
    setup_index(self.es, current_track, challenge)
    allocator = Allocator(challenge.schedule)
    self.allocations = allocator.allocations
    # join points delimit the steps; N join points -> N - 1 steps
    self.number_of_steps = len(allocator.join_points) - 1
    self.ops_per_join_point = allocator.operations_per_joinpoint
    logger.info("Benchmark consists of [%d] steps executed by (at most) [%d] clients as specified by the allocation matrix:\n%s"
                % (self.number_of_steps, len(self.allocations), self.allocations))
    # first create all load generator actors, then send each its start message
    for client_id in range(allocator.clients):
        self.drivers.append(self.createActor(LoadGenerator))
    for client_id, driver in enumerate(self.drivers):
        self.send(driver, StartLoadGenerator(client_id, self.config, current_track.indices, self.allocations[client_id]))
    self.update_progress_message()
    self.wakeupAfter(datetime.timedelta(seconds=Driver.WAKEUP_INTERVAL_SECONDS))
def setUp(self):
    """Provide each test with a fresh config and an in-memory metrics store using a static clock."""
    fresh_cfg = config.Config()
    for section, key, value in (("system", "env.name", "unittest"),
                                ("track", "params", {})):
        fresh_cfg.add(config.Scope.application, section, key, value)
    self.cfg = fresh_cfg
    self.metrics_store = metrics.InMemoryMetricsStore(self.cfg, clock=StaticClock)
def test_calculate_simple_index_stats(self):
    """Warmup samples must be excluded; throughput is summarized and latency/service_time are reported as percentiles."""
    cfg = config.Config()
    cfg.add(config.Scope.application, "system", "env.name", "unittest")
    store = metrics.InMemoryMetricsStore(config=cfg, clear=True)
    store.open(datetime.datetime.now(), "test", "unittest", "unittest_car")

    # normal throughput samples
    for sample in (500, 1000, 2000):
        store.put_value_cluster_level("throughput", sample, unit="docs/s", operation="index",
                                      operation_type=track.OperationType.Index)
    # one warmup latency sample (must not show up in the stats) plus normal samples
    store.put_value_cluster_level("latency", 2800, unit="ms", operation="index",
                                  operation_type=track.OperationType.Index, sample_type=metrics.SampleType.Warmup)
    for sample in (200, 220, 225):
        store.put_value_cluster_level("latency", sample, unit="ms", operation="index",
                                      operation_type=track.OperationType.Index)
    # same pattern for service time
    store.put_value_cluster_level("service_time", 250, unit="ms", operation="index",
                                  operation_type=track.OperationType.Index, sample_type=metrics.SampleType.Warmup)
    for sample in (190, 200, 215):
        store.put_value_cluster_level("service_time", sample, unit="ms", operation="index",
                                      operation_type=track.OperationType.Index)

    index = track.Task(operation=track.Operation(name="index", operation_type=track.OperationType.Index,
                                                 granularity_unit="docs/s"))
    challenge = track.Challenge(name="unittest", description="", index_settings=None, schedule=[index])
    stats = reporter.Stats(store, challenge)

    self.assertEqual((500, 1000, 2000, "docs/s"), stats.op_metrics["index"]["throughput"])
    self.assertEqual(collections.OrderedDict([(50.0, 220), (100, 225)]), stats.op_metrics["index"]["latency"])
    self.assertEqual(collections.OrderedDict([(50.0, 200), (100, 215)]), stats.op_metrics["index"]["service_time"])
def on_start_engine(self, msg, sender):
    """
    Create one NodeMechanicActor per target host and send each its start message.

    All mechanics are created first; only afterwards are the start messages sent,
    so a failure during actor creation prevents any node from being started.

    :param msg: carries the config, the externally-provisioned flag and per-node start data.
    :param sender: race control; stored so responses can be routed back later.
    """
    logger.info("Received signal from race control to start engine.")
    self.race_control = sender
    self.cfg = msg.cfg
    self.metrics_store = metrics.InMemoryMetricsStore(self.cfg)
    self.metrics_store.open(ctx=msg.open_metrics_context)
    # In our startup procedure we first create all mechanics. Only if this succeeds we'll continue.
    mechanics_and_start_message = []
    hosts = self.cfg.opts("client", "hosts")
    if len(hosts) == 0:
        raise exceptions.LaunchError("No target hosts are configured.")
    if msg.external:
        logger.info("Cluster will not be provisioned by Rally.")
        # just create one actor for this special case and run it on the coordinator node (i.e. here)
        m = self.createActor(NodeMechanicActor,
                             #globalName="/rally/mechanic/worker/external",
                             targetActorRequirements={"coordinator": True})
        self.children.append(m)
        mechanics_and_start_message.append((m, msg.for_nodes(ip=hosts)))
    else:
        logger.info("Cluster consisting of %s will be provisioned by Rally." % hosts)
        all_ips_and_ports = to_ip_port(hosts)
        all_node_ips = extract_all_node_ips(all_ips_and_ports)
        for ip_port, nodes in nodes_by_host(all_ips_and_ports).items():
            ip, port = ip_port
            if ip == "127.0.0.1":
                # local node: run the mechanic on the coordinator (this machine)
                m = self.createActor(NodeMechanicActor,
                                     #globalName="/rally/mechanic/worker/localhost",
                                     targetActorRequirements={"coordinator": True})
                self.children.append(m)
                mechanics_and_start_message.append((m, msg.for_nodes(all_node_ips, ip, port, nodes)))
            else:
                # remote node: requires an external Rally daemon on that host
                if self.cfg.opts("system", "remote.benchmarking.supported"):
                    logger.info("Benchmarking against %s with external Rally daemon." % hosts)
                else:
                    logger.error("User tried to benchmark against %s but no external Rally daemon has been started." % hosts)
                    raise exceptions.SystemSetupError("To benchmark remote hosts (e.g. %s) you need to start the Rally daemon "
                                                      "on each machine including this one." % ip)
                already_running = actor.actor_system_already_running(ip=ip)
                logger.info("Actor system on [%s] already running? [%s]" % (ip, str(already_running)))
                if not already_running:
                    console.println("Waiting for Rally daemon on [%s] " % ip, end="", flush=True)
                # poll (every 3 seconds) until the remote actor system is reachable
                while not actor.actor_system_already_running(ip=ip):
                    console.println(".", end="", flush=True)
                    time.sleep(3)
                if not already_running:
                    console.println(" [OK]")
                # pin this mechanic to the remote host via actor requirements
                m = self.createActor(NodeMechanicActor,
                                     #globalName="/rally/mechanic/worker/%s" % ip,
                                     targetActorRequirements={"ip": ip})
                mechanics_and_start_message.append((m, msg.for_nodes(all_node_ips, ip, port, nodes)))
                self.children.append(m)
    self.status = "starting"
    self.received_responses = []
    # all actors exist now; kick off the actual node starts
    for mechanic_actor, start_message in mechanics_and_start_message:
        self.send(mechanic_actor, start_message)
def start_benchmark(self, msg, sender):
    """
    Prepare the track, set up the cluster's indices, wait for the expected cluster
    health and then launch one LoadGenerator actor per client.

    :param msg: the start message; carries the config, the track, metrics meta-info and the lap.
    :param sender: the actor that requested the benchmark start.
    """
    self.start_sender = sender
    self.config = msg.config
    benchmarked_track = msg.track

    logger.info("Preparing track")
    # TODO #71: Reconsider this in case we distribute drivers. *For now* the driver will only be on a single machine, so we're safe.
    track.prepare_track(benchmarked_track, self.config)

    logger.info("Benchmark is about to start.")
    cfg = self.config
    self.quiet = cfg.opts("system", "quiet.mode", mandatory=False, default_value=False)
    self.es = client.EsClientFactory(cfg.opts("client", "hosts"), cfg.opts("client", "options")).create()
    self.metrics_store = metrics.InMemoryMetricsStore(config=cfg, meta_info=msg.metrics_meta_info, lap=msg.lap)
    invocation = cfg.opts("meta", "time.start")
    expected_cluster_health = cfg.opts("benchmarks", "cluster.health")
    self.metrics_store.open(invocation,
                            cfg.opts("benchmarks", "track"),
                            cfg.opts("benchmarks", "challenge"),
                            cfg.opts("benchmarks", "car"))

    selected_challenge = select_challenge(cfg, benchmarked_track)
    es_version = cfg.opts("source", "distribution.version")
    # create every index of the track, then block until the cluster reports the expected health
    for idx in benchmarked_track.indices:
        setup_index(self.es, idx, selected_challenge.index_settings)
    wait_for_status(self.es, es_version, expected_cluster_health)

    client_allocator = Allocator(selected_challenge.schedule)
    self.allocations = client_allocator.allocations
    self.number_of_steps = len(client_allocator.join_points) - 1
    self.ops_per_join_point = client_allocator.operations_per_joinpoint
    logger.info("Benchmark consists of [%d] steps executed by (at most) [%d] clients as specified by the allocation matrix:\n%s"
                % (self.number_of_steps, len(self.allocations), self.allocations))

    # create all load generator actors first, then start each with its own allocation
    self.drivers.extend(self.createActor(LoadGenerator) for _ in range(client_allocator.clients))
    for client_id, driver in enumerate(self.drivers):
        self.send(driver, StartLoadGenerator(client_id, self.config, benchmarked_track, self.allocations[client_id]))

    self.update_progress_message()
    self.wakeupAfter(datetime.timedelta(seconds=Driver.WAKEUP_INTERVAL_SECONDS))
def test_externalize_and_bulk_add(self):
    """A store's documents survive a round-trip through to_externalizable() and bulk_add() on a fresh store."""
    self.metrics_store.open(EsMetricsTests.TRIAL_TIMESTAMP, "test", "append-no-conflicts", "defaults", create=True)
    self.metrics_store.lap = 1
    self.metrics_store.put_count_cluster_level("final_index_size", 1000, "GB")
    self.assertEqual(1, len(self.metrics_store.DOCS))

    # externalize, then throw the original store away
    externalized = self.metrics_store.to_externalizable()
    self.metrics_store.close()
    del self.metrics_store

    # a brand-new store starts empty and must contain exactly the re-imported document afterwards
    self.metrics_store = metrics.InMemoryMetricsStore(self.cfg, clock=StaticClock)
    self.assertEqual(0, len(self.metrics_store.DOCS))
    self.metrics_store.bulk_add(externalized)
    self.assertEqual(1, len(self.metrics_store.DOCS))
    self.assertEqual(1000, self.metrics_store.get_one("final_index_size"))
def receiveMessage(self, msg, sender):
    """
    Dispatch on the incoming message type and drive the node lifecycle:
    StartNodes -> ApplyMetricsMetaInfo / ResetRelativeTime / OnBenchmarkStart ->
    OnBenchmarkStop -> StopNodes (which clears all state for potential reuse).
    Any failure is wrapped in a BenchmarkFailure and sent back to the sender.
    """
    # at the moment, we implement all message handling blocking. This is not ideal but simple to get started with. Besides, the caller
    # needs to block anyway. The only reason we implement mechanic as an actor is to distribute them.
    # noinspection PyBroadException
    try:
        logger.debug("NodeMechanicActor#receiveMessage(msg = [%s] sender = [%s])" % (str(type(msg)), str(sender)))
        if isinstance(msg, StartNodes):
            self.host = msg.ip
            if msg.external:
                logger.info("Connecting to externally provisioned nodes on [%s]." % msg.ip)
            else:
                logger.info("Starting node(s) %s on [%s]." % (msg.node_ids, msg.ip))

            # Load node-specific configuration
            self.config = config.auto_load_local_config(msg.cfg, additional_sections=[
                # only copy the relevant bits
                "track", "mechanic", "client",
                # allow metrics store to extract race meta-data
                "race", "source"
            ])
            # set root path (normally done by the main entry point)
            self.config.add(config.Scope.application, "node", "rally.root", paths.rally_root())
            if not msg.external:
                # only Rally-provisioned nodes get ip/port/ids injected into the config
                self.config.add(config.Scope.benchmark, "provisioning", "node.ip", msg.ip)
                # we need to override the port with the value that the user has specified instead of using the default value (39200)
                self.config.add(config.Scope.benchmark, "provisioning", "node.http.port", msg.port)
                self.config.add(config.Scope.benchmark, "provisioning", "node.ids", msg.node_ids)

            self.metrics_store = metrics.InMemoryMetricsStore(self.config)
            self.metrics_store.open(ctx=msg.open_metrics_context)
            # avoid follow-up errors in case we receive an unexpected ActorExitRequest due to an early failure in a parent actor.
            self.metrics_store.lap = 0

            self.mechanic = create(self.config, self.metrics_store, msg.all_node_ips, msg.cluster_settings, msg.sources,
                                   msg.build, msg.distribution, msg.external, msg.docker)
            nodes = self.mechanic.start_engine()
            self.running = True
            self.send(sender, NodesStarted([NodeMetaInfo(node) for node in nodes], self.metrics_store.meta_info))
        elif isinstance(msg, ApplyMetricsMetaInfo):
            self.metrics_store.merge_meta_info(msg.meta_info)
            self.send(sender, MetricsMetaInfoApplied())
        elif isinstance(msg, ResetRelativeTime):
            logger.info("Resetting relative time of system metrics store on host [%s]." % self.host)
            self.metrics_store.reset_relative_time()
        elif isinstance(msg, OnBenchmarkStart):
            self.metrics_store.lap = msg.lap
            self.mechanic.on_benchmark_start()
            self.send(sender, BenchmarkStarted())
        elif isinstance(msg, OnBenchmarkStop):
            self.mechanic.on_benchmark_stop()
            # clear metrics store data to not send duplicate system metrics data
            self.send(sender, BenchmarkStopped(self.metrics_store.to_externalizable(clear=True)))
        elif isinstance(msg, StopNodes):
            logger.info("Stopping nodes %s." % self.mechanic.nodes)
            self.mechanic.stop_engine()
            self.send(sender, NodesStopped(self.metrics_store.to_externalizable()))
            # clear all state as the mechanic might get reused later
            self.running = False
            self.config = None
            self.mechanic = None
            self.metrics_store = None
        elif isinstance(msg, thespian.actors.ActorExitRequest):
            # forced shutdown: only stop the engine if the nodes are actually running
            if self.running:
                logger.info("Stopping nodes %s (due to ActorExitRequest)" % self.mechanic.nodes)
                self.mechanic.stop_engine()
                self.running = False
    except BaseException:
        self.running = False
        logger.exception("Cannot process message [%s]" % msg)
        # avoid "can't pickle traceback objects"
        import traceback
        ex_type, ex_value, ex_traceback = sys.exc_info()
        self.send(sender, actor.BenchmarkFailure(ex_value, traceback.format_exc()))
def receiveMessage(self, msg, sender):
    """
    Dispatch on the incoming message type and drive the engine lifecycle:
    StartEngine -> OnBenchmarkStart -> OnBenchmarkStop -> StopEngine (which
    clears all state for potential reuse). Any failure is wrapped in a
    Failure message and sent back to the sender.
    """
    # at the moment, we implement all message handling blocking. This is not ideal but simple to get started with. Besides, the caller
    # needs to block anyway. The only reason we implement mechanic as an actor is to distribute them.
    # noinspection PyBroadException
    try:
        logger.debug("NodeMechanicActor#receiveMessage(msg = [%s] sender = [%s])" % (str(type(msg)), str(sender)))
        if isinstance(msg, StartEngine):
            logger.info("Starting engine")
            # Load node-specific configuration
            self.config = config.Config(config_name=msg.cfg.name)
            self.config.load_config()
            self.config.add(config.Scope.application, "node", "rally.root", paths.rally_root())
            # copy only the necessary configuration sections
            self.config.add_all(msg.cfg, "system")
            self.config.add_all(msg.cfg, "client")
            self.config.add_all(msg.cfg, "track")
            self.config.add_all(msg.cfg, "mechanic")
            if msg.port is not None:
                # we need to override the port with the value that the user has specified instead of using the default value (39200)
                self.config.add(config.Scope.benchmark, "provisioning", "node.http.port", msg.port)

            self.metrics_store = metrics.InMemoryMetricsStore(self.config)
            self.metrics_store.open(ctx=msg.open_metrics_context)

            self.mechanic = create(self.config, self.metrics_store, self.single_machine, msg.sources, msg.build,
                                   msg.distribution, msg.external, msg.docker)
            cluster = self.mechanic.start_engine()
            # report cluster meta-data plus metrics meta-info back to the requester
            self.send(sender, EngineStarted(
                ClusterMetaInfo(cluster.hosts, cluster.source_revision, cluster.distribution_version),
                self.metrics_store.meta_info))
        elif isinstance(msg, OnBenchmarkStart):
            self.metrics_store.lap = msg.lap
            self.mechanic.on_benchmark_start()
            self.send(sender, Success())
        elif isinstance(msg, OnBenchmarkStop):
            self.mechanic.on_benchmark_stop()
            # clear metrics store data to not send duplicate system metrics data
            self.send(sender, BenchmarkStopped(self.metrics_store.to_externalizable(clear=True)))
        elif isinstance(msg, StopEngine):
            logger.info("Stopping engine")
            self.mechanic.stop_engine()
            self.send(sender, EngineStopped(self.metrics_store.to_externalizable()))
            # clear all state as the mechanic might get reused later
            self.config = None
            self.mechanic = None
            self.metrics_store = None
    except BaseException:
        logger.exception("Cannot process message [%s]" % msg)
        # avoid "can't pickle traceback objects"
        import traceback
        ex_type, ex_value, ex_traceback = sys.exc_info()
        self.send(sender, Failure(ex_value, traceback.format_exc()))
def start_benchmark(self, msg, sender):
    """
    Prepare the track, set up templates and indices on the target cluster, wait
    for the expected cluster health, and then spawn and start one LoadGenerator
    actor per client as prescribed by the allocation matrix.

    :param msg: the start message; carries the config, the track, metrics meta-info and the lap.
    :param sender: the actor that requested the benchmark start (kept for later replies).
    """
    self.start_sender = sender
    self.config = msg.config
    self.track = msg.track

    track_name = self.track.name
    challenge_name = self.track.find_challenge_or_default(self.config.opts("track", "challenge.name")).name
    selected_car_name = self.config.opts("mechanic", "car.name")

    logger.info("Preparing track [%s]" % track_name)
    # TODO #71: Reconsider this in case we distribute drivers. *For now* the driver will only be on a single machine, so we're safe.
    track.prepare_track(self.track, self.config)

    logger.info("Benchmark for track [%s], challenge [%s] and car [%s] is about to start."
                % (track_name, challenge_name, selected_car_name))
    self.quiet = self.config.opts("system", "quiet.mode", mandatory=False, default_value=False)
    self.es = client.EsClientFactory(self.config.opts("client", "hosts"),
                                     self.config.opts("client", "options")).create()
    self.metrics_store = metrics.InMemoryMetricsStore(cfg=self.config, meta_info=msg.metrics_meta_info, lap=msg.lap)
    invocation = self.config.opts("system", "time.start")
    expected_cluster_health = self.config.opts("benchmarks", "cluster.health")
    self.metrics_store.open(invocation, track_name, challenge_name, selected_car_name)

    self.challenge = select_challenge(self.config, self.track)
    # templates are created before the indices; afterwards block until the cluster reports the expected health
    for template in self.track.templates:
        setup_template(self.es, template)
    for index in self.track.indices:
        setup_index(self.es, index, self.challenge.index_settings)
    wait_for_status(self.es, expected_cluster_health)

    allocator = Allocator(self.challenge.schedule)
    self.allocations = allocator.allocations
    # join points delimit the steps; N join points -> N - 1 steps
    self.number_of_steps = len(allocator.join_points) - 1
    self.ops_per_join_point = allocator.operations_per_joinpoint
    logger.info("Benchmark consists of [%d] steps executed by (at most) [%d] clients as specified by the allocation matrix:\n%s"
                % (self.number_of_steps, len(self.allocations), self.allocations))

    # create all load generator actors (pinned to the coordinator via actor requirements),
    # then send each one its start message with its own allocation
    for client_id in range(allocator.clients):
        self.drivers.append(self.createActor(LoadGenerator,
                                             globalName="/rally/driver/worker/%s" % str(client_id),
                                             targetActorRequirements={"coordinator": True}))
    for client_id, driver in enumerate(self.drivers):
        logger.info("Starting load generator [%d]." % client_id)
        self.send(driver, StartLoadGenerator(client_id, self.config, self.track, self.allocations[client_id]))

    self.update_progress_message()
    self.wakeupAfter(datetime.timedelta(seconds=Driver.WAKEUP_INTERVAL_SECONDS))