def teardown(self, cancelled=False, error=False):
    logger.info("Asking mechanic to stop the engine.")
    result = self.actor_system.ask(self.mechanic, mechanic.StopEngine())
    if isinstance(result, mechanic.EngineStopped):
        logger.info("Mechanic has stopped engine successfully.")
        logger.info("Bulk adding system metrics to metrics store.")
        self.metrics_store.bulk_add(result.system_metrics)
    elif isinstance(result, mechanic.Failure):
        logger.info("Stopping engine has failed. Reason [%s]." % result.message)
        raise exceptions.RallyError(result.message, result.cause)
    else:
        if self.ignore_unknown_return:
            console.warn("Mechanic has not stopped engine but instead [%s]. Ignoring." % str(result), logger=logger)
        else:
            raise exceptions.RallyError(
                "Mechanic has not stopped engine but instead [%s]. Terminating race without result." % str(result))
    self.metrics_store.flush()
    if not cancelled and not error:
        final_results = reporter.calculate_results(self.metrics_store, self.race)
        self.race.add_final_results(final_results)
        reporter.summarize(self.race, self.cfg)
        self.race_store.store_race(self.race)
    else:
        logger.info("Suppressing output of summary report. Cancelled = [%r], Error = [%r]." % (cancelled, error))
    self.metrics_store.close()

def test_calculate_simple_index_stats(self):
    cfg = config.Config()
    cfg.add(config.Scope.application, "system", "env.name", "unittest")
    cfg.add(config.Scope.application, "system", "time.start", datetime.datetime.now())
    cfg.add(config.Scope.application, "system", "trial.id", "6ebc6e53-ee20-4b0c-99b4-09697987e9f4")
    cfg.add(config.Scope.application, "reporting", "datastore.type", "in-memory")
    cfg.add(config.Scope.application, "mechanic", "car.names", ["unittest_car"])
    cfg.add(config.Scope.application, "mechanic", "car.params", {})
    cfg.add(config.Scope.application, "mechanic", "plugin.params", {})
    cfg.add(config.Scope.application, "race", "laps", 1)
    cfg.add(config.Scope.application, "race", "user.tag", "")
    cfg.add(config.Scope.application, "race", "pipeline", "from-sources-skip-build")
    cfg.add(config.Scope.application, "track", "params", {})

    index = track.Task(name="index #1", operation=track.Operation(name="index", operation_type=track.OperationType.Bulk,
                                                                  params=None))
    challenge = track.Challenge(name="unittest", schedule=[index], default=True)
    t = track.Track("unittest", "unittest-track", challenges=[challenge])

    store = metrics.metrics_store(cfg, read_only=False, track=t, challenge=challenge)
    store.lap = 1

    store.put_value_cluster_level("throughput", 500, unit="docs/s", task="index #1", operation_type=track.OperationType.Bulk)
    store.put_value_cluster_level("throughput", 1000, unit="docs/s", task="index #1", operation_type=track.OperationType.Bulk)
    store.put_value_cluster_level("throughput", 2000, unit="docs/s", task="index #1", operation_type=track.OperationType.Bulk)

    store.put_value_cluster_level("latency", 2800, unit="ms", task="index #1", operation_type=track.OperationType.Bulk,
                                  sample_type=metrics.SampleType.Warmup)
    store.put_value_cluster_level("latency", 200, unit="ms", task="index #1", operation_type=track.OperationType.Bulk)
    store.put_value_cluster_level("latency", 220, unit="ms", task="index #1", operation_type=track.OperationType.Bulk)
    store.put_value_cluster_level("latency", 225, unit="ms", task="index #1", operation_type=track.OperationType.Bulk)

    store.put_value_cluster_level("service_time", 250, unit="ms", task="index #1", operation_type=track.OperationType.Bulk,
                                  sample_type=metrics.SampleType.Warmup, meta_data={"success": False})
    store.put_value_cluster_level("service_time", 190, unit="ms", task="index #1", operation_type=track.OperationType.Bulk,
                                  meta_data={"success": True})
    store.put_value_cluster_level("service_time", 200, unit="ms", task="index #1", operation_type=track.OperationType.Bulk,
                                  meta_data={"success": False})
    store.put_value_cluster_level("service_time", 215, unit="ms", task="index #1", operation_type=track.OperationType.Bulk,
                                  meta_data={"success": True})

    store.put_count_node_level("rally-node-0", "final_index_size_bytes", 2048, unit="bytes")
    store.put_count_node_level("rally-node-1", "final_index_size_bytes", 4096, unit="bytes")

    stats = reporter.calculate_results(store, metrics.create_race(cfg, t, challenge))

    del store

    opm = stats.metrics("index #1")
    self.assertEqual(collections.OrderedDict([("min", 500), ("median", 1000), ("max", 2000), ("unit", "docs/s")]),
                     opm["throughput"])
    self.assertEqual(collections.OrderedDict([("50_0", 220), ("100_0", 225)]), opm["latency"])
    self.assertEqual(collections.OrderedDict([("50_0", 200), ("100_0", 215)]), opm["service_time"])
    self.assertAlmostEqual(0.3333333333333333, opm["error_rate"])

    self.assertEqual(6144, stats.index_size)

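# The numbers asserted above can be reproduced by hand. The following is a
# minimal standalone sketch (assumption: a simple nearest-rank percentile,
# not necessarily Rally's actual implementation) showing how warmup samples
# are excluded before the percentiles and the error rate are derived.
import math


def nearest_rank_percentile(samples, pct):
    # Nearest-rank method: take the value at position ceil(pct/100 * n).
    ordered = sorted(samples)
    rank = max(1, math.ceil(pct / 100.0 * len(ordered)))
    return ordered[rank - 1]


# The 2800 ms warmup latency sample is dropped before aggregation.
latency = [200, 220, 225]
assert nearest_rank_percentile(latency, 50) == 220
assert nearest_rank_percentile(latency, 100) == 225

# Error rate over the non-warmup service_time samples: one of three failed.
successes = [True, False, True]
error_rate = successes.count(False) / len(successes)
assert abs(error_rate - 1 / 3) < 1e-9
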
def receiveMsg_EngineStopped(self, msg, sender):
    self.logger.info("Mechanic has stopped engine successfully.")
    self.logger.info("Bulk adding system metrics to metrics store.")
    self.metrics_store.bulk_add(msg.system_metrics)
    self.metrics_store.flush()
    if not self.cancelled and not self.error:
        final_results = reporter.calculate_results(self.metrics_store, self.race)
        self.race.add_final_results(final_results)
        reporter.summarize(self.race, self.cfg)
        self.race_store.store_race(self.race)
    else:
        self.logger.info("Suppressing output of summary report. Cancelled = [%r], Error = [%r].",
                         self.cancelled, self.error)
    self.metrics_store.close()
    self.send(self.start_sender, Success())

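# The handler above relies on Thespian's ActorTypeDispatcher, which routes each
# incoming message to a receiveMsg_<ClassName> method instead of one large
# isinstance() chain (compare the receiveMessage variants further below). A
# minimal self-contained sketch with illustrative names only:
from thespian.actors import ActorExitRequest, ActorSystem, ActorTypeDispatcher


class Ping:
    pass


class EchoActor(ActorTypeDispatcher):
    def receiveMsg_Ping(self, msg, sender):
        # Dispatched automatically because the message is an instance of Ping.
        self.send(sender, "pong")

    def receiveMsg_str(self, msg, sender):
        # Plain string messages land here.
        self.send(sender, msg.upper())


if __name__ == "__main__":
    system = ActorSystem()
    try:
        echo = system.createActor(EchoActor)
        print(system.ask(echo, Ping(), 5))   # -> pong
        print(system.ask(echo, "hello", 5))  # -> HELLO
        system.tell(echo, ActorExitRequest())
    finally:
        system.shutdown()
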
def test_calculate_simple_index_stats(self):
    cfg = config.Config()
    cfg.add(config.Scope.application, "system", "env.name", "unittest")
    cfg.add(config.Scope.application, "system", "time.start", datetime.datetime.now())
    cfg.add(config.Scope.application, "reporting", "datastore.type", "in-memory")
    cfg.add(config.Scope.application, "mechanic", "car.names", ["unittest_car"])
    cfg.add(config.Scope.application, "race", "laps", 1)
    cfg.add(config.Scope.application, "race", "user.tag", "")
    cfg.add(config.Scope.application, "race", "pipeline", "from-sources-skip-build")

    index = track.Task(operation=track.Operation(name="index", operation_type=track.OperationType.Index, params=None))
    challenge = track.Challenge(name="unittest", description="", index_settings=None, schedule=[index], default=True)
    t = track.Track("unittest", "unittest-track", challenges=[challenge])

    store = metrics.metrics_store(cfg, read_only=False, track=t, challenge=challenge)
    store.lap = 1

    store.put_value_cluster_level("throughput", 500, unit="docs/s", operation="index",
                                  operation_type=track.OperationType.Index)
    store.put_value_cluster_level("throughput", 1000, unit="docs/s", operation="index",
                                  operation_type=track.OperationType.Index)
    store.put_value_cluster_level("throughput", 2000, unit="docs/s", operation="index",
                                  operation_type=track.OperationType.Index)

    store.put_value_cluster_level("latency", 2800, unit="ms", operation="index",
                                  operation_type=track.OperationType.Index, sample_type=metrics.SampleType.Warmup)
    store.put_value_cluster_level("latency", 200, unit="ms", operation="index",
                                  operation_type=track.OperationType.Index)
    store.put_value_cluster_level("latency", 220, unit="ms", operation="index",
                                  operation_type=track.OperationType.Index)
    store.put_value_cluster_level("latency", 225, unit="ms", operation="index",
                                  operation_type=track.OperationType.Index)

    store.put_value_cluster_level("service_time", 250, unit="ms", operation="index",
                                  operation_type=track.OperationType.Index, sample_type=metrics.SampleType.Warmup,
                                  meta_data={"success": False})
    store.put_value_cluster_level("service_time", 190, unit="ms", operation="index",
                                  operation_type=track.OperationType.Index, meta_data={"success": True})
    store.put_value_cluster_level("service_time", 200, unit="ms", operation="index",
                                  operation_type=track.OperationType.Index, meta_data={"success": False})
    store.put_value_cluster_level("service_time", 215, unit="ms", operation="index",
                                  operation_type=track.OperationType.Index, meta_data={"success": True})

    store.put_count_node_level("rally-node-0", "final_index_size_bytes", 2048, unit="bytes")
    store.put_count_node_level("rally-node-1", "final_index_size_bytes", 4096, unit="bytes")

    stats = reporter.calculate_results(store, metrics.create_race(cfg, t, challenge))

    del store

    opm = stats.metrics("index")
    self.assertEqual(collections.OrderedDict([("min", 500), ("median", 1000), ("max", 2000), ("unit", "docs/s")]),
                     opm["throughput"])
    self.assertEqual(collections.OrderedDict([("50", 220), ("100", 225)]), opm["latency"])
    self.assertEqual(collections.OrderedDict([("50", 200), ("100", 215)]), opm["service_time"])
    self.assertAlmostEqual(0.3333333333333333, opm["error_rate"])

    self.assertEqual(6144, stats.index_size)

def after_lap(self, lap):
    logger.info("Finished lap [%d/%d]" % (lap, self.race.total_laps))
    if self.race.total_laps > 1:
        lap_time = self.lap_timer.split_time() - self.lap_times
        self.lap_times += lap_time
        hl, ml, sl = convert.seconds_to_hour_minute_seconds(lap_time)
        lap_results = reporter.calculate_results(self.metrics_store, self.race, lap)
        self.race.add_lap_results(lap_results)
        reporter.summarize(self.race, self.cfg, lap=lap)
        console.println("")
        if lap < self.race.total_laps:
            remaining = (self.race.total_laps - lap) * self.lap_times / lap
            hr, mr, sr = convert.seconds_to_hour_minute_seconds(remaining)
            console.info("Lap time %02d:%02d:%02d (ETA: %02d:%02d:%02d)" % (hl, ml, sl, hr, mr, sr), logger=logger)
        else:
            console.info("Lap time %02d:%02d:%02d" % (hl, ml, sl), logger=logger)
        console.println("")

def after_lap(self):
    logger.info("Finished lap [%d/%d]" % (self.current_lap, self.race.total_laps))
    if self.race.total_laps > 1:
        lap_time = self.lap_timer.split_time() - self.lap_times
        self.lap_times += lap_time
        hl, ml, sl = convert.seconds_to_hour_minute_seconds(lap_time)
        lap_results = reporter.calculate_results(self.metrics_store, self.race, self.current_lap)
        self.race.add_lap_results(lap_results)
        reporter.summarize(self.race, self.cfg, lap=self.current_lap)
        console.println("")
        if self.current_lap < self.race.total_laps:
            remaining = (self.race.total_laps - self.current_lap) * self.lap_times / self.current_lap
            hr, mr, sr = convert.seconds_to_hour_minute_seconds(remaining)
            console.info("Lap time %02d:%02d:%02d (ETA: %02d:%02d:%02d)" % (hl, ml, sl, hr, mr, sr), logger=logger)
        else:
            console.info("Lap time %02d:%02d:%02d" % (hl, ml, sl), logger=logger)
        console.println("")

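# The ETA in both after_lap variants extrapolates the mean lap time observed
# so far. A quick numeric check of that formula (values are illustrative only):
def eta_seconds(total_laps, current_lap, cumulative_lap_seconds):
    # remaining laps * mean seconds per completed lap
    return (total_laps - current_lap) * cumulative_lap_seconds / current_lap


# After 2 of 5 laps taking 90 s in total (45 s mean), 3 laps remain: 135 s.
assert eta_seconds(5, 2, 90) == 135.0
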
def test_calculate_simple_index_stats(self):
    cfg = config.Config()
    cfg.add(config.Scope.application, "system", "env.name", "unittest")
    cfg.add(config.Scope.application, "system", "time.start", datetime.datetime.now())
    cfg.add(config.Scope.application, "reporting", "datastore.type", "in-memory")
    cfg.add(config.Scope.application, "mechanic", "car.name", "unittest_car")
    cfg.add(config.Scope.application, "race", "laps", 1)
    cfg.add(config.Scope.application, "race", "user.tag", "")
    cfg.add(config.Scope.application, "race", "pipeline", "from-sources-skip-build")

    index = track.Task(operation=track.Operation(name="index", operation_type=track.OperationType.Index, params=None))
    challenge = track.Challenge(name="unittest", description="", index_settings=None, schedule=[index], default=True)
    t = track.Track("unittest", "unittest-track", challenges=[challenge])

    store = metrics.metrics_store(cfg, read_only=False, track=t, challenge=challenge)
    store.lap = 1

    store.put_value_cluster_level("throughput", 500, unit="docs/s", operation="index",
                                  operation_type=track.OperationType.Index)
    store.put_value_cluster_level("throughput", 1000, unit="docs/s", operation="index",
                                  operation_type=track.OperationType.Index)
    store.put_value_cluster_level("throughput", 2000, unit="docs/s", operation="index",
                                  operation_type=track.OperationType.Index)

    store.put_value_cluster_level("latency", 2800, unit="ms", operation="index",
                                  operation_type=track.OperationType.Index, sample_type=metrics.SampleType.Warmup)
    store.put_value_cluster_level("latency", 200, unit="ms", operation="index",
                                  operation_type=track.OperationType.Index)
    store.put_value_cluster_level("latency", 220, unit="ms", operation="index",
                                  operation_type=track.OperationType.Index)
    store.put_value_cluster_level("latency", 225, unit="ms", operation="index",
                                  operation_type=track.OperationType.Index)

    store.put_value_cluster_level("service_time", 250, unit="ms", operation="index",
                                  operation_type=track.OperationType.Index, sample_type=metrics.SampleType.Warmup,
                                  meta_data={"success": False})
    store.put_value_cluster_level("service_time", 190, unit="ms", operation="index",
                                  operation_type=track.OperationType.Index, meta_data={"success": True})
    store.put_value_cluster_level("service_time", 200, unit="ms", operation="index",
                                  operation_type=track.OperationType.Index, meta_data={"success": False})
    store.put_value_cluster_level("service_time", 215, unit="ms", operation="index",
                                  operation_type=track.OperationType.Index, meta_data={"success": True})

    stats = reporter.calculate_results(store, metrics.create_race(cfg, t, challenge))

    del store

    opm = stats.metrics("index")
    self.assertEqual(collections.OrderedDict([("min", 500), ("median", 1000), ("max", 2000), ("unit", "docs/s")]),
                     opm["throughput"])
    self.assertEqual(collections.OrderedDict([("50", 220), ("100", 225)]), opm["latency"])
    self.assertEqual(collections.OrderedDict([("50", 200), ("100", 215)]), opm["service_time"])
    self.assertAlmostEqual(0.3333333333333333, opm["error_rate"])

def receiveMessage(self, msg, sender):
    try:
        logger.info("BenchmarkActor#receiveMessage(msg = [%s] sender = [%s])" % (str(type(msg)), str(sender)))
        if isinstance(msg, Setup):
            self.start_sender = sender
            self.setup(msg)
        elif isinstance(msg, mechanic.EngineStarted):
            logger.info("Mechanic has started engine successfully.")
            self.metrics_store.meta_info = msg.system_meta_info
            cluster = msg.cluster_meta_info
            self.race.cluster = cluster
            console.info("Racing on track [%s], challenge [%s] and car %s\n"
                         % (self.race.track_name, self.race.challenge_name, self.race.car))
            # Start running. We assume that each race has at least one lap.
            self.run()
        elif isinstance(msg, driver.TaskFinished):
            logger.info("Task has finished.")
            logger.info("Bulk adding request metrics to metrics store.")
            self.metrics_store.bulk_add(msg.metrics)
            # We choose *NOT* to reset our own metrics store's timer as this one is only used to collect complete
            # metrics records from other stores (used by driver and mechanic). Hence there is no need to reset the
            # timer in our own metrics store.
            self.send(self.mechanic, mechanic.ResetRelativeTime(msg.next_task_scheduled_in))
        elif isinstance(msg, actor.BenchmarkCancelled):
            self.cancelled = True
            # even notify the start sender if it is the originator. The reason is that we call #ask() which waits
            # for a reply. We also need to ask in order to avoid races between this notification and the following
            # ActorExitRequest.
            self.send(self.start_sender, msg)
        elif isinstance(msg, actor.BenchmarkFailure):
            logger.info("Received a benchmark failure from [%s] and will forward it now." % sender)
            self.error = True
            self.send(self.start_sender, msg)
        elif isinstance(msg, driver.BenchmarkComplete):
            logger.info("Benchmark is complete.")
            logger.info("Bulk adding request metrics to metrics store.")
            self.metrics_store.bulk_add(msg.metrics)
            self.send(self.main_driver, thespian.actors.ActorExitRequest())
            self.main_driver = None
            self.send(self.mechanic, mechanic.OnBenchmarkStop())
        elif isinstance(msg, mechanic.BenchmarkStopped):
            logger.info("Bulk adding system metrics to metrics store.")
            self.metrics_store.bulk_add(msg.system_metrics)
            logger.info("Flushing metrics data...")
            self.metrics_store.flush()
            logger.info("Flushing done")
            self.lap_counter.after_lap()
            if self.lap_counter.has_more_laps():
                self.run()
            else:
                self.teardown()
        elif isinstance(msg, mechanic.EngineStopped):
            logger.info("Mechanic has stopped engine successfully.")
            logger.info("Bulk adding system metrics to metrics store.")
            self.metrics_store.bulk_add(msg.system_metrics)
            self.metrics_store.flush()
            if not self.cancelled and not self.error:
                final_results = reporter.calculate_results(self.metrics_store, self.race)
                self.race.add_final_results(final_results)
                reporter.summarize(self.race, self.cfg)
                self.race_store.store_race(self.race)
            else:
                logger.info("Suppressing output of summary report. Cancelled = [%r], Error = [%r]."
                            % (self.cancelled, self.error))
            self.metrics_store.close()
            self.send(self.start_sender, Success())
        elif isinstance(msg, thespian.actors.ActorExitRequest):
            if self.mechanic:
                self.send(self.mechanic, msg)
                self.mechanic = None
            if self.main_driver:
                self.send(self.main_driver, msg)
                self.main_driver = None
        else:
            logger.info("BenchmarkActor received unknown message [%s] (ignoring)." % (str(msg)))
    except BaseException as e:
        self.error = True
        logger.exception("BenchmarkActor encountered a fatal exception. Shutting down.")
        self.send(self.start_sender, actor.BenchmarkFailure("Could not execute benchmark", e))

def receiveMessage(self, msg, sender):
    try:
        logger.debug("BenchmarkActor#receiveMessage(msg = [%s] sender = [%s])" % (str(type(msg)), str(sender)))
        if isinstance(msg, Setup):
            self.start_sender = sender
            self.setup(msg)
        elif isinstance(msg, mechanic.EngineStarted):
            logger.info("Mechanic has started engine successfully.")
            self.metrics_store.meta_info = msg.system_meta_info
            cluster = msg.cluster_meta_info
            self.race.cluster = cluster
            console.info("Racing on track [%s], challenge [%s] and car %s\n"
                         % (self.race.track_name, self.race.challenge_name, self.race.car))
            # Start running. We assume that each race has at least one lap.
            self.run()
        elif isinstance(msg, driver.TaskFinished):
            logger.info("Task has finished.")
            logger.info("Bulk adding request metrics to metrics store.")
            self.metrics_store.bulk_add(msg.metrics)
            # We choose *NOT* to reset our own metrics store's timer as this one is only used to collect complete
            # metrics records from other stores (used by driver and mechanic). Hence there is no need to reset the
            # timer in our own metrics store.
            self.send(self.mechanic, mechanic.ResetRelativeTime(msg.next_task_scheduled_in))
        elif isinstance(msg, actor.BenchmarkCancelled):
            self.cancelled = True
            # even notify the start sender if it is the originator. The reason is that we call #ask() which waits
            # for a reply. We also need to ask in order to avoid races between this notification and the following
            # ActorExitRequest.
            self.send(self.start_sender, msg)
        elif isinstance(msg, actor.BenchmarkFailure):
            logger.info("Received a benchmark failure from [%s] and will forward it now." % sender)
            self.error = True
            self.send(self.start_sender, msg)
        elif isinstance(msg, driver.BenchmarkComplete):
            logger.info("Benchmark is complete.")
            logger.info("Bulk adding request metrics to metrics store.")
            self.metrics_store.bulk_add(msg.metrics)
            self.send(self.main_driver, thespian.actors.ActorExitRequest())
            self.main_driver = None
            self.send(self.mechanic, mechanic.OnBenchmarkStop())
        elif isinstance(msg, mechanic.BenchmarkStopped):
            logger.info("Bulk adding system metrics to metrics store.")
            self.metrics_store.bulk_add(msg.system_metrics)
            logger.info("Flushing metrics data...")
            self.metrics_store.flush()
            logger.info("Flushing done")
            self.lap_counter.after_lap()
            if self.lap_counter.has_more_laps():
                self.run()
            else:
                self.teardown()
        elif isinstance(msg, mechanic.EngineStopped):
            logger.info("Mechanic has stopped engine successfully.")
            logger.info("Bulk adding system metrics to metrics store.")
            self.metrics_store.bulk_add(msg.system_metrics)
            self.metrics_store.flush()
            if not self.cancelled and not self.error:
                final_results = reporter.calculate_results(self.metrics_store, self.race)
                self.race.add_final_results(final_results)
                reporter.summarize(self.race, self.cfg)
                self.race_store.store_race(self.race)
            else:
                logger.info("Suppressing output of summary report. Cancelled = [%r], Error = [%r]."
                            % (self.cancelled, self.error))
            self.metrics_store.close()
            self.send(self.start_sender, Success())
        elif isinstance(msg, thespian.actors.ActorExitRequest):
            if self.mechanic:
                self.send(self.mechanic, msg)
                self.mechanic = None
            if self.main_driver:
                self.send(self.main_driver, msg)
                self.main_driver = None
        else:
            logger.info("BenchmarkActor received unknown message [%s] (ignoring)." % (str(msg)))
    except BaseException as e:
        self.error = True
        logger.exception("BenchmarkActor encountered a fatal exception. Shutting down.")
        self.send(self.start_sender, actor.BenchmarkFailure("Could not execute benchmark", e))