def report(self, r1, r2):
    logger.info("Generating comparison report for baseline (invocation=[%s], track=[%s], challenge=[%s], car=[%s]) and "
                "contender (invocation=[%s], track=[%s], challenge=[%s], car=[%s])" %
                (r1.trial_timestamp, r1.track, r1.challenge, r1.car,
                 r2.trial_timestamp, r2.track, r2.challenge, r2.car))
    # we don't verify anything about the races as it is possible that the user benchmarks two different tracks intentionally
    baseline_store = metrics.metrics_store(self._config,
                                           invocation=r1.trial_timestamp, track=r1.track, challenge=r1.challenge.name, car=r1.car)
    baseline_stats = Stats(baseline_store, r1.challenge)

    contender_store = metrics.metrics_store(self._config,
                                            invocation=r2.trial_timestamp, track=r2.track, challenge=r2.challenge.name, car=r2.car)
    contender_stats = Stats(contender_store, r2.challenge)

    print_internal("")
    print_internal("Comparing baseline")
    print_internal("  Race timestamp: %s" % r1.trial_timestamp)
    print_internal("  Challenge: %s" % r1.challenge.name)
    print_internal("  Car: %s" % r1.car)
    print_internal("")
    print_internal("with contender")
    print_internal("  Race timestamp: %s" % r2.trial_timestamp)
    print_internal("  Challenge: %s" % r2.challenge.name)
    print_internal("  Car: %s" % r2.car)
    print_internal("")
    print_header("------------------------------------------------------")
    print_header("    _______             __   _____                    ")
    print_header("   / ____(_)___  ____ _/ /  / ___/_________  ________ ")
    print_header("  / /_  / / __ \/ __ `/ /   \__ \/ ___/ __ \/ ___/ _ \\")
    print_header(" / __/ / / / / / /_/ / /   ___/ / /__/ /_/ / / /  __/")
    print_header("/_/   /_/_/ /_/\__,_/_/   /____/\___/\____/_/  \___/ ")
    print_header("------------------------------------------------------")
    print_internal("")
    print_internal(self.format_as_table(self.metrics_table(baseline_stats, contender_stats)))
def report(self, r1, r2):
    logger.info("Generating comparison report for baseline (invocation=[%s], track=[%s], challenge=[%s], car=[%s]) and "
                "contender (invocation=[%s], track=[%s], challenge=[%s], car=[%s])" %
                (r1.trial_timestamp, r1.track, r1.challenge, r1.car,
                 r2.trial_timestamp, r2.track, r2.challenge, r2.car))
    # we don't verify anything about the races as it is possible that the user benchmarks two different tracks intentionally
    baseline_store = metrics.metrics_store(self._config,
                                           invocation=r1.trial_timestamp, track=r1.track, challenge=r1.challenge.name, car=r1.car)
    baseline_stats = Stats(baseline_store, r1.challenge)

    contender_store = metrics.metrics_store(self._config,
                                            invocation=r2.trial_timestamp, track=r2.track, challenge=r2.challenge.name, car=r2.car)
    contender_stats = Stats(contender_store, r2.challenge)

    print_internal("")
    print_internal("Comparing baseline")
    print_internal("  Race timestamp: %s" % r1.trial_timestamp)
    print_internal("  Challenge: %s" % r1.challenge.name)
    print_internal("  Car: %s" % r1.car)
    print_internal("")
    print_internal("with contender")
    print_internal("  Race timestamp: %s" % r2.trial_timestamp)
    print_internal("  Challenge: %s" % r2.challenge.name)
    print_internal("  Car: %s" % r2.car)
    print_internal("")
    print_header("------------------------------------------------------")
    print_header("    _______             __   _____                    ")
    print_header("   / ____(_)___  ____ _/ /  / ___/_________  ________ ")
    print_header("  / /_  / / __ \/ __ `/ /   \__ \/ ___/ __ \/ ___/ _ \\")
    print_header(" / __/ / / / / / /_/ / /   ___/ / /__/ /_/ / / /  __/")
    print_header("/_/   /_/_/ /_/\__,_/_/   /____/\___/\____/_/  \___/ ")
    print_header("------------------------------------------------------")
    print_internal("")

    metrics_table = []
    metrics_table += self.report_total_times(baseline_stats, contender_stats)
    metrics_table += self.report_merge_part_times(baseline_stats, contender_stats)
    # metrics_table += self.report_cpu_usage(baseline_stats, contender_stats)
    metrics_table += self.report_gc_times(baseline_stats, contender_stats)
    metrics_table += self.report_disk_usage(baseline_stats, contender_stats)
    metrics_table += self.report_segment_memory(baseline_stats, contender_stats)
    metrics_table += self.report_segment_counts(baseline_stats, contender_stats)

    for t1 in r1.challenge.schedule:
        for t2 in r2.challenge.schedule:
            # only report matching metrics
            if t1.operation.name == t2.operation.name:
                metrics_table += self.report_throughput(baseline_stats, contender_stats, t1.operation)
                metrics_table += self.report_latency(baseline_stats, contender_stats, t1.operation)
                metrics_table += self.report_service_time(baseline_stats, contender_stats, t1.operation)

    print_internal(tabulate.tabulate(metrics_table,
                                     headers=["Metric", "Operation", "Baseline", "Contender", "Diff", "Unit"],
                                     numalign="right", stralign="right"))
def report(self, r1, r2):
    logger.info("Generating comparison report for baseline (invocation=[%s], track=[%s], challenge=[%s], car=[%s]) and "
                "contender (invocation=[%s], track=[%s], challenge=[%s], car=[%s])" %
                (r1.trial_timestamp, r1.track, r1.challenge, r1.car,
                 r2.trial_timestamp, r2.track, r2.challenge, r2.car))
    # we don't verify anything about the races as it is possible that the user benchmarks two different tracks intentionally
    baseline_store = metrics.metrics_store(self._config)
    baseline_store.open(r1.trial_timestamp, r1.track, r1.challenge.name, r1.car)
    baseline_stats = Stats(baseline_store,
                           stats_sample_size=r1.challenge.stats_sample_size,
                           queries=r1.challenge.queries,
                           search_sample_size=r1.challenge.search_sample_size)

    contender_store = metrics.metrics_store(self._config)
    contender_store.open(r2.trial_timestamp, r2.track, r2.challenge.name, r2.car)
    contender_stats = Stats(contender_store,
                            stats_sample_size=r2.challenge.stats_sample_size,
                            queries=r2.challenge.queries,
                            search_sample_size=r2.challenge.search_sample_size)

    print_internal("")
    print_internal("Comparing baseline")
    print_internal("  Race timestamp: %s" % r1.trial_timestamp)
    print_internal("  Challenge: %s" % r1.challenge.name)
    print_internal("  Car: %s" % r1.car)
    print_internal("")
    print_internal("with contender")
    print_internal("  Race timestamp: %s" % r2.trial_timestamp)
    print_internal("  Challenge: %s" % r2.challenge.name)
    print_internal("  Car: %s" % r2.car)
    print_internal("")
    print_header("------------------------------------------------------")
    print_header("    _______             __   _____                    ")
    print_header("   / ____(_)___  ____ _/ /  / ___/_________  ________ ")
    print_header("  / /_  / / __ \/ __ `/ /   \__ \/ ___/ __ \/ ___/ _ \\")
    print_header(" / __/ / / / / / /_/ / /   ___/ / /__/ /_/ / / /  __/")
    print_header("/_/   /_/_/ /_/\__,_/_/   /____/\___/\____/_/  \___/ ")
    print_header("------------------------------------------------------")

    metrics_table = []
    metrics_table += self.report_index_throughput(baseline_stats, contender_stats)
    metrics_table += self.report_merge_part_times(baseline_stats, contender_stats)
    metrics_table += self.report_total_times(baseline_stats, contender_stats)
    metrics_table += self.report_search_latency(baseline_stats, contender_stats)
    metrics_table += self.report_cpu_usage(baseline_stats, contender_stats)
    metrics_table += self.report_gc_times(baseline_stats, contender_stats)
    metrics_table += self.report_disk_usage(baseline_stats, contender_stats)
    metrics_table += self.report_segment_memory(baseline_stats, contender_stats)
    metrics_table += self.report_segment_counts(baseline_stats, contender_stats)
    metrics_table += self.report_stats_latency(baseline_stats, contender_stats)

    print_internal(tabulate.tabulate(metrics_table,
                                     headers=["Metric", "Baseline", "Contender", "Diff"],
                                     numalign="right", stralign="right"))
def setup(self, sources=False):
    # to load the track we need to know the correct cluster distribution version. Usually, this value should be set
    # but there are rare cases (external pipeline and user did not specify the distribution version) where we need
    # to derive it ourselves. For source builds we always assume "master"
    if not sources and not self.cfg.exists("mechanic", "distribution.version"):
        distribution_version = mechanic.cluster_distribution_version(self.cfg)
        self.logger.info("Automatically derived distribution version [%s]", distribution_version)
        self.cfg.add(config.Scope.benchmark, "mechanic", "distribution.version", distribution_version)
        min_es_version = versions.Version.from_string(version.minimum_es_version())
        specified_version = versions.Version.from_string(distribution_version)
        if specified_version < min_es_version:
            raise exceptions.SystemSetupError(f"Cluster version must be at least [{min_es_version}] but was [{distribution_version}]")

    self.current_track = track.load_track(self.cfg)
    self.track_revision = self.cfg.opts("track", "repository.revision", mandatory=False)
    challenge_name = self.cfg.opts("track", "challenge.name")
    self.current_challenge = self.current_track.find_challenge_or_default(challenge_name)
    if self.current_challenge is None:
        raise exceptions.SystemSetupError(
            "Track [{}] does not provide challenge [{}]. List the available tracks with {} list tracks.".format(
                self.current_track.name, challenge_name, PROGRAM_NAME))
    if self.current_challenge.user_info:
        console.info(self.current_challenge.user_info)
    self.race = metrics.create_race(self.cfg, self.current_track, self.current_challenge, self.track_revision)

    self.metrics_store = metrics.metrics_store(
        self.cfg,
        track=self.race.track_name,
        challenge=self.race.challenge_name,
        read_only=False
    )
    self.race_store = metrics.race_store(self.cfg)
def __init__(self, cfg, sources=False, build=False, distribution=False, external=False, docker=False):
    self.cfg = cfg
    # we preload the track here but in rare cases (external pipeline and user did not specify the distribution version) we might need
    # to reload the track again. We are assuming that a track always specifies the same challenges for each version (i.e. branch).
    t = self._load_track()
    challenge = self._find_challenge(t)

    self.race = metrics.create_race(self.cfg, t, challenge)
    self.metrics_store = metrics.metrics_store(
        self.cfg,
        track=self.race.track_name,
        challenge=self.race.challenge_name,
        read_only=False)
    self.race_store = metrics.race_store(self.cfg)
    self.sources = sources
    self.build = build
    self.distribution = distribution
    self.external = external
    self.docker = docker
    self.actor_system = None
    self.mechanic = None
    self.ignore_unknown_return = self.cfg.opts("system", "ignore.unknown.return")
def __init__(self, cfg, sources=False, build=False, distribution=False, external=False, docker=False):
    self.cfg = cfg
    self.track = track.load_track(self.cfg)
    challenge_name = self.cfg.opts("track", "challenge.name")
    challenge = self.track.find_challenge_or_default(challenge_name)
    if challenge is None:
        raise exceptions.SystemSetupError(
            "Track [%s] does not provide challenge [%s]. List the available tracks with %s list tracks."
            % (self.track.name, challenge_name, PROGRAM_NAME))
    self.metrics_store = metrics.metrics_store(self.cfg, track=self.track.name, challenge=challenge.name, read_only=False)
    self.race_store = metrics.race_store(self.cfg)
    self.sources = sources
    self.build = build
    self.distribution = distribution
    self.external = external
    self.docker = docker
    self.actor_system = None
    self.mechanic = None
def benchmark_only(cfg):
    # We'll use a special car name for external benchmarks.
    cfg.add(config.Scope.benchmark, "benchmarks", "car", "external")
    metrics_store = metrics.metrics_store(cfg, read_only=False)
    return race(Benchmark(cfg, mechanic.create(cfg, metrics_store, external=True), metrics_store), cfg)
def report(self, t):
    if self.is_final_report():
        print_internal("")
        print_header("------------------------------------------------------")
        print_header("    _______             __   _____                    ")
        print_header("   / ____(_)___  ____ _/ /  / ___/_________  ________ ")
        print_header("  / /_  / / __ \/ __ `/ /   \__ \/ ___/ __ \/ ___/ _ \\")
        print_header(" / __/ / / / / / /_/ / /   ___/ / /__/ /_/ / / /  __/")
        print_header("/_/   /_/_/ /_/\__,_/_/   /____/\___/\____/_/  \___/ ")
        print_header("------------------------------------------------------")
        print_internal("")
    else:
        print_internal("")
        print_header("--------------------------------------------------")
        print_header("    __                   _____                    ")
        print_header("   / /   ____ _____     / ___/_________  ________ ")
        print_header("  / /   / __ `/ __ \    \__ \/ ___/ __ \/ ___/ _ \\")
        print_header(" / /___/ /_/ / /_/ /   ___/ / /__/ /_/ / / /  __/")
        print_header("/_____/\__,_/ .___/   /____/\___/\____/_/  \___/ ")
        print_header("           /_/                                    ")
        print_header("--------------------------------------------------")
        print_internal("")

    selected_challenge = self._config.opts("benchmarks", "challenge")
    for challenge in t.challenges:
        if challenge.name == selected_challenge:
            store = metrics.metrics_store(self._config)
            stats = Stats(store, challenge, self._lap)

            metrics_table = []
            meta_info_table = []
            metrics_table += self.report_total_times(stats)
            metrics_table += self.report_merge_part_times(stats)
            metrics_table += self.report_cpu_usage(stats)
            metrics_table += self.report_gc_times(stats)
            metrics_table += self.report_disk_usage(stats)
            metrics_table += self.report_segment_memory(stats)
            metrics_table += self.report_segment_counts(stats)

            for tasks in challenge.schedule:
                for task in tasks:
                    metrics_table += self.report_throughput(stats, task.operation)
                    metrics_table += self.report_latency(stats, task.operation)
                    metrics_table += self.report_service_time(stats, task.operation)

            meta_info_table += self.report_meta_info()

            self.write_report(metrics_table, meta_info_table)
def start_metrics(self, track, challenge, car):
    invocation = self._config.opts("meta", "time.start")
    self._metrics_store = metrics.metrics_store(self._config)
    self._metrics_store.open(invocation, track, challenge, car, create=True)
def test_calculate_simple_index_stats(self):
    cfg = config.Config()
    cfg.add(config.Scope.application, "system", "env.name", "unittest")
    cfg.add(config.Scope.application, "system", "time.start", datetime.datetime.now())
    cfg.add(config.Scope.application, "system", "trial.id", "6ebc6e53-ee20-4b0c-99b4-09697987e9f4")
    cfg.add(config.Scope.application, "reporting", "datastore.type", "in-memory")
    cfg.add(config.Scope.application, "mechanic", "car.names", ["unittest_car"])
    cfg.add(config.Scope.application, "mechanic", "car.params", {})
    cfg.add(config.Scope.application, "mechanic", "plugin.params", {})
    cfg.add(config.Scope.application, "race", "laps", 1)
    cfg.add(config.Scope.application, "race", "user.tag", "")
    cfg.add(config.Scope.application, "race", "pipeline", "from-sources-skip-build")
    cfg.add(config.Scope.application, "track", "params", {})

    index = track.Task(name="index #1",
                       operation=track.Operation(name="index", operation_type=track.OperationType.Bulk, params=None))
    challenge = track.Challenge(name="unittest", schedule=[index], default=True)
    t = track.Track("unittest", "unittest-track", challenges=[challenge])

    store = metrics.metrics_store(cfg, read_only=False, track=t, challenge=challenge)
    store.lap = 1

    store.put_value_cluster_level("throughput", 500, unit="docs/s", task="index #1", operation_type=track.OperationType.Bulk)
    store.put_value_cluster_level("throughput", 1000, unit="docs/s", task="index #1", operation_type=track.OperationType.Bulk)
    store.put_value_cluster_level("throughput", 2000, unit="docs/s", task="index #1", operation_type=track.OperationType.Bulk)

    store.put_value_cluster_level("latency", 2800, unit="ms", task="index #1", operation_type=track.OperationType.Bulk,
                                  sample_type=metrics.SampleType.Warmup)
    store.put_value_cluster_level("latency", 200, unit="ms", task="index #1", operation_type=track.OperationType.Bulk)
    store.put_value_cluster_level("latency", 220, unit="ms", task="index #1", operation_type=track.OperationType.Bulk)
    store.put_value_cluster_level("latency", 225, unit="ms", task="index #1", operation_type=track.OperationType.Bulk)

    store.put_value_cluster_level("service_time", 250, unit="ms", task="index #1", operation_type=track.OperationType.Bulk,
                                  sample_type=metrics.SampleType.Warmup, meta_data={"success": False})
    store.put_value_cluster_level("service_time", 190, unit="ms", task="index #1", operation_type=track.OperationType.Bulk,
                                  meta_data={"success": True})
    store.put_value_cluster_level("service_time", 200, unit="ms", task="index #1", operation_type=track.OperationType.Bulk,
                                  meta_data={"success": False})
    store.put_value_cluster_level("service_time", 215, unit="ms", task="index #1", operation_type=track.OperationType.Bulk,
                                  meta_data={"success": True})

    store.put_count_node_level("rally-node-0", "final_index_size_bytes", 2048, unit="bytes")
    store.put_count_node_level("rally-node-1", "final_index_size_bytes", 4096, unit="bytes")

    stats = reporter.calculate_results(store, metrics.create_race(cfg, t, challenge))

    del store

    opm = stats.metrics("index #1")
    self.assertEqual(collections.OrderedDict([("min", 500), ("median", 1000), ("max", 2000), ("unit", "docs/s")]), opm["throughput"])
    self.assertEqual(collections.OrderedDict([("50_0", 220), ("100_0", 225)]), opm["latency"])
    self.assertEqual(collections.OrderedDict([("50_0", 200), ("100_0", 215)]), opm["service_time"])
    self.assertAlmostEqual(0.3333333333333333, opm["error_rate"])

    self.assertEqual(6144, stats.index_size)
def setup(self, msg, sender):
    self.start_sender = sender
    self.cfg = msg.cfg
    # to load the track we need to know the correct cluster distribution version. Usually, this value should be set but there are rare
    # cases (external pipeline and user did not specify the distribution version) where we need to derive it ourselves. For source
    # builds we always assume "master"
    if not msg.sources and not self.cfg.exists("mechanic", "distribution.version"):
        distribution_version = mechanic.cluster_distribution_version(self.cfg)
        if not distribution_version:
            raise exceptions.SystemSetupError("A distribution version is required. Please specify it with --distribution-version.")
        self.logger.info("Automatically derived distribution version [%s]", distribution_version)
        self.cfg.add(config.Scope.benchmark, "mechanic", "distribution.version", distribution_version)

    t = track.load_track(self.cfg)
    self.track_revision = self.cfg.opts("track", "repository.revision", mandatory=False)
    challenge_name = self.cfg.opts("track", "challenge.name")
    challenge = t.find_challenge_or_default(challenge_name)
    if challenge is None:
        raise exceptions.SystemSetupError(
            "Track [%s] does not provide challenge [%s]. List the available tracks with %s list tracks."
            % (t.name, challenge_name, PROGRAM_NAME))
    if challenge.user_info:
        console.info(challenge.user_info)
    self.race = metrics.create_race(self.cfg, t, challenge, self.track_revision)

    self.metrics_store = metrics.metrics_store(
        self.cfg,
        track=self.race.track_name,
        challenge=self.race.challenge_name,
        read_only=False)
    self.race_store = metrics.race_store(self.cfg)

    self.logger.info("Asking mechanic to start the engine.")
    cluster_settings = challenge.cluster_settings
    self.mechanic = self.createActor(mechanic.MechanicActor, targetActorRequirements={"coordinator": True})
    self.send(self.mechanic, mechanic.StartEngine(self.cfg, self.metrics_store.open_context, cluster_settings,
                                                  msg.sources, msg.build, msg.distribution, msg.external, msg.docker))
def test_calculate_simple_index_stats(self):
    cfg = config.Config()
    cfg.add(config.Scope.application, "system", "env.name", "unittest")
    cfg.add(config.Scope.application, "system", "time.start", datetime.datetime.now())
    cfg.add(config.Scope.application, "reporting", "datastore.type", "in-memory")
    cfg.add(config.Scope.application, "mechanic", "car.names", ["unittest_car"])
    cfg.add(config.Scope.application, "race", "laps", 1)
    cfg.add(config.Scope.application, "race", "user.tag", "")
    cfg.add(config.Scope.application, "race", "pipeline", "from-sources-skip-build")

    index = track.Task(operation=track.Operation(name="index", operation_type=track.OperationType.Index, params=None))
    challenge = track.Challenge(name="unittest", description="", index_settings=None, schedule=[index], default=True)
    t = track.Track("unittest", "unittest-track", challenges=[challenge])

    store = metrics.metrics_store(cfg, read_only=False, track=t, challenge=challenge)
    store.lap = 1

    store.put_value_cluster_level("throughput", 500, unit="docs/s", operation="index", operation_type=track.OperationType.Index)
    store.put_value_cluster_level("throughput", 1000, unit="docs/s", operation="index", operation_type=track.OperationType.Index)
    store.put_value_cluster_level("throughput", 2000, unit="docs/s", operation="index", operation_type=track.OperationType.Index)

    store.put_value_cluster_level("latency", 2800, unit="ms", operation="index", operation_type=track.OperationType.Index,
                                  sample_type=metrics.SampleType.Warmup)
    store.put_value_cluster_level("latency", 200, unit="ms", operation="index", operation_type=track.OperationType.Index)
    store.put_value_cluster_level("latency", 220, unit="ms", operation="index", operation_type=track.OperationType.Index)
    store.put_value_cluster_level("latency", 225, unit="ms", operation="index", operation_type=track.OperationType.Index)

    store.put_value_cluster_level("service_time", 250, unit="ms", operation="index", operation_type=track.OperationType.Index,
                                  sample_type=metrics.SampleType.Warmup, meta_data={"success": False})
    store.put_value_cluster_level("service_time", 190, unit="ms", operation="index", operation_type=track.OperationType.Index,
                                  meta_data={"success": True})
    store.put_value_cluster_level("service_time", 200, unit="ms", operation="index", operation_type=track.OperationType.Index,
                                  meta_data={"success": False})
    store.put_value_cluster_level("service_time", 215, unit="ms", operation="index", operation_type=track.OperationType.Index,
                                  meta_data={"success": True})

    store.put_count_node_level("rally-node-0", "final_index_size_bytes", 2048, unit="bytes")
    store.put_count_node_level("rally-node-1", "final_index_size_bytes", 4096, unit="bytes")

    stats = reporter.calculate_results(store, metrics.create_race(cfg, t, challenge))

    del store

    opm = stats.metrics("index")
    self.assertEqual(collections.OrderedDict([("min", 500), ("median", 1000), ("max", 2000), ("unit", "docs/s")]), opm["throughput"])
    self.assertEqual(collections.OrderedDict([("50", 220), ("100", 225)]), opm["latency"])
    self.assertEqual(collections.OrderedDict([("50", 200), ("100", 215)]), opm["service_time"])
    self.assertAlmostEqual(0.3333333333333333, opm["error_rate"])

    self.assertEqual(6144, stats.index_size)
def report(self, t):
    print_internal("")
    print_header("------------------------------------------------------")
    print_header("    _______             __   _____                    ")
    print_header("   / ____(_)___  ____ _/ /  / ___/_________  ________ ")
    print_header("  / /_  / / __ \/ __ `/ /   \__ \/ ___/ __ \/ ___/ _ \\")
    print_header(" / __/ / / / / / /_/ / /   ___/ / /__/ /_/ / / /  __/")
    print_header("/_/   /_/_/ /_/\__,_/_/   /____/\___/\____/_/  \___/ ")
    print_header("------------------------------------------------------")
    print_internal("")

    selected_challenge = self._config.opts("benchmarks", "challenge")
    selected_car = self._config.opts("benchmarks", "car")
    invocation = self._config.opts("meta", "time.start")
    logger.info("Generating summary report for invocation=[%s], track=[%s], challenge=[%s], car=[%s]" %
                (invocation, t.name, selected_challenge, selected_car))

    for challenge in t.challenges:
        if challenge.name == selected_challenge:
            store = metrics.metrics_store(self._config)
            store.open(invocation, t.name, challenge.name, selected_car)

            stats = Stats(store, challenge)

            metrics_table = []
            meta_info_table = []
            metrics_table += self.report_total_times(stats)
            metrics_table += self.report_merge_part_times(stats)
            metrics_table += self.report_cpu_usage(stats)
            metrics_table += self.report_gc_times(stats)
            metrics_table += self.report_disk_usage(stats)
            metrics_table += self.report_segment_memory(stats)
            metrics_table += self.report_segment_counts(stats)

            for tasks in challenge.schedule:
                for task in tasks:
                    metrics_table += self.report_throughput(stats, task.operation)
                    metrics_table += self.report_latency(stats, task.operation)
                    metrics_table += self.report_service_time(stats, task.operation)

            meta_info_table += self.report_meta_info()

            self.write_report(metrics_table, meta_info_table)
def report(self, t):
    print_header("------------------------------------------------------")
    print_header("    _______             __   _____                    ")
    print_header("   / ____(_)___  ____ _/ /  / ___/_________  ________ ")
    print_header("  / /_  / / __ \/ __ `/ /   \__ \/ ___/ __ \/ ___/ _ \\")
    print_header(" / __/ / / / / / /_/ / /   ___/ / /__/ /_/ / / /  __/")
    print_header("/_/   /_/_/ /_/\__,_/_/   /____/\___/\____/_/  \___/ ")
    print_header("------------------------------------------------------")

    selected_challenge = self._config.opts("benchmarks", "challenge")
    selected_car = self._config.opts("benchmarks", "car")
    invocation = self._config.opts("meta", "time.start")
    logger.info("Generating summary report for invocation=[%s], track=[%s], challenge=[%s], car=[%s]" %
                (invocation, t.name, selected_challenge, selected_car))

    for challenge in t.challenges:
        if challenge.name == selected_challenge:
            store = metrics.metrics_store(self._config)
            store.open(invocation, t.name, challenge.name, selected_car)

            stats = Stats(store,
                          self.guarded(lambda: challenge.benchmark[track.BenchmarkPhase.stats].iteration_count),
                          self.guarded(lambda: challenge.benchmark[track.BenchmarkPhase.search].queries),
                          self.guarded(lambda: challenge.benchmark[track.BenchmarkPhase.search].iteration_count))

            metrics_table = []
            if track.BenchmarkPhase.index in challenge.benchmark:
                metrics_table += self.report_index_throughput(stats)
                metrics_table += self.report_total_times(stats)
                metrics_table += self.report_merge_part_times(stats)

            if track.BenchmarkPhase.search in challenge.benchmark:
                metrics_table += self.report_search_latency(stats)

            metrics_table += self.report_cpu_usage(stats)
            metrics_table += self.report_gc_times(stats)
            metrics_table += self.report_disk_usage(stats)
            metrics_table += self.report_segment_memory(stats)
            metrics_table += self.report_segment_counts(stats)

            if track.BenchmarkPhase.stats in challenge.benchmark:
                metrics_table += self.report_stats_latency(stats)

            self.write_report(metrics_table)
def setup(self):
    if self.track.has_plugins:
        # no need to fetch the track once more; it has already been updated
        track.track_repo(self.config, fetch=False, update=False)
        # load track plugins eagerly to initialize the respective parameter sources
        track.load_track_plugins(self.config, runner.register_runner, scheduler.register_scheduler)
    track.prepare_track(self.track, self.config)

    self.metrics_store = metrics.metrics_store(cfg=self.config,
                                               track=self.track.name,
                                               challenge=self.challenge.name,
                                               read_only=False)
    self.es_clients = self.create_es_clients()
    self.wait_for_rest_api()
    self.prepare_telemetry()

    cluster_info = self.retrieve_cluster_info()
    cluster_version = cluster_info["version"] if cluster_info else {}
    return cluster_version.get("build_flavor", "oss"), cluster_version.get("number"), cluster_version.get("build_hash")
def setup(self, msg):
    self.mechanic = self.createActor(mechanic.MechanicActor,
                                     #globalName="/rally/mechanic/coordinator",
                                     targetActorRequirements={"coordinator": True})

    self.cfg = msg.cfg
    # to load the track we need to know the correct cluster distribution version. Usually, this value should be set but there are rare
    # cases (external pipeline and user did not specify the distribution version) where we need to derive it ourselves. For source
    # builds we always assume "master"
    if not msg.sources and not self.cfg.exists("mechanic", "distribution.version"):
        distribution_version = mechanic.cluster_distribution_version(self.cfg)
        if not distribution_version:
            raise exceptions.SystemSetupError("A distribution version is required. Please specify it with --distribution-version.")
        logger.info("Automatically derived distribution version [%s]" % distribution_version)
        self.cfg.add(config.Scope.benchmark, "mechanic", "distribution.version", distribution_version)

    t = track.load_track(self.cfg)
    challenge_name = self.cfg.opts("track", "challenge.name")
    challenge = t.find_challenge_or_default(challenge_name)
    if challenge is None:
        raise exceptions.SystemSetupError("Track [%s] does not provide challenge [%s]. List the available tracks with %s list tracks."
                                          % (t.name, challenge_name, PROGRAM_NAME))
    if challenge.user_info:
        console.info(challenge.user_info, logger=logger)
    self.race = metrics.create_race(self.cfg, t, challenge)

    self.metrics_store = metrics.metrics_store(
        self.cfg,
        track=self.race.track_name,
        challenge=self.race.challenge_name,
        read_only=False
    )
    self.lap_counter = LapCounter(self.race, self.metrics_store, self.cfg)
    self.race_store = metrics.race_store(self.cfg)

    logger.info("Asking mechanic to start the engine.")
    cluster_settings = self.race.challenge.cluster_settings
    self.send(self.mechanic, mechanic.StartEngine(self.cfg, self.metrics_store.open_context, cluster_settings,
                                                  msg.sources, msg.build, msg.distribution, msg.external, msg.docker))
def __init__(self, cfg, sources=False, build=False, distribution=False, external=False, docker=False):
    self.cfg = cfg
    self.track = track.load_track(self.cfg)
    self.metrics_store = metrics.metrics_store(
        self.cfg,
        track=self.track.name,
        challenge=self.track.find_challenge_or_default(self.cfg.opts("track", "challenge.name")).name,
        read_only=False)
    self.race_store = metrics.race_store(self.cfg)
    self.sources = sources
    self.build = build
    self.distribution = distribution
    self.external = external
    self.docker = docker
    self.actor_system = None
    self.mechanic = None
def from_sources_skip_build(cfg):
    metrics_store = metrics.metrics_store(cfg, read_only=False)
    return race(Benchmark(cfg, mechanic.create(cfg, metrics_store, sources=True, build=False), metrics_store), cfg)
def test_run_benchmark(self):
    cfg = config.Config()
    cfg.add(config.Scope.application, "system", "env.name", "unittest")
    cfg.add(config.Scope.application, "system", "time.start",
            datetime(year=2017, month=8, day=20, hour=1, minute=0, second=0))
    cfg.add(config.Scope.application, "system", "race.id", "6ebc6e53-ee20-4b0c-99b4-09697987e9f4")
    cfg.add(config.Scope.application, "system", "offline.mode", False)
    cfg.add(config.Scope.application, "driver", "on.error", "abort")
    cfg.add(config.Scope.application, "driver", "profiling", False)
    cfg.add(config.Scope.application, "reporting", "datastore.type", "in-memory")
    cfg.add(config.Scope.application, "track", "params", {})
    cfg.add(config.Scope.application, "track", "test.mode.enabled", True)
    cfg.add(config.Scope.application, "telemetry", "devices", [])
    cfg.add(config.Scope.application, "telemetry", "params", {})
    cfg.add(config.Scope.application, "mechanic", "car.names", ["external"])
    cfg.add(config.Scope.application, "mechanic", "skip.rest.api.check", True)
    cfg.add(config.Scope.application, "client", "hosts",
            AsyncDriverTests.Holder(all_hosts={"default": ["localhost:9200"]}))
    cfg.add(config.Scope.application, "client", "options",
            AsyncDriverTests.Holder(all_client_options={"default": {}}))

    params.register_param_source_for_name("bulk-param-source", AsyncDriverTestParamSource)

    task = track.Task(name="bulk-index",
                      operation=track.Operation(
                          "bulk-index",
                          track.OperationType.Bulk.name,
                          params={
                              "body": ["action_metadata_line", "index_line"],
                              "action-metadata-present": True,
                              "bulk-size": 1,
                              # we need this because the parameter source does not know that we only have one
                              # bulk and hence size() returns incorrect results
                              "size": 1
                          },
                          param_source="bulk-param-source"),
                      warmup_iterations=0,
                      iterations=1,
                      clients=1)

    current_challenge = track.Challenge(name="default", default=True, schedule=[task])
    current_track = track.Track(name="unit-test", challenges=[current_challenge])

    driver = async_driver.AsyncDriver(cfg, current_track, current_challenge,
                                      es_client_factory_class=StaticClientFactory)

    distribution_flavor, distribution_version, revision = driver.setup()
    self.assertEqual("oss", distribution_flavor)
    self.assertEqual("7.3.0", distribution_version)
    self.assertEqual("de777fa", revision)

    metrics_store_representation = driver.run()

    metric_store = metrics.metrics_store(cfg, read_only=True, track=current_track, challenge=current_challenge)
    metric_store.bulk_add(metrics_store_representation)

    self.assertIsNotNone(metric_store.get_one(name="latency", task="bulk-index", sample_type=metrics.SampleType.Normal))
    self.assertIsNotNone(metric_store.get_one(name="service_time", task="bulk-index", sample_type=metrics.SampleType.Normal))
    self.assertIsNotNone(metric_store.get_one(name="processing_time", task="bulk-index", sample_type=metrics.SampleType.Normal))
    self.assertIsNotNone(metric_store.get_one(name="throughput", task="bulk-index", sample_type=metrics.SampleType.Normal))
    self.assertIsNotNone(metric_store.get_one(name="node_total_young_gen_gc_time", sample_type=metrics.SampleType.Normal))
    self.assertIsNotNone(metric_store.get_one(name="node_total_old_gen_gc_time", sample_type=metrics.SampleType.Normal))
    # ensure that there are not more documents than we expect
    self.assertEqual(6, len(metric_store.docs), msg=json.dumps(metric_store.docs, indent=2))
def start_metrics(self, track, challenge, car):
    invocation = self._config.opts("meta", "time.start")
    self._metrics_store = metrics.metrics_store(self._config)
    self._metrics_store.open(invocation, str(track), str(challenge), str(car), create=True)
def benchmark_only(cfg):
    # We'll use a special car name for external benchmarks.
    cfg.add(config.Scope.benchmark, "benchmarks", "car", "external")
    metrics_store = metrics.metrics_store(cfg, read_only=False)
    return race(Benchmark(cfg, mechanic.create(cfg, metrics_store, external=True), metrics_store), cfg)
def from_distribution(cfg):
    metrics_store = metrics.metrics_store(cfg, read_only=False)
    return race(Benchmark(cfg, mechanic.create(cfg, metrics_store, distribution=True), metrics_store), cfg)
def docker(cfg):
    metrics_store = metrics.metrics_store(cfg, read_only=False)
    return race(Benchmark(cfg, mechanic.create(cfg, metrics_store, docker=True), metrics_store), cfg)
def test_calculate_simple_index_stats(self):
    cfg = config.Config()
    cfg.add(config.Scope.application, "system", "env.name", "unittest")
    cfg.add(config.Scope.application, "system", "time.start", datetime.datetime.now())
    cfg.add(config.Scope.application, "reporting", "datastore.type", "in-memory")
    cfg.add(config.Scope.application, "mechanic", "car.name", "unittest_car")
    cfg.add(config.Scope.application, "race", "laps", 1)
    cfg.add(config.Scope.application, "race", "user.tag", "")
    cfg.add(config.Scope.application, "race", "pipeline", "from-sources-skip-build")

    index = track.Task(operation=track.Operation(name="index", operation_type=track.OperationType.Index, params=None))
    challenge = track.Challenge(name="unittest", description="", index_settings=None, schedule=[index], default=True)
    t = track.Track("unittest", "unittest-track", challenges=[challenge])

    store = metrics.metrics_store(cfg, read_only=False, track=t, challenge=challenge)
    store.lap = 1

    store.put_value_cluster_level("throughput", 500, unit="docs/s", operation="index", operation_type=track.OperationType.Index)
    store.put_value_cluster_level("throughput", 1000, unit="docs/s", operation="index", operation_type=track.OperationType.Index)
    store.put_value_cluster_level("throughput", 2000, unit="docs/s", operation="index", operation_type=track.OperationType.Index)

    store.put_value_cluster_level("latency", 2800, unit="ms", operation="index", operation_type=track.OperationType.Index,
                                  sample_type=metrics.SampleType.Warmup)
    store.put_value_cluster_level("latency", 200, unit="ms", operation="index", operation_type=track.OperationType.Index)
    store.put_value_cluster_level("latency", 220, unit="ms", operation="index", operation_type=track.OperationType.Index)
    store.put_value_cluster_level("latency", 225, unit="ms", operation="index", operation_type=track.OperationType.Index)

    store.put_value_cluster_level("service_time", 250, unit="ms", operation="index", operation_type=track.OperationType.Index,
                                  sample_type=metrics.SampleType.Warmup, meta_data={"success": False})
    store.put_value_cluster_level("service_time", 190, unit="ms", operation="index", operation_type=track.OperationType.Index,
                                  meta_data={"success": True})
    store.put_value_cluster_level("service_time", 200, unit="ms", operation="index", operation_type=track.OperationType.Index,
                                  meta_data={"success": False})
    store.put_value_cluster_level("service_time", 215, unit="ms", operation="index", operation_type=track.OperationType.Index,
                                  meta_data={"success": True})

    stats = reporter.calculate_results(store, metrics.create_race(cfg, t, challenge))

    del store

    opm = stats.metrics("index")
    self.assertEqual(collections.OrderedDict([("min", 500), ("median", 1000), ("max", 2000), ("unit", "docs/s")]), opm["throughput"])
    self.assertEqual(collections.OrderedDict([("50", 220), ("100", 225)]), opm["latency"])
    self.assertEqual(collections.OrderedDict([("50", 200), ("100", 215)]), opm["service_time"])
    self.assertAlmostEqual(0.3333333333333333, opm["error_rate"])
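# Illustrative sketch (not taken from the Rally sources above): all of the snippets follow the same pattern of
# building a Config, opening a store via metrics.metrics_store(...), recording samples, and deriving results.
# The condensed flow below uses only calls that appear in the newer test variant above; the module layout
# (esrally.config / metrics / track / reporter) and the exact set of required config keys are assumptions based
# on that test and may differ between Rally versions.
import datetime

from esrally import config, metrics, reporter, track  # assumed imports, mirroring the unit tests above

# minimal in-memory configuration, as in test_calculate_simple_index_stats
cfg = config.Config()
cfg.add(config.Scope.application, "system", "env.name", "unittest")
cfg.add(config.Scope.application, "system", "time.start", datetime.datetime.now())
cfg.add(config.Scope.application, "system", "trial.id", "6ebc6e53-ee20-4b0c-99b4-09697987e9f4")
cfg.add(config.Scope.application, "reporting", "datastore.type", "in-memory")
cfg.add(config.Scope.application, "mechanic", "car.names", ["unittest_car"])
cfg.add(config.Scope.application, "mechanic", "car.params", {})
cfg.add(config.Scope.application, "mechanic", "plugin.params", {})
cfg.add(config.Scope.application, "race", "laps", 1)
cfg.add(config.Scope.application, "race", "user.tag", "")
cfg.add(config.Scope.application, "race", "pipeline", "from-sources-skip-build")
cfg.add(config.Scope.application, "track", "params", {})

# a single bulk-indexing task wrapped in a challenge and track
task = track.Task(name="index #1",
                  operation=track.Operation(name="index", operation_type=track.OperationType.Bulk, params=None))
challenge = track.Challenge(name="unittest", schedule=[task], default=True)
t = track.Track("unittest", "unittest-track", challenges=[challenge])

# open a writable in-memory store, record one throughput sample, and derive the result stats
store = metrics.metrics_store(cfg, read_only=False, track=t, challenge=challenge)
store.lap = 1
store.put_value_cluster_level("throughput", 1000, unit="docs/s", task="index #1",
                              operation_type=track.OperationType.Bulk)
stats = reporter.calculate_results(store, metrics.create_race(cfg, t, challenge))
print(stats.metrics("index #1")["throughput"])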