Example #1
    def report(self, r1, r2):
        logger.info("Generating comparison report for baseline (invocation=[%s], track=[%s], challenge=[%s], car=[%s]) and "
                    "contender (invocation=[%s], track=[%s], challenge=[%s], car=[%s])" %
                    (r1.trial_timestamp, r1.track, r1.challenge, r1.car,
                     r2.trial_timestamp, r2.track, r2.challenge, r2.car))
        # we don't verify anything about the races as it is possible that the user benchmarks two different tracks intentionally
        baseline_store = metrics.metrics_store(self._config,
                                               invocation=r1.trial_timestamp, track=r1.track, challenge=r1.challenge.name, car=r1.car)
        baseline_stats = Stats(baseline_store, r1.challenge)

        contender_store = metrics.metrics_store(self._config,
                                                invocation=r2.trial_timestamp, track=r2.track, challenge=r2.challenge.name, car=r2.car)
        contender_stats = Stats(contender_store, r2.challenge)

        print_internal("")
        print_internal("Comparing baseline")
        print_internal("  Race timestamp: %s" % r1.trial_timestamp)
        print_internal("  Challenge: %s" % r1.challenge.name)
        print_internal("  Car: %s" % r1.car)
        print_internal("")
        print_internal("with contender")
        print_internal("  Race timestamp: %s" % r2.trial_timestamp)
        print_internal("  Challenge: %s" % r2.challenge.name)
        print_internal("  Car: %s" % r2.car)
        print_internal("")
        print_header("------------------------------------------------------")
        print_header("    _______             __   _____                    ")
        print_header("   / ____(_)___  ____ _/ /  / ___/_________  ________ ")
        print_header("  / /_  / / __ \/ __ `/ /   \__ \/ ___/ __ \/ ___/ _ \\")
        print_header(" / __/ / / / / / /_/ / /   ___/ / /__/ /_/ / /  /  __/")
        print_header("/_/   /_/_/ /_/\__,_/_/   /____/\___/\____/_/   \___/ ")
        print_header("------------------------------------------------------")
        print_internal("")

        print_internal(self.format_as_table(self.metrics_table(baseline_stats, contender_stats)))
Example #2
    def report(self, r1, r2):
        logger.info("Generating comparison report for baseline (invocation=[%s], track=[%s], challenge=[%s], car=[%s]) and "
                    "contender (invocation=[%s], track=[%s], challenge=[%s], car=[%s])" %
                    (r1.trial_timestamp, r1.track, r1.challenge, r1.car,
                     r2.trial_timestamp, r2.track, r2.challenge, r2.car))
        # we don't verify anything about the races as it is possible that the user benchmarks two different tracks intentionally
        baseline_store = metrics.metrics_store(self._config,
                                               invocation=r1.trial_timestamp, track=r1.track, challenge=r1.challenge.name, car=r1.car)
        baseline_stats = Stats(baseline_store, r1.challenge)

        contender_store = metrics.metrics_store(self._config,
                                                invocation=r2.trial_timestamp, track=r2.track, challenge=r2.challenge.name, car=r2.car)
        contender_stats = Stats(contender_store, r2.challenge)

        print_internal("")
        print_internal("Comparing baseline")
        print_internal("  Race timestamp: %s" % r1.trial_timestamp)
        print_internal("  Challenge: %s" % r1.challenge.name)
        print_internal("  Car: %s" % r1.car)
        print_internal("")
        print_internal("with contender")
        print_internal("  Race timestamp: %s" % r2.trial_timestamp)
        print_internal("  Challenge: %s" % r2.challenge.name)
        print_internal("  Car: %s" % r2.car)
        print_internal("")
        print_header("------------------------------------------------------")
        print_header("    _______             __   _____                    ")
        print_header("   / ____(_)___  ____ _/ /  / ___/_________  ________ ")
        print_header("  / /_  / / __ \/ __ `/ /   \__ \/ ___/ __ \/ ___/ _ \\")
        print_header(" / __/ / / / / / /_/ / /   ___/ / /__/ /_/ / /  /  __/")
        print_header("/_/   /_/_/ /_/\__,_/_/   /____/\___/\____/_/   \___/ ")
        print_header("------------------------------------------------------")
        print_internal("")

        metrics_table = []
        metrics_table += self.report_total_times(baseline_stats, contender_stats)
        metrics_table += self.report_merge_part_times(baseline_stats, contender_stats)

        # metrics_table += self.report_cpu_usage(baseline_stats, contender_stats)
        metrics_table += self.report_gc_times(baseline_stats, contender_stats)

        metrics_table += self.report_disk_usage(baseline_stats, contender_stats)
        metrics_table += self.report_segment_memory(baseline_stats, contender_stats)
        metrics_table += self.report_segment_counts(baseline_stats, contender_stats)

        for t1 in r1.challenge.schedule:
            for t2 in r2.challenge.schedule:
                # only report matching metrics
                if t1.operation.name == t2.operation.name:
                    metrics_table += self.report_throughput(baseline_stats, contender_stats, t1.operation)
                    metrics_table += self.report_latency(baseline_stats, contender_stats, t1.operation)
                    metrics_table += self.report_service_time(baseline_stats, contender_stats, t1.operation)

        print_internal(tabulate.tabulate(metrics_table,
                                         headers=["Metric", "Operation", "Baseline", "Contender", "Diff", "Unit"],
                                         numalign="right", stralign="right"))
Example #4
    def report(self, r1, r2):
        logger.info("Generating comparison report for baseline (invocation=[%s], track=[%s], challenge=[%s], car=[%s]) and "
                    "contender (invocation=[%s], track=[%s], challenge=[%s], car=[%s])" %
                    (r1.trial_timestamp, r1.track, r1.challenge, r1.car,
                     r2.trial_timestamp, r2.track, r2.challenge, r2.car))
        # we don't verify anything about the races as it is possible that the user benchmarks two different tracks intentionally
        baseline_store = metrics.metrics_store(self._config)
        baseline_store.open(r1.trial_timestamp, r1.track, r1.challenge.name, r1.car)
        baseline_stats = Stats(baseline_store,
                               stats_sample_size=r1.challenge.stats_sample_size,
                               queries=r1.challenge.queries,
                               search_sample_size=r1.challenge.search_sample_size)

        contender_store = metrics.metrics_store(self._config)
        contender_store.open(r2.trial_timestamp, r2.track, r2.challenge.name, r2.car)
        contender_stats = Stats(contender_store,
                                stats_sample_size=r2.challenge.stats_sample_size,
                                queries=r2.challenge.queries,
                                search_sample_size=r2.challenge.search_sample_size)

        print_internal("")
        print_internal("Comparing baseline")
        print_internal("  Race timestamp: %s" % r1.trial_timestamp)
        print_internal("  Challenge: %s" % r1.challenge.name)
        print_internal("  Car: %s" % r1.car)
        print_internal("")
        print_internal("with contender")
        print_internal("  Race timestamp: %s" % r2.trial_timestamp)
        print_internal("  Challenge: %s" % r2.challenge.name)
        print_internal("  Car: %s" % r2.car)
        print_internal("")
        print_header("------------------------------------------------------")
        print_header("    _______             __   _____                    ")
        print_header("   / ____(_)___  ____ _/ /  / ___/_________  ________ ")
        print_header("  / /_  / / __ \/ __ `/ /   \__ \/ ___/ __ \/ ___/ _ \\")
        print_header(" / __/ / / / / / /_/ / /   ___/ / /__/ /_/ / /  /  __/")
        print_header("/_/   /_/_/ /_/\__,_/_/   /____/\___/\____/_/   \___/ ")
        print_header("------------------------------------------------------")

        metrics_table = []

        metrics_table += self.report_index_throughput(baseline_stats, contender_stats)
        metrics_table += self.report_merge_part_times(baseline_stats, contender_stats)
        metrics_table += self.report_total_times(baseline_stats, contender_stats)
        metrics_table += self.report_search_latency(baseline_stats, contender_stats)
        metrics_table += self.report_cpu_usage(baseline_stats, contender_stats)
        metrics_table += self.report_gc_times(baseline_stats, contender_stats)
        metrics_table += self.report_disk_usage(baseline_stats, contender_stats)
        metrics_table += self.report_segment_memory(baseline_stats, contender_stats)
        metrics_table += self.report_segment_counts(baseline_stats, contender_stats)
        metrics_table += self.report_stats_latency(baseline_stats, contender_stats)


        print_internal(
            tabulate.tabulate(metrics_table, headers=["Metric", "Baseline", "Contender", "Diff"], numalign="right", stralign="right"))
Example #5
    def setup(self, sources=False):
        # to load the track we need to know the correct cluster distribution version. Usually, this value should be set
        # but there are rare cases (external pipeline and user did not specify the distribution version) where we need
        # to derive it ourselves. For source builds we always assume "master"
        if not sources and not self.cfg.exists("mechanic", "distribution.version"):
            distribution_version = mechanic.cluster_distribution_version(self.cfg)
            self.logger.info("Automatically derived distribution version [%s]", distribution_version)
            self.cfg.add(config.Scope.benchmark, "mechanic", "distribution.version", distribution_version)
            min_es_version = versions.Version.from_string(version.minimum_es_version())
            specified_version = versions.Version.from_string(distribution_version)
            if specified_version < min_es_version:
                raise exceptions.SystemSetupError(f"Cluster version must be at least [{min_es_version}] but was [{distribution_version}]")

        self.current_track = track.load_track(self.cfg)
        self.track_revision = self.cfg.opts("track", "repository.revision", mandatory=False)
        challenge_name = self.cfg.opts("track", "challenge.name")
        self.current_challenge = self.current_track.find_challenge_or_default(challenge_name)
        if self.current_challenge is None:
            raise exceptions.SystemSetupError(
                "Track [{}] does not provide challenge [{}]. List the available tracks with {} list tracks.".format(
                    self.current_track.name, challenge_name, PROGRAM_NAME))
        if self.current_challenge.user_info:
            console.info(self.current_challenge.user_info)
        self.race = metrics.create_race(self.cfg, self.current_track, self.current_challenge, self.track_revision)

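        # the metrics store is opened read-write and scoped to this race's track and challenge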
        self.metrics_store = metrics.metrics_store(
            self.cfg,
            track=self.race.track_name,
            challenge=self.race.challenge_name,
            read_only=False
        )
        self.race_store = metrics.race_store(self.cfg)
Example #6
    def __init__(self,
                 cfg,
                 sources=False,
                 build=False,
                 distribution=False,
                 external=False,
                 docker=False):
        self.cfg = cfg
        # we preload the track here but in rare cases (external pipeline and user did not specify the distribution version) we might need
        # to reload the track again. We are assuming that a track always specifies the same challenges for each version (i.e. branch).
        t = self._load_track()
        challenge = self._find_challenge(t)

        self.race = metrics.create_race(self.cfg, t, challenge)

        self.metrics_store = metrics.metrics_store(
            self.cfg,
            track=self.race.track_name,
            challenge=self.race.challenge_name,
            read_only=False)
        self.race_store = metrics.race_store(self.cfg)
        self.sources = sources
        self.build = build
        self.distribution = distribution
        self.external = external
        self.docker = docker
        self.actor_system = None
        self.mechanic = None
        self.ignore_unknown_return = self.cfg.opts("system",
                                                   "ignore.unknown.return")
Example #7
    def __init__(self,
                 cfg,
                 sources=False,
                 build=False,
                 distribution=False,
                 external=False,
                 docker=False):
        self.cfg = cfg
        self.track = track.load_track(self.cfg)
        challenge_name = self.cfg.opts("track", "challenge.name")
        challenge = self.track.find_challenge_or_default(challenge_name)
        if challenge is None:
            raise exceptions.SystemSetupError(
                "Track [%s] does not provide challenge [%s]. List the available tracks with %s list tracks."
                % (self.track.name, challenge_name, PROGRAM_NAME))
        self.metrics_store = metrics.metrics_store(self.cfg,
                                                   track=self.track.name,
                                                   challenge=challenge.name,
                                                   read_only=False)
        self.race_store = metrics.race_store(self.cfg)
        self.sources = sources
        self.build = build
        self.distribution = distribution
        self.external = external
        self.docker = docker
        self.actor_system = None
        self.mechanic = None
Example #8
def benchmark_only(cfg):
    # We'll use a special car name for external benchmarks.
    cfg.add(config.Scope.benchmark, "benchmarks", "car", "external")
    metrics_store = metrics.metrics_store(cfg, read_only=False)
    return race(
        Benchmark(cfg, mechanic.create(cfg, metrics_store, external=True),
                  metrics_store), cfg)
Example #9
    def report(self, t):
        if self.is_final_report():
            print_internal("")
            print_header(
                "------------------------------------------------------")
            print_header(
                "    _______             __   _____                    ")
            print_header(
                "   / ____(_)___  ____ _/ /  / ___/_________  ________ ")
            print_header(
                "  / /_  / / __ \/ __ `/ /   \__ \/ ___/ __ \/ ___/ _ \\")
            print_header(
                " / __/ / / / / / /_/ / /   ___/ / /__/ /_/ / /  /  __/")
            print_header(
                "/_/   /_/_/ /_/\__,_/_/   /____/\___/\____/_/   \___/ ")
            print_header(
                "------------------------------------------------------")
            print_internal("")
        else:
            print_internal("")
            print_header("--------------------------------------------------")
            print_header("    __                   _____                    ")
            print_header("   / /   ____ _____     / ___/_________  ________ ")
            print_header("  / /   / __ `/ __ \    \__ \/ ___/ __ \/ ___/ _ \\")
            print_header(" / /___/ /_/ / /_/ /   ___/ / /__/ /_/ / /  /  __/")
            print_header("/_____/\__,_/ .___/   /____/\___/\____/_/   \___/ ")
            print_header("           /_/                                    ")
            print_header("--------------------------------------------------")
            print_internal("")

        selected_challenge = self._config.opts("benchmarks", "challenge")
        for challenge in t.challenges:
            if challenge.name == selected_challenge:
                store = metrics.metrics_store(self._config)
                stats = Stats(store, challenge, self._lap)

                metrics_table = []
                meta_info_table = []
                metrics_table += self.report_total_times(stats)
                metrics_table += self.report_merge_part_times(stats)

                metrics_table += self.report_cpu_usage(stats)
                metrics_table += self.report_gc_times(stats)

                metrics_table += self.report_disk_usage(stats)
                metrics_table += self.report_segment_memory(stats)
                metrics_table += self.report_segment_counts(stats)

                for tasks in challenge.schedule:
                    for task in tasks:
                        metrics_table += self.report_throughput(
                            stats, task.operation)
                        metrics_table += self.report_latency(
                            stats, task.operation)
                        metrics_table += self.report_service_time(
                            stats, task.operation)

                meta_info_table += self.report_meta_info()

                self.write_report(metrics_table, meta_info_table)
Example #10
    def start_metrics(self, track, challenge, car):
        invocation = self._config.opts("meta", "time.start")
        self._metrics_store = metrics.metrics_store(self._config)
        self._metrics_store.open(invocation,
                                 track,
                                 challenge,
                                 car,
                                 create=True)
Example #11
    def test_calculate_simple_index_stats(self):
        cfg = config.Config()
        cfg.add(config.Scope.application, "system", "env.name", "unittest")
        cfg.add(config.Scope.application, "system", "time.start", datetime.datetime.now())
        cfg.add(config.Scope.application, "system", "trial.id", "6ebc6e53-ee20-4b0c-99b4-09697987e9f4")
        cfg.add(config.Scope.application, "reporting", "datastore.type", "in-memory")
        cfg.add(config.Scope.application, "mechanic", "car.names", ["unittest_car"])
        cfg.add(config.Scope.application, "mechanic", "car.params", {})
        cfg.add(config.Scope.application, "mechanic", "plugin.params", {})
        cfg.add(config.Scope.application, "race", "laps", 1)
        cfg.add(config.Scope.application, "race", "user.tag", "")
        cfg.add(config.Scope.application, "race", "pipeline", "from-sources-skip-build")
        cfg.add(config.Scope.application, "track", "params", {})

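        # a minimal track: a single default challenge whose schedule contains one bulk-indexing task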
        index = track.Task(name="index #1", operation=track.Operation(name="index", operation_type=track.OperationType.Bulk, params=None))
        challenge = track.Challenge(name="unittest", schedule=[index], default=True)
        t = track.Track("unittest", "unittest-track", challenges=[challenge])

        store = metrics.metrics_store(cfg, read_only=False, track=t, challenge=challenge)
        store.lap = 1

        store.put_value_cluster_level("throughput", 500, unit="docs/s", task="index #1", operation_type=track.OperationType.Bulk)
        store.put_value_cluster_level("throughput", 1000, unit="docs/s", task="index #1", operation_type=track.OperationType.Bulk)
        store.put_value_cluster_level("throughput", 2000, unit="docs/s", task="index #1", operation_type=track.OperationType.Bulk)

        store.put_value_cluster_level("latency", 2800, unit="ms", task="index #1", operation_type=track.OperationType.Bulk,
                                      sample_type=metrics.SampleType.Warmup)
        store.put_value_cluster_level("latency", 200, unit="ms", task="index #1", operation_type=track.OperationType.Bulk)
        store.put_value_cluster_level("latency", 220, unit="ms", task="index #1", operation_type=track.OperationType.Bulk)
        store.put_value_cluster_level("latency", 225, unit="ms", task="index #1", operation_type=track.OperationType.Bulk)

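        # one of the three non-warmup service_time samples is marked as failed, consistent with the 1/3 error rate asserted below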
        store.put_value_cluster_level("service_time", 250, unit="ms", task="index #1", operation_type=track.OperationType.Bulk,
                                      sample_type=metrics.SampleType.Warmup, meta_data={"success": False})
        store.put_value_cluster_level("service_time", 190, unit="ms", task="index #1", operation_type=track.OperationType.Bulk,
                                      meta_data={"success": True})
        store.put_value_cluster_level("service_time", 200, unit="ms", task="index #1", operation_type=track.OperationType.Bulk,
                                      meta_data={"success": False})
        store.put_value_cluster_level("service_time", 215, unit="ms", task="index #1", operation_type=track.OperationType.Bulk,
                                      meta_data={"success": True})
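        # per-node final index sizes (2048 + 4096 bytes) are expected to sum to the 6144 bytes asserted below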
        store.put_count_node_level("rally-node-0", "final_index_size_bytes", 2048, unit="bytes")
        store.put_count_node_level("rally-node-1", "final_index_size_bytes", 4096, unit="bytes")

        stats = reporter.calculate_results(store, metrics.create_race(cfg, t, challenge))

        del store

        opm = stats.metrics("index #1")
        self.assertEqual(collections.OrderedDict([("min", 500), ("median", 1000), ("max", 2000), ("unit", "docs/s")]), opm["throughput"])
        self.assertEqual(collections.OrderedDict([("50_0", 220), ("100_0", 225)]), opm["latency"])
        self.assertEqual(collections.OrderedDict([("50_0", 200), ("100_0", 215)]), opm["service_time"])
        self.assertAlmostEqual(0.3333333333333333, opm["error_rate"])

        self.assertEqual(6144, stats.index_size)
Example #12
    def setup(self, msg, sender):
        self.start_sender = sender
        self.cfg = msg.cfg
        # to load the track we need to know the correct cluster distribution version. Usually, this value should be set but there are rare
        # cases (external pipeline and user did not specify the distribution version) where we need to derive it ourselves. For source
        # builds we always assume "master"
        if not msg.sources and not self.cfg.exists("mechanic",
                                                   "distribution.version"):
            distribution_version = mechanic.cluster_distribution_version(
                self.cfg)
            if not distribution_version:
                raise exceptions.SystemSetupError(
                    "A distribution version is required. Please specify it with --distribution-version."
                )
            self.logger.info("Automatically derived distribution version [%s]",
                             distribution_version)
            self.cfg.add(config.Scope.benchmark, "mechanic",
                         "distribution.version", distribution_version)

        t = track.load_track(self.cfg)
        self.track_revision = self.cfg.opts("track",
                                            "repository.revision",
                                            mandatory=False)
        challenge_name = self.cfg.opts("track", "challenge.name")
        challenge = t.find_challenge_or_default(challenge_name)
        if challenge is None:
            raise exceptions.SystemSetupError(
                "Track [%s] does not provide challenge [%s]. List the available tracks with %s list tracks."
                % (t.name, challenge_name, PROGRAM_NAME))
        if challenge.user_info:
            console.info(challenge.user_info)
        self.race = metrics.create_race(self.cfg, t, challenge,
                                        self.track_revision)

        self.metrics_store = metrics.metrics_store(
            self.cfg,
            track=self.race.track_name,
            challenge=self.race.challenge_name,
            read_only=False)
        self.race_store = metrics.race_store(self.cfg)
        self.logger.info("Asking mechanic to start the engine.")
        cluster_settings = challenge.cluster_settings
        self.mechanic = self.createActor(
            mechanic.MechanicActor,
            targetActorRequirements={"coordinator": True})
        self.send(
            self.mechanic,
            mechanic.StartEngine(self.cfg, self.metrics_store.open_context,
                                 cluster_settings, msg.sources, msg.build,
                                 msg.distribution, msg.external, msg.docker))
Example #13
    def test_calculate_simple_index_stats(self):
        cfg = config.Config()
        cfg.add(config.Scope.application, "system", "env.name", "unittest")
        cfg.add(config.Scope.application, "system", "time.start", datetime.datetime.now())
        cfg.add(config.Scope.application, "reporting", "datastore.type", "in-memory")
        cfg.add(config.Scope.application, "mechanic", "car.names", ["unittest_car"])
        cfg.add(config.Scope.application, "race", "laps", 1)
        cfg.add(config.Scope.application, "race", "user.tag", "")
        cfg.add(config.Scope.application, "race", "pipeline", "from-sources-skip-build")

        index = track.Task(operation=track.Operation(name="index", operation_type=track.OperationType.Index, params=None))
        challenge = track.Challenge(name="unittest", description="", index_settings=None, schedule=[index], default=True)
        t = track.Track("unittest", "unittest-track", challenges=[challenge])

        store = metrics.metrics_store(cfg, read_only=False, track=t, challenge=challenge)
        store.lap = 1

        store.put_value_cluster_level("throughput", 500, unit="docs/s", operation="index", operation_type=track.OperationType.Index)
        store.put_value_cluster_level("throughput", 1000, unit="docs/s", operation="index", operation_type=track.OperationType.Index)
        store.put_value_cluster_level("throughput", 2000, unit="docs/s", operation="index", operation_type=track.OperationType.Index)

        store.put_value_cluster_level("latency", 2800, unit="ms", operation="index", operation_type=track.OperationType.Index,
                                      sample_type=metrics.SampleType.Warmup)
        store.put_value_cluster_level("latency", 200, unit="ms", operation="index", operation_type=track.OperationType.Index)
        store.put_value_cluster_level("latency", 220, unit="ms", operation="index", operation_type=track.OperationType.Index)
        store.put_value_cluster_level("latency", 225, unit="ms", operation="index", operation_type=track.OperationType.Index)

        store.put_value_cluster_level("service_time", 250, unit="ms", operation="index", operation_type=track.OperationType.Index,
                                      sample_type=metrics.SampleType.Warmup, meta_data={"success": False})
        store.put_value_cluster_level("service_time", 190, unit="ms", operation="index", operation_type=track.OperationType.Index,
                                      meta_data={"success": True})
        store.put_value_cluster_level("service_time", 200, unit="ms", operation="index", operation_type=track.OperationType.Index,
                                      meta_data={"success": False})
        store.put_value_cluster_level("service_time", 215, unit="ms", operation="index", operation_type=track.OperationType.Index,
                                      meta_data={"success": True})
        store.put_count_node_level("rally-node-0", "final_index_size_bytes", 2048, unit="bytes")
        store.put_count_node_level("rally-node-1", "final_index_size_bytes", 4096, unit="bytes")

        stats = reporter.calculate_results(store, metrics.create_race(cfg, t, challenge))

        del store

        opm = stats.metrics("index")
        self.assertEqual(collections.OrderedDict([("min", 500), ("median", 1000), ("max", 2000), ("unit", "docs/s")]), opm["throughput"])
        self.assertEqual(collections.OrderedDict([("50", 220), ("100", 225)]), opm["latency"])
        self.assertEqual(collections.OrderedDict([("50", 200), ("100", 215)]), opm["service_time"])
        self.assertAlmostEqual(0.3333333333333333, opm["error_rate"])

        self.assertEqual(6144, stats.index_size)
Example #14
    def report(self, t):
        print_internal("")
        print_header("------------------------------------------------------")
        print_header("    _______             __   _____                    ")
        print_header("   / ____(_)___  ____ _/ /  / ___/_________  ________ ")
        print_header("  / /_  / / __ \/ __ `/ /   \__ \/ ___/ __ \/ ___/ _ \\")
        print_header(" / __/ / / / / / /_/ / /   ___/ / /__/ /_/ / /  /  __/")
        print_header("/_/   /_/_/ /_/\__,_/_/   /____/\___/\____/_/   \___/ ")
        print_header("------------------------------------------------------")
        print_internal("")

        selected_challenge = self._config.opts("benchmarks", "challenge")
        selected_car = self._config.opts("benchmarks", "car")
        invocation = self._config.opts("meta", "time.start")
        logger.info(
            "Generating summary report for invocation=[%s], track=[%s], challenge=[%s], car=[%s]"
            % (invocation, t.name, selected_challenge, selected_car))
        for challenge in t.challenges:
            if challenge.name == selected_challenge:
                store = metrics.metrics_store(self._config)
                store.open(invocation, t.name, challenge.name, selected_car)

                stats = Stats(store, challenge)

                metrics_table = []
                meta_info_table = []
                metrics_table += self.report_total_times(stats)
                metrics_table += self.report_merge_part_times(stats)

                metrics_table += self.report_cpu_usage(stats)
                metrics_table += self.report_gc_times(stats)

                metrics_table += self.report_disk_usage(stats)
                metrics_table += self.report_segment_memory(stats)
                metrics_table += self.report_segment_counts(stats)

                for tasks in challenge.schedule:
                    for task in tasks:
                        metrics_table += self.report_throughput(
                            stats, task.operation)
                        metrics_table += self.report_latency(
                            stats, task.operation)
                        metrics_table += self.report_service_time(
                            stats, task.operation)

                meta_info_table += self.report_meta_info()

                self.write_report(metrics_table, meta_info_table)
Example #15
    def report(self, t):
        print_header("------------------------------------------------------")
        print_header("    _______             __   _____                    ")
        print_header("   / ____(_)___  ____ _/ /  / ___/_________  ________ ")
        print_header("  / /_  / / __ \/ __ `/ /   \__ \/ ___/ __ \/ ___/ _ \\")
        print_header(" / __/ / / / / / /_/ / /   ___/ / /__/ /_/ / /  /  __/")
        print_header("/_/   /_/_/ /_/\__,_/_/   /____/\___/\____/_/   \___/ ")
        print_header("------------------------------------------------------")

        selected_challenge = self._config.opts("benchmarks", "challenge")
        selected_car = self._config.opts("benchmarks", "car")
        invocation = self._config.opts("meta", "time.start")
        logger.info("Generating summary report for invocation=[%s], track=[%s], challenge=[%s], car=[%s]" %
                    (invocation, t.name, selected_challenge, selected_car))
        for challenge in t.challenges:
            if challenge.name == selected_challenge:
                store = metrics.metrics_store(self._config)
                store.open(invocation, t.name, challenge.name, selected_car)

                stats = Stats(store,
                              self.guarded(lambda: challenge.benchmark[track.BenchmarkPhase.stats].iteration_count),
                              self.guarded(lambda: challenge.benchmark[track.BenchmarkPhase.search].queries),
                              self.guarded(lambda: challenge.benchmark[track.BenchmarkPhase.search].iteration_count))

                metrics_table = []
                if track.BenchmarkPhase.index in challenge.benchmark:
                    metrics_table += self.report_index_throughput(stats)
                    metrics_table += self.report_total_times(stats)
                    metrics_table += self.report_merge_part_times(stats)

                if track.BenchmarkPhase.search in challenge.benchmark:
                    metrics_table += self.report_search_latency(stats)

                metrics_table += self.report_cpu_usage(stats)
                metrics_table += self.report_gc_times(stats)

                metrics_table += self.report_disk_usage(stats)
                metrics_table += self.report_segment_memory(stats)
                metrics_table += self.report_segment_counts(stats)

                if track.BenchmarkPhase.stats in challenge.benchmark:
                    metrics_table += self.report_stats_latency(stats)

                self.write_report(metrics_table)
Example #17
    def setup(self):
        if self.track.has_plugins:
            # no need to fetch the track once more; it has already been updated
            track.track_repo(self.config, fetch=False, update=False)
            # load track plugins eagerly to initialize the respective parameter sources
            track.load_track_plugins(self.config, runner.register_runner,
                                     scheduler.register_scheduler)
        track.prepare_track(self.track, self.config)

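        # open a writable metrics store for the current track and challenge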
        self.metrics_store = metrics.metrics_store(
            cfg=self.config,
            track=self.track.name,
            challenge=self.challenge.name,
            read_only=False)
        self.es_clients = self.create_es_clients()
        self.wait_for_rest_api()
        self.prepare_telemetry()

        cluster_info = self.retrieve_cluster_info()
        cluster_version = cluster_info["version"] if cluster_info else {}
        return cluster_version.get("build_flavor", "oss"), cluster_version.get(
            "number"), cluster_version.get("build_hash")
Example #18
    def setup(self, msg):
        self.mechanic = self.createActor(mechanic.MechanicActor,
                                         #globalName="/rally/mechanic/coordinator",
                                         targetActorRequirements={"coordinator": True})

        self.cfg = msg.cfg
        # to load the track we need to know the correct cluster distribution version. Usually, this value should be set but there are rare
        # cases (external pipeline and user did not specify the distribution version) where we need to derive it ourselves. For source
        # builds we always assume "master"
        if not msg.sources and not self.cfg.exists("mechanic", "distribution.version"):
            distribution_version = mechanic.cluster_distribution_version(self.cfg)
            if not distribution_version:
                raise exceptions.SystemSetupError("A distribution version is required. Please specify it with --distribution-version.")
            logger.info("Automatically derived distribution version [%s]" % distribution_version)
            self.cfg.add(config.Scope.benchmark, "mechanic", "distribution.version", distribution_version)

        t = track.load_track(self.cfg)
        challenge_name = self.cfg.opts("track", "challenge.name")
        challenge = t.find_challenge_or_default(challenge_name)
        if challenge is None:
            raise exceptions.SystemSetupError("Track [%s] does not provide challenge [%s]. List the available tracks with %s list tracks."
                                              % (t.name, challenge_name, PROGRAM_NAME))
        if challenge.user_info:
            console.info(challenge.user_info, logger=logger)
        self.race = metrics.create_race(self.cfg, t, challenge)

        self.metrics_store = metrics.metrics_store(
            self.cfg,
            track=self.race.track_name,
            challenge=self.race.challenge_name,
            read_only=False
        )
        self.lap_counter = LapCounter(self.race, self.metrics_store, self.cfg)
        self.race_store = metrics.race_store(self.cfg)
        logger.info("Asking mechanic to start the engine.")
        cluster_settings = self.race.challenge.cluster_settings
        self.send(self.mechanic, mechanic.StartEngine(self.cfg, self.metrics_store.open_context, cluster_settings, msg.sources, msg.build,
                                                      msg.distribution, msg.external, msg.docker))
Example #19
    def __init__(self,
                 cfg,
                 sources=False,
                 build=False,
                 distribution=False,
                 external=False,
                 docker=False):
        self.cfg = cfg
        self.track = track.load_track(self.cfg)
        self.metrics_store = metrics.metrics_store(
            self.cfg,
            track=self.track.name,
            challenge=self.track.find_challenge_or_default(
                self.cfg.opts("track", "challenge.name")).name,
            read_only=False)
        self.race_store = metrics.race_store(self.cfg)
        self.sources = sources
        self.build = build
        self.distribution = distribution
        self.external = external
        self.docker = docker
        self.actor_system = None
        self.mechanic = None
Example #20
def from_sources_skip_build(cfg):
    metrics_store = metrics.metrics_store(cfg, read_only=False)
    return race(Benchmark(cfg, mechanic.create(cfg, metrics_store, sources=True, build=False), metrics_store), cfg)
Example #21
    def test_run_benchmark(self):
        cfg = config.Config()

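        # minimal configuration: in-memory metrics store, test mode enabled, an "external" car and a local Elasticsearch host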
        cfg.add(config.Scope.application, "system", "env.name", "unittest")
        cfg.add(
            config.Scope.application, "system", "time.start",
            datetime(year=2017, month=8, day=20, hour=1, minute=0, second=0))
        cfg.add(config.Scope.application, "system", "race.id",
                "6ebc6e53-ee20-4b0c-99b4-09697987e9f4")
        cfg.add(config.Scope.application, "system", "offline.mode", False)
        cfg.add(config.Scope.application, "driver", "on.error", "abort")
        cfg.add(config.Scope.application, "driver", "profiling", False)
        cfg.add(config.Scope.application, "reporting", "datastore.type",
                "in-memory")
        cfg.add(config.Scope.application, "track", "params", {})
        cfg.add(config.Scope.application, "track", "test.mode.enabled", True)
        cfg.add(config.Scope.application, "telemetry", "devices", [])
        cfg.add(config.Scope.application, "telemetry", "params", {})
        cfg.add(config.Scope.application, "mechanic", "car.names",
                ["external"])
        cfg.add(config.Scope.application, "mechanic", "skip.rest.api.check",
                True)
        cfg.add(
            config.Scope.application, "client", "hosts",
            AsyncDriverTests.Holder(all_hosts={"default": ["localhost:9200"]}))
        cfg.add(config.Scope.application, "client", "options",
                AsyncDriverTests.Holder(all_client_options={"default": {}}))

        params.register_param_source_for_name("bulk-param-source",
                                              AsyncDriverTestParamSource)

        task = track.Task(
            name="bulk-index",
            operation=track.Operation(
                "bulk-index",
                track.OperationType.Bulk.name,
                params={
                    "body": ["action_metadata_line", "index_line"],
                    "action-metadata-present": True,
                    "bulk-size": 1,
                    # we need this because the parameter source does not know that we only have one
                    # bulk and hence size() returns incorrect results
                    "size": 1
                },
                param_source="bulk-param-source"),
            warmup_iterations=0,
            iterations=1,
            clients=1)

        current_challenge = track.Challenge(name="default",
                                            default=True,
                                            schedule=[task])
        current_track = track.Track(name="unit-test",
                                    challenges=[current_challenge])

        driver = async_driver.AsyncDriver(
            cfg,
            current_track,
            current_challenge,
            es_client_factory_class=StaticClientFactory)

        distribution_flavor, distribution_version, revision = driver.setup()
        self.assertEqual("oss", distribution_flavor)
        self.assertEqual("7.3.0", distribution_version)
        self.assertEqual("de777fa", revision)

        metrics_store_representation = driver.run()

        metric_store = metrics.metrics_store(cfg,
                                             read_only=True,
                                             track=current_track,
                                             challenge=current_challenge)
        metric_store.bulk_add(metrics_store_representation)

        self.assertIsNotNone(
            metric_store.get_one(name="latency",
                                 task="bulk-index",
                                 sample_type=metrics.SampleType.Normal))
        self.assertIsNotNone(
            metric_store.get_one(name="service_time",
                                 task="bulk-index",
                                 sample_type=metrics.SampleType.Normal))
        self.assertIsNotNone(
            metric_store.get_one(name="processing_time",
                                 task="bulk-index",
                                 sample_type=metrics.SampleType.Normal))
        self.assertIsNotNone(
            metric_store.get_one(name="throughput",
                                 task="bulk-index",
                                 sample_type=metrics.SampleType.Normal))
        self.assertIsNotNone(
            metric_store.get_one(name="node_total_young_gen_gc_time",
                                 sample_type=metrics.SampleType.Normal))
        self.assertIsNotNone(
            metric_store.get_one(name="node_total_old_gen_gc_time",
                                 sample_type=metrics.SampleType.Normal))
        # ensure that there are not more documents than we expect
        self.assertEqual(6,
                         len(metric_store.docs),
                         msg=json.dumps(metric_store.docs, indent=2))
Example #23
    def start_metrics(self, track, challenge, car):
        invocation = self._config.opts("meta", "time.start")
        self._metrics_store = metrics.metrics_store(self._config)
        self._metrics_store.open(invocation, str(track), str(challenge), str(car), create=True)
Example #24
def benchmark_only(cfg):
    # We'll use a special car name for external benchmarks.
    cfg.add(config.Scope.benchmark, "benchmarks", "car", "external")
    metrics_store = metrics.metrics_store(cfg, read_only=False)
    return race(Benchmark(cfg, mechanic.create(cfg, metrics_store, external=True), metrics_store), cfg)
Example #25
def from_distribution(cfg):
    metrics_store = metrics.metrics_store(cfg, read_only=False)
    return race(Benchmark(cfg, mechanic.create(cfg, metrics_store, distribution=True), metrics_store), cfg)
Example #26
def docker(cfg):
    metrics_store = metrics.metrics_store(cfg, read_only=False)
    return race(Benchmark(cfg, mechanic.create(cfg, metrics_store, docker=True), metrics_store), cfg)
Example #28
    def test_calculate_simple_index_stats(self):
        cfg = config.Config()
        cfg.add(config.Scope.application, "system", "env.name", "unittest")
        cfg.add(config.Scope.application, "system", "time.start",
                datetime.datetime.now())
        cfg.add(config.Scope.application, "reporting", "datastore.type",
                "in-memory")
        cfg.add(config.Scope.application, "mechanic", "car.name",
                "unittest_car")
        cfg.add(config.Scope.application, "race", "laps", 1)
        cfg.add(config.Scope.application, "race", "user.tag", "")
        cfg.add(config.Scope.application, "race", "pipeline",
                "from-sources-skip-build")

        index = track.Task(
            operation=track.Operation(name="index",
                                      operation_type=track.OperationType.Index,
                                      params=None))
        challenge = track.Challenge(name="unittest",
                                    description="",
                                    index_settings=None,
                                    schedule=[index],
                                    default=True)
        t = track.Track("unittest", "unittest-track", challenges=[challenge])

        store = metrics.metrics_store(cfg,
                                      read_only=False,
                                      track=t,
                                      challenge=challenge)
        store.lap = 1

        store.put_value_cluster_level("throughput",
                                      500,
                                      unit="docs/s",
                                      operation="index",
                                      operation_type=track.OperationType.Index)
        store.put_value_cluster_level("throughput",
                                      1000,
                                      unit="docs/s",
                                      operation="index",
                                      operation_type=track.OperationType.Index)
        store.put_value_cluster_level("throughput",
                                      2000,
                                      unit="docs/s",
                                      operation="index",
                                      operation_type=track.OperationType.Index)

        store.put_value_cluster_level("latency",
                                      2800,
                                      unit="ms",
                                      operation="index",
                                      operation_type=track.OperationType.Index,
                                      sample_type=metrics.SampleType.Warmup)
        store.put_value_cluster_level("latency",
                                      200,
                                      unit="ms",
                                      operation="index",
                                      operation_type=track.OperationType.Index)
        store.put_value_cluster_level("latency",
                                      220,
                                      unit="ms",
                                      operation="index",
                                      operation_type=track.OperationType.Index)
        store.put_value_cluster_level("latency",
                                      225,
                                      unit="ms",
                                      operation="index",
                                      operation_type=track.OperationType.Index)

        store.put_value_cluster_level("service_time",
                                      250,
                                      unit="ms",
                                      operation="index",
                                      operation_type=track.OperationType.Index,
                                      sample_type=metrics.SampleType.Warmup,
                                      meta_data={"success": False})
        store.put_value_cluster_level("service_time",
                                      190,
                                      unit="ms",
                                      operation="index",
                                      operation_type=track.OperationType.Index,
                                      meta_data={"success": True})
        store.put_value_cluster_level("service_time",
                                      200,
                                      unit="ms",
                                      operation="index",
                                      operation_type=track.OperationType.Index,
                                      meta_data={"success": False})
        store.put_value_cluster_level("service_time",
                                      215,
                                      unit="ms",
                                      operation="index",
                                      operation_type=track.OperationType.Index,
                                      meta_data={"success": True})

        stats = reporter.calculate_results(
            store, metrics.create_race(cfg, t, challenge))

        del store

        opm = stats.metrics("index")
        self.assertEqual(
            collections.OrderedDict([("min", 500), ("median", 1000),
                                     ("max", 2000), ("unit", "docs/s")]),
            opm["throughput"])
        self.assertEqual(collections.OrderedDict([("50", 220), ("100", 225)]),
                         opm["latency"])
        self.assertEqual(collections.OrderedDict([("50", 200), ("100", 215)]),
                         opm["service_time"])
        self.assertAlmostEqual(0.3333333333333333, opm["error_rate"])