Example #1
 def parse_operations(self, ops_specs):
     # key = name, value = operation
     ops = {}
     for op_spec in ops_specs:
         op_name = self._r(op_spec, "name", error_ctx="operations")
         # Rally's core operations will still use enums internally, but users may define arbitrary operation types
         op_type_name = self._r(op_spec,
                                "operation-type",
                                error_ctx="operations")
         try:
             op_type = track.OperationType.from_hyphenated_string(
                 op_type_name).name
             logger.debug(
                 "Using built-in operation type [%s] for operation [%s]." %
                 (op_type, op_name))
         except KeyError:
             logger.info(
                 "Using user-provided operation type [%s] for operation [%s]."
                 % (op_type_name, op_name))
             op_type = op_type_name
         param_source = self._r(op_spec,
                                "param-source",
                                error_ctx="operations",
                                mandatory=False)
         try:
             ops[op_name] = track.Operation(name=op_name,
                                            operation_type=op_type,
                                            params=op_spec,
                                            param_source=param_source)
         except exceptions.InvalidSyntax as e:
             raise TrackSyntaxError("Invalid operation [%s]: %s" %
                                    (op_name, str(e)))
     return ops
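
For reference, a hypothetical ops_specs value this parser would accept; the keys ("name", "operation-type", "param-source") mirror the self._r(...) reads above, while the concrete names and values are purely illustrative:

# Hypothetical input for parse_operations (illustrative values only).
ops_specs = [
    {
        # "bulk" resolves via OperationType.from_hyphenated_string()
        "name": "index-append",
        "operation-type": "bulk",
    },
    {
        # an unknown type takes the KeyError branch and is used verbatim
        "name": "custom-scan",
        "operation-type": "my-scan-type",
        "param-source": "my-param-source",
    },
]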
Example #2
    def test_scheduler_adapts_to_changed_weights(self):
        task = track.Task(
            name="bulk-index",
            operation=track.Operation(
                name="bulk-index",
                operation_type=track.OperationType.Bulk.to_hyphenated_string()),
            clients=4,
            params={"target-throughput": "5000 docs/s"},
        )

        s = scheduler.UnitAwareScheduler(
            task=task, scheduler_class=scheduler.DeterministicScheduler)
        # first request is unthrottled
        # suppress pylint false positive
        # pylint: disable=not-callable
        assert s.next(0) == 0
        # we'll start with bulks of 1,000 docs, which corresponds to 5 requests per second across all clients
        s.after_request(now=None,
                        weight=1000,
                        unit="docs",
                        request_meta_data=None)
        # suppress pylint false positive
        # pylint: disable=not-callable
        assert s.next(0) == 1 / 5 * task.clients

        # bulk size changes to 10,000 docs, which means one request every two seconds across all clients
        s.after_request(now=None,
                        weight=10000,
                        unit="docs",
                        request_meta_data=None)
        # suppress pylint false positive
        # pylint: disable=not-callable
        assert s.next(0) == 2 * task.clients
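
The expected wait times in the assertions follow from simple arithmetic rather than from scheduler internals. A minimal sketch, assuming the target throughput of "5000 docs/s" is an aggregate across all four clients:

# Sketch of the arithmetic behind the assertions above (assumes the
# target throughput is shared across all clients).
target_docs_per_second = 5000
clients = 4

def per_client_interval(weight_docs):
    requests_per_second = target_docs_per_second / weight_docs
    return clients / requests_per_second

assert per_client_interval(1000) == 1 / 5 * clients  # 0.8 seconds
assert per_client_interval(10000) == 2 * clients     # 8.0 seconds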
Example #3
    def test_scheduler_accepts_differing_units_pages_and_ops(self):
        task = track.Task(
            name="scroll-query",
            operation=track.Operation(
                name="scroll-query",
                operation_type=track.OperationType.Search.to_hyphenated_string()),
            clients=1,
            params={
                # implicitly: ops/s
                "target-throughput": 10
            },
        )

        s = scheduler.UnitAwareScheduler(
            task=task, scheduler_class=scheduler.DeterministicScheduler)
        # first request is unthrottled
        # suppress pylint false positive
        # pylint: disable=not-callable
        assert s.next(0) == 0
        # no exception despite differing units ...
        s.after_request(now=None,
                        weight=20,
                        unit="pages",
                        request_meta_data=None)
        # ... and it is still throttled in ops/s
        # suppress pylint false positive
        # pylint: disable=not-callable
        assert s.next(0) == 0.1 * task.clients
Example #4
    def test_scheduler_does_not_change_throughput_for_empty_requests(self):
        task = track.Task(
            name="match-all-query",
            operation=track.Operation(
                name="query",
                operation_type=track.OperationType.Search.to_hyphenated_string()),
            clients=1,
            params={
                # implicitly: ops/s
                "target-throughput": 10
            },
        )

        s = scheduler.UnitAwareScheduler(
            task=task, scheduler_class=scheduler.DeterministicScheduler)
        # first request is unthrottled...
        s.before_request(now=0)
        # suppress pylint false positive
        # pylint: disable=not-callable
        assert s.next(0) == 0
        # ... but it also produced an error (zero ops)
        s.after_request(now=1, weight=0, unit="ops", request_meta_data=None)
        # next request is still unthrottled
        s.before_request(now=1)
        # suppress pylint false positive
        # pylint: disable=not-callable
        assert s.next(0) == 0
        s.after_request(now=2, weight=1, unit="ops", request_meta_data=None)
        # now we throttle
        s.before_request(now=2)
        # suppress pylint false positive
        # pylint: disable=not-callable
        assert s.next(0) == 0.1 * task.clients
Example #5
    def test_scheduler_adapts_to_changed_weights(self):
        task = track.Task(name="bulk-index",
                          operation=track.Operation(
                              name="bulk-index",
                              operation_type=track.OperationType.Bulk.name),
                          clients=4,
                          params={"target-throughput": "5000 docs/s"})

        s = scheduler.UnitAwareScheduler(
            task=task, scheduler_class=scheduler.DeterministicScheduler)
        # first request is unthrottled
        self.assertEqual(0, s.next(0))
        # we'll start with bulks of 1,000 docs, which corresponds to 5 requests per second across all clients
        s.after_request(now=None,
                        weight=1000,
                        unit="docs",
                        request_meta_data=None)
        self.assertEqual(1 / 5 * task.clients, s.next(0))

        # bulk size changes to 10,000 docs, which means one request every two seconds across all clients
        s.after_request(now=None,
                        weight=10000,
                        unit="docs",
                        request_meta_data=None)
        self.assertEqual(2 * task.clients, s.next(0))
Example #6
    def parse_operation(self, op_spec, error_ctx="operations"):
        # the spec is just a name; assume it is a simple operation like force-merge and create a full operation from it
        if isinstance(op_spec, str):
            op_name = op_spec
            meta_data = None
            op_type_name = op_spec
            param_source = None
        else:
            meta_data = self._r(op_spec,
                                "meta",
                                error_ctx=error_ctx,
                                mandatory=False)
            # Rally's core operations will still use enums internally, but users may define arbitrary operation types
            op_type_name = self._r(op_spec,
                                   "operation-type",
                                   error_ctx=error_ctx)
            # fall back to the operation type as the operation name
            op_name = self._r(op_spec,
                              "name",
                              error_ctx=error_ctx,
                              mandatory=False,
                              default_value=op_type_name)
            param_source = self._r(op_spec,
                                   "param-source",
                                   error_ctx=error_ctx,
                                   mandatory=False)

        try:
            # TODO #370: Remove this warning.
            # Add a deprecation warning but not for built-in tracks (they need to keep the name for backwards compatibility in the meantime)
            if (op_type_name == "index"
                    and self.name not in ["geonames", "geopoint", "noaa", "logging",
                                          "nyc_taxis", "pmc", "percolator", "nested"]
                    and not self.index_op_type_warning_issued):
                console.warn(
                    "The track %s uses the deprecated operation-type [index] for bulk index operations. Please rename this "
                    "operation type to [bulk]." % self.name)
                # Don't spam the console...
                self.index_op_type_warning_issued = True

            op_type = track.OperationType.from_hyphenated_string(
                op_type_name).name
            logger.debug(
                "Using built-in operation type [%s] for operation [%s]." %
                (op_type, op_name))
        except KeyError:
            logger.info(
                "Using user-provided operation type [%s] for operation [%s]." %
                (op_type_name, op_name))
            op_type = op_type_name

        try:
            return track.Operation(name=op_name,
                                   meta_data=meta_data,
                                   operation_type=op_type,
                                   params=op_spec,
                                   param_source=param_source)
        except exceptions.InvalidSyntax as e:
            raise TrackSyntaxError("Invalid operation [%s]: %s" %
                                   (op_name, str(e)))
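
Both spec shapes handled above can be exercised as follows; this is a hypothetical call site, with parser standing in for an instance of the enclosing reader class:

# Hypothetical calls covering both branches of parse_operation.
force_merge = parser.parse_operation("force-merge")  # name == type == spec
bulk = parser.parse_operation({
    "name": "index-append",
    "operation-type": "bulk",
    "param-source": "my-param-source",
})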
Example #7
 def create_index_task(self):
     return track.Task(
         "create-index-task",
         track.Operation("create-index-op",
                         operation_type=track.OperationType.CreateIndex.
                         to_hyphenated_string()),
         tags=["write-op", "admin-op"],
     )
Example #8
 def task(self, schedule=None, target_throughput=None, target_interval=None):
     op = track.Operation("bulk-index", track.OperationType.Bulk.to_hyphenated_string())
     params = {}
     if target_throughput is not None:
         params["target-throughput"] = target_throughput
     if target_interval is not None:
         params["target-interval"] = target_interval
     return track.Task("test", op, schedule=schedule, params=params)
Example #9
 def task(self,
          schedule=None,
          target_throughput=None,
          target_interval=None):
     op = track.Operation("bulk-index", track.OperationType.Bulk.name)
     params = {}
     if target_throughput is not None:
         params["target-throughput"] = target_throughput
     if target_interval is not None:
         params["target-interval"] = target_interval
     return track.Task("test", op, schedule=schedule, params=params)
Example #10
    def test_legacy_scheduler(self):
        task = track.Task(
            name="raw-request",
            operation=track.Operation(name="raw", operation_type=track.OperationType.RawRequest.to_hyphenated_string()),
            clients=1,
            schedule="simple",
        )

        s = scheduler.scheduler_for(task)

        self.assertEqual(0, s.next(0))
        self.assertEqual(0, s.next(0))
Example #11
 def task(self,
          schedule=None,
          target_throughput=None,
          target_interval=None,
          ignore_response_error_level=None):
     op = track.Operation("bulk-index",
                          track.OperationType.Bulk.to_hyphenated_string())
     params = {}
     if target_throughput is not None:
         params["target-throughput"] = target_throughput
     if target_interval is not None:
         params["target-interval"] = target_interval
     if ignore_response_error_level is not None:
         params["ignore-response-error-level"] = ignore_response_error_level
     return track.Task("test", op, schedule=schedule, params=params)
Example #12
    def test_scheduler_rejects_differing_throughput_units(self):
        task = track.Task(
            name="bulk-index",
            operation=track.Operation(name="bulk-index", operation_type=track.OperationType.Bulk.to_hyphenated_string()),
            clients=4,
            params={"target-throughput": "5000 MB/s"},
        )

        s = scheduler.UnitAwareScheduler(task=task, scheduler_class=scheduler.DeterministicScheduler)
        with self.assertRaises(exceptions.RallyAssertionError) as ex:
            s.after_request(now=None, weight=1000, unit="docs", request_meta_data=None)
        self.assertEqual(
            "Target throughput for [bulk-index] is specified in [MB/s] but the task throughput is measured in [docs/s].",
            ex.exception.args[0],
        )
Example #13
    def test_sets_absolute_path(self):
        from esrally import config
        from esrally.track import track

        cfg = config.Config()
        cfg.add(config.Scope.application, "benchmarks", "local.dataset.cache",
                "/data")

        default_challenge = track.Challenge(
            "default",
            description="default challenge",
            default=True,
            schedule=[
                track.Task(
                    operation=track.Operation(
                        "index", operation_type=track.OperationType.Index),
                    clients=4)
            ])
        another_challenge = track.Challenge(
            "other", description="non-default challenge", default=False)
        t = track.Track(
            name="unittest",
            short_description="unittest track",
            challenges=[another_challenge, default_challenge],
            indices=[
                track.Index(name="test",
                            auto_managed=True,
                            types=[
                                track.Type(
                                    "docs",
                                    mapping={},
                                    document_file="docs/documents.json",
                                    document_archive="docs/documents.json.bz2")
                            ])
            ])

        loader.set_absolute_data_path(cfg, t)

        self.assertEqual("/data/docs/documents.json",
                         t.indices[0].types[0].document_file)
        self.assertEqual("/data/docs/documents.json.bz2",
                         t.indices[0].types[0].document_archive)
Example #14
    def test_scheduler_accepts_differing_units_pages_and_ops(self):
        task = track.Task(
            name="scroll-query",
            operation=track.Operation(
                name="scroll-query",
                operation_type=track.OperationType.Search.name),
            clients=1,
            params={
                # implicitly: ops/s
                "target-throughput": 10
            })

        s = scheduler.UnitAwareScheduler(
            task=task, scheduler_class=scheduler.DeterministicScheduler)
        # first request is unthrottled
        self.assertEqual(0, s.next(0))
        # no exception despite differing units ...
        s.after_request(now=None,
                        weight=20,
                        unit="pages",
                        request_meta_data=None)
        # ... and it is still throttled in ops/s
        self.assertEqual(0.1 * task.clients, s.next(0))
Example #15
    def test_run_benchmark(self):
        cfg = config.Config()

        cfg.add(config.Scope.application, "system", "env.name", "unittest")
        cfg.add(
            config.Scope.application, "system", "time.start",
            datetime(year=2017, month=8, day=20, hour=1, minute=0, second=0))
        cfg.add(config.Scope.application, "system", "race.id",
                "6ebc6e53-ee20-4b0c-99b4-09697987e9f4")
        cfg.add(config.Scope.application, "system", "offline.mode", False)
        cfg.add(config.Scope.application, "driver", "on.error", "abort")
        cfg.add(config.Scope.application, "driver", "profiling", False)
        cfg.add(config.Scope.application, "reporting", "datastore.type",
                "in-memory")
        cfg.add(config.Scope.application, "track", "params", {})
        cfg.add(config.Scope.application, "track", "test.mode.enabled", True)
        cfg.add(config.Scope.application, "telemetry", "devices", [])
        cfg.add(config.Scope.application, "telemetry", "params", {})
        cfg.add(config.Scope.application, "mechanic", "car.names",
                ["external"])
        cfg.add(config.Scope.application, "mechanic", "skip.rest.api.check",
                True)
        cfg.add(
            config.Scope.application, "client", "hosts",
            AsyncDriverTests.Holder(all_hosts={"default": ["localhost:9200"]}))
        cfg.add(config.Scope.application, "client", "options",
                AsyncDriverTests.Holder(all_client_options={"default": {}}))

        params.register_param_source_for_name("bulk-param-source",
                                              AsyncDriverTestParamSource)

        task = track.Task(
            name="bulk-index",
            operation=track.Operation(
                "bulk-index",
                track.OperationType.Bulk.name,
                params={
                    "body": ["action_metadata_line", "index_line"],
                    "action-metadata-present": True,
                    "bulk-size": 1,
                    # we need this because the parameter source does not know that we only have one
                    # bulk and hence size() returns incorrect results
                    "size": 1
                },
                param_source="bulk-param-source"),
            warmup_iterations=0,
            iterations=1,
            clients=1)

        current_challenge = track.Challenge(name="default",
                                            default=True,
                                            schedule=[task])
        current_track = track.Track(name="unit-test",
                                    challenges=[current_challenge])

        driver = async_driver.AsyncDriver(
            cfg,
            current_track,
            current_challenge,
            es_client_factory_class=StaticClientFactory)

        distribution_flavor, distribution_version, revision = driver.setup()
        self.assertEqual("oss", distribution_flavor)
        self.assertEqual("7.3.0", distribution_version)
        self.assertEqual("de777fa", revision)

        metrics_store_representation = driver.run()

        metric_store = metrics.metrics_store(cfg,
                                             read_only=True,
                                             track=current_track,
                                             challenge=current_challenge)
        metric_store.bulk_add(metrics_store_representation)

        self.assertIsNotNone(
            metric_store.get_one(name="latency",
                                 task="bulk-index",
                                 sample_type=metrics.SampleType.Normal))
        self.assertIsNotNone(
            metric_store.get_one(name="service_time",
                                 task="bulk-index",
                                 sample_type=metrics.SampleType.Normal))
        self.assertIsNotNone(
            metric_store.get_one(name="processing_time",
                                 task="bulk-index",
                                 sample_type=metrics.SampleType.Normal))
        self.assertIsNotNone(
            metric_store.get_one(name="throughput",
                                 task="bulk-index",
                                 sample_type=metrics.SampleType.Normal))
        self.assertIsNotNone(
            metric_store.get_one(name="node_total_young_gen_gc_time",
                                 sample_type=metrics.SampleType.Normal))
        self.assertIsNotNone(
            metric_store.get_one(name="node_total_old_gen_gc_time",
                                 sample_type=metrics.SampleType.Normal))
        # ensure that there are no more documents than we expect
        self.assertEqual(6,
                         len(metric_store.docs),
                         msg=json.dumps(metric_store.docs, indent=2))
Example #16
 def search_task(self):
     return track.Task(
         "search-task",
         track.Operation("search-op",
                         operation_type=track.OperationType.Search.
                         to_hyphenated_string()))
Example #17
def post_process_for_index_auto_management(t, expected_cluster_health):
    auto_managed_indices = any(index.auto_managed for index in t.indices)
    # spare users this warning for our default tracks
    if auto_managed_indices and t.name not in DEFAULT_TRACKS:
        console.warn("Track [%s] uses index auto-management which will be removed soon. Please add [delete-index] and [create-index] "
                     "tasks at the beginning of each relevant challenge and turn off index auto-management for each index. For details "
                     "please see the migration guide in the docs." % t.name)
    if auto_managed_indices or len(t.templates) > 0:
        for challenge in t.challenges:
            tasks = []
            # TODO: Remove the index settings element. We can do this much better now with the create-index operation.
            create_index_params = {"include-in-reporting": False}
            if challenge.index_settings:
                if t.name not in DEFAULT_TRACKS:
                    console.warn("Track [%s] defines the deprecated property 'index-settings'. Please create indices explicitly with "
                                 "[create-index] and the respective index settings there instead." % t.name)
                create_index_params["settings"] = challenge.index_settings
            if len(t.templates) > 0:
                # check if the user has already defined a create-index-template operation
                user_creates_templates = any(task.matches(track.TaskOpTypeFilter(track.OperationType.CreateIndexTemplate.name))
                                             for task in challenge.schedule)
                # We still attempt to do this automatically but issue a warning so that users will create the templates themselves.
                if not user_creates_templates:
                    console.warn("Track [%s] defines %d index template(s) but soon Rally will not create them implicitly anymore. Please "
                                 "add [delete-index-template] and [create-index-template] tasks at the beginning of the challenge %s."
                                 % (t.name, len(t.templates), challenge.name), logger=logger)
                    tasks.append(track.Task(name="auto-delete-index-templates",
                                            operation=track.Operation(name="auto-delete-index-templates",
                                                                      operation_type=track.OperationType.DeleteIndexTemplate.name,
                                                                      params={
                                                                          "include-in-reporting": False,
                                                                          "only-if-exists": True
                                                                      })))
                    tasks.append(track.Task(name="auto-create-index-templates",
                                            operation=track.Operation(name="auto-create-index-templates",
                                                                      operation_type=track.OperationType.CreateIndexTemplate.name,
                                                                      params=create_index_params.copy())))

            if auto_managed_indices:
                tasks.append(track.Task(name="auto-delete-indices",
                                        operation=track.Operation(name="auto-delete-indices",
                                                                  operation_type=track.OperationType.DeleteIndex.name,
                                                                  params={
                                                                      "include-in-reporting": False,
                                                                      "only-if-exists": True
                                                                  })))
                tasks.append(track.Task(name="auto-create-indices",
                                        operation=track.Operation(name="auto-create-indices",
                                                                  operation_type=track.OperationType.CreateIndex.name,
                                                                  params=create_index_params.copy())))

            # check if the user has already defined a cluster-health operation
            user_checks_cluster_health = any(task.matches(track.TaskOpTypeFilter(track.OperationType.ClusterHealth.name))
                                             for task in challenge.schedule)

            if expected_cluster_health != "skip" and not user_checks_cluster_health:
                tasks.append(track.Task(name="auto-check-cluster-health",
                                        operation=track.Operation(name="auto-check-cluster-health",
                                                                  operation_type=track.OperationType.ClusterHealth.name,
                                                                  params={
                                                                      "include-in-reporting": False,
                                                                      "request-params": {
                                                                          "wait_for_status": expected_cluster_health
                                                                      }
                                                                  })))

            challenge.prepend_tasks(tasks)
    return t
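
For orientation, the order of the prepended tasks falls out of the append calls above. A sketch, assuming the track defines templates, uses auto-managed indices, and expects a cluster health other than "skip":

# Sketch of the prepended task order under the assumptions above.
expected_prepended = [
    "auto-delete-index-templates",
    "auto-create-index-templates",
    "auto-delete-indices",
    "auto-create-indices",
    "auto-check-cluster-health",
]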