Example #1
    def transition_when_all_children_responded(self, sender, msg, expected_status, new_status, transition):
        """

        Waits until all children have sent a specific response message and then transitions this actor to a new status.

        :param sender: The child actor that has responded.
        :param msg: The response message.
        :param expected_status: The status in which this actor should be upon calling this method.
        :param new_status: The new status once all child actors have responded.
        :param transition: A parameter-less function to call immediately after changing the status.
        """
        if self.is_current_status_expected(expected_status):
            self.received_responses.append(msg)
            response_count = len(self.received_responses)
            expected_count = len(self.children)

            self.logger.debug("[%d] of [%d] child actors have responded for transition from [%s] to [%s].",
                              response_count, expected_count, self.status, new_status)
            if response_count == expected_count:
                self.logger.debug("All [%d] child actors have responded. Transitioning now from [%s] to [%s].",
                                  expected_count, self.status, new_status)
                # all nodes have responded, change status
                self.status = new_status
                self.received_responses = []
                transition()
            elif response_count > expected_count:
                raise exceptions.RallyAssertionError(
                    "Received [%d] responses but only [%d] were expected to transition from [%s] to [%s]. The responses are: %s" %
                    (response_count, expected_count, self.status, new_status, self.received_responses))
        else:
            raise exceptions.RallyAssertionError("Received [%s] from [%s] but we are in status [%s] instead of [%s]." %
                                                 (type(msg), sender, self.status, expected_status))
Example #2
    def drive(self):
        task = None
        # skip non-tasks in the task list
        while task is None:
            task = self.tasks[self.current_task]
            self.current_task += 1

        if isinstance(task, JoinPoint):
            logger.info("client [%d] reached join point [%s]." %
                        (self.client_id, task))
            # clients that don't execute tasks don't need to care about waiting
            if self.executor_future is not None:
                self.executor_future.result()
            self.send_samples()
            self.executor_future = None
            self.sampler = None
            self.send(self.master, JoinPointReached(self.client_id, task))
        elif isinstance(task, track.Task):
            logger.info("Client [%d] is executing [%s]." %
                        (self.client_id, task))
            self.sampler = Sampler(self.client_id, task.operation,
                                   self.start_timestamp)
            schedule = schedule_for(self.track, task, self.client_id)
            self.executor_future = self.pool.submit(execute_schedule, schedule,
                                                    self.es, self.sampler)
            self.wakeupAfter(
                datetime.timedelta(
                    seconds=LoadGenerator.WAKEUP_INTERVAL_SECONDS))
        else:
            raise exceptions.RallyAssertionError("Unknown task type [%s]" %
                                                 type(task))
Example #3
def create(cfg, metrics_store, node_ip, node_http_port, all_node_ips, all_node_ids, sources=False, distribution=False,
           external=False, docker=False):
    race_root_path = paths.race_root(cfg)
    node_ids = cfg.opts("provisioning", "node.ids", mandatory=False)
    node_name_prefix = cfg.opts("provisioning", "node.name.prefix")
    car, plugins = load_team(cfg, external)

    if sources or distribution:
        s = supplier.create(cfg, sources, distribution, car, plugins)
        p = []
        all_node_names = ["%s-%s" % (node_name_prefix, n) for n in all_node_ids]
        for node_id in node_ids:
            node_name = "%s-%s" % (node_name_prefix, node_id)
            p.append(
                provisioner.local(cfg, car, plugins, node_ip, node_http_port, all_node_ips,
                                  all_node_names, race_root_path, node_name))
        l = launcher.ProcessLauncher(cfg)
    elif external:
        raise exceptions.RallyAssertionError("Externally provisioned clusters should not need to be managed by Rally's mechanic")
    elif docker:
        if len(plugins) > 0:
            raise exceptions.SystemSetupError("You cannot specify any plugins for Docker clusters. Please remove "
                                              "\"--elasticsearch-plugins\" and try again.")
        s = lambda: None  # no-op supplier: nothing to build or download for Docker
        p = []
        for node_id in node_ids:
            node_name = "%s-%s" % (node_name_prefix, node_id)
            p.append(provisioner.docker(cfg, car, node_ip, node_http_port, race_root_path, node_name))
        l = launcher.DockerLauncher(cfg)
    else:
        # It is a programmer error (and not a user error) if this function is called with wrong parameters
        raise RuntimeError("One of sources, distribution, docker or external must be True")

    return Mechanic(cfg, metrics_store, s, p, l)
Example #4
def iteration_count_based(target_throughput, warmup_iterations, iterations,
                          runner, params):
    """
    Calculates the necessary schedule based on a given number of iterations.

    :param target_throughput: The desired target throughput in operations / second or None if throughput should not be limited.
    :param warmup_iterations: The number of warmup iterations to run. 0 if no warmup should be performed.
    :param iterations: The number of measurement iterations to run.
    :param runner: The runner for a given operation.
    :param params: The parameter source for a given operation.
    :return: A generator for the corresponding parameters.
    """
    wait_time = 1 / target_throughput if target_throughput else 0
    total_iterations = warmup_iterations + iterations
    if total_iterations == 0:
        raise exceptions.RallyAssertionError(
            "Operation must run at least for one iteration.")
    for i in range(0, warmup_iterations):
        yield (wait_time * i, lambda start: metrics.SampleType.Warmup, i,
               total_iterations, runner, params.params())

    for i in range(0, iterations):
        yield (wait_time * (warmup_iterations + i),
               lambda start: metrics.SampleType.Normal, i, total_iterations,
               runner, params.params())
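
The docstring above defines the timing model. A simplified, self-contained sketch (dropping the runner and parameter source that the real generator also yields) shows that consecutive iterations start 1/target_throughput seconds apart and that warmup simply occupies the first slots:

def simple_schedule(target_throughput, warmup_iterations, iterations):
    # simplified stand-in for iteration_count_based: yields (start_time, sample_type, i)
    wait_time = 1 / target_throughput if target_throughput else 0
    for i in range(warmup_iterations):
        yield wait_time * i, "warmup", i
    for i in range(iterations):
        yield wait_time * (warmup_iterations + i), "normal", i

# two warmup and three measurement iterations at 2 ops/s
for start, sample_type, i in simple_schedule(2, 2, 3):
    print(f"t={start:.1f}s: {sample_type} iteration {i}")
# t=0.0s: warmup iteration 0
# t=0.5s: warmup iteration 1
# t=1.0s: normal iteration 0
# t=1.5s: normal iteration 1
# t=2.0s: normal iteration 2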
Example #5
    def partition(self, partition_index, total_partitions):
        if self.total_partitions is None:
            self.total_partitions = total_partitions
        elif self.total_partitions != total_partitions:
            raise exceptions.RallyAssertionError(
                f"Total partitions is expected to be [{self.total_partitions}] but was [{total_partitions}]")
        self.partitions.append(partition_index)
Example #6
    def after_request(self, now, weight, unit, request_meta_data):
        if weight > 0 and (self.first_request
                           or self.current_weight != weight):
            expected_unit = self.task.target_throughput.unit
            actual_unit = f"{unit}/s"
            if actual_unit != expected_unit:
                # *temporary* workaround to convert mismatching units to ops/s to stay backwards-compatible.
                #
                # This ensures that we throttle based on ops/s but report based on the original unit (as before).
                if expected_unit == "ops/s":
                    weight = 1
                    if self.first_request:
                        logging.getLogger(__name__).warning(
                            "Task [%s] throttles based on [%s] but reports [%s]. Please specify the target throughput in [%s] instead.",
                            self.task,
                            expected_unit,
                            actual_unit,
                            actual_unit,
                        )
                else:
                    raise exceptions.RallyAssertionError(
                        f"Target throughput for [{self.task}] is specified in "
                        f"[{expected_unit}] but the task throughput is measured "
                        f"in [{actual_unit}].")

            self.first_request = False
            self.current_weight = weight
            # throughput in requests/s for this client
            target_throughput = self.task.target_throughput.value / self.task.clients / self.current_weight
            self.scheduler = self.scheduler_class(self.task, target_throughput)
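
The final line divides the task-wide target evenly across clients and then by the request weight (for example, the number of documents in a bulk request), so the scheduler throttles whole requests rather than individual documents. A worked example with illustrative numbers:

target_value = 10000   # task-wide target, e.g. 10000 docs/s
clients = 8
weight = 500           # e.g. 500 docs per bulk request

# requests/s each client may issue so that all clients together reach the target
per_client_request_rate = target_value / clients / weight
print(per_client_request_rate)  # 2.5 requests/s per client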
Example #7
def _do_wait(es, expected_cluster_status):
    reached_cluster_status = None
    use_wait_for_relocating_shards = False
    for attempt in range(10):
        try:
            if use_wait_for_relocating_shards:
                result = es.cluster.health(wait_for_status=expected_cluster_status, wait_for_relocating_shards=0, timeout="3s")
            else:
                result = es.cluster.health(wait_for_status=expected_cluster_status, timeout="3s",
                                           params={"wait_for_no_relocating_shards": True})

        except (socket.timeout, elasticsearch.exceptions.ConnectionError):
            pass
        except elasticsearch.exceptions.TransportError as e:
            if 400 <= e.status_code < 500:
                logger.exception("Client error in health API. Using 'wait_for_no_relocating_shards'.")
                use_wait_for_no_relocating_shards = True
        else:
            reached_cluster_status = result["status"]
            relocating_shards = result["relocating_shards"]
            logger.info("GOT: %s" % str(result))
            logger.info("ALLOC:\n%s" % es.cat.allocation(v=True))
            logger.info("RECOVERY:\n%s" % es.cat.recovery(v=True))
            logger.info("SHARDS:\n%s" % es.cat.shards(v=True))
            if reached_cluster_status == expected_cluster_status and relocating_shards == 0:
                return reached_cluster_status, relocating_shards
            else:
                time.sleep(0.5)
    msg = "Cluster did not reach status [%s]. Last reached status: [%s]" % (expected_cluster_status, reached_cluster_status)
    logger.error(msg)
    raise exceptions.RallyAssertionError(msg)
Example #8
    def __next__(self):
        if self.conflicting_ids is not None:
            if self.conflict_probability and self.id_up_to > 0 and self.rand() <= self.conflict_probability:
                # a recency of zero means that we don't care about recency and just take a random number
                # within the whole interval.
                if self.recency == 0:
                    idx = self.randint(0, self.id_up_to - 1)
                else:
                    # A recency > 0 biases id selection towards more recent ids. The recency parameter decides
                    # by how much we bias. See docs for the resulting curve.
                    #
                    # idx_range is in the interval [0, 1].
                    idx_range = min(self.randexp(GenerateActionMetaData.RECENCY_SLOPE * self.recency), 1)
                    # the resulting index is in the range [0, self.id_up_to). Note that a smaller idx_range
                    # biases towards more recently used ids (higher indexes).
                    idx = round((self.id_up_to - 1) * (1 - idx_range))

                doc_id = self.conflicting_ids[idx]
                action = self.on_conflict
            else:
                if self.id_up_to >= len(self.conflicting_ids):
                    raise StopIteration()
                doc_id = self.conflicting_ids[self.id_up_to]
                self.id_up_to += 1
                action = "index"

            if action == "index":
                return "index", self.meta_data_index_with_id % doc_id
            elif action == "update":
                return "update", self.meta_data_update_with_id % doc_id
            else:
                raise exceptions.RallyAssertionError("Unknown action [{}]".format(action))
        else:
            return "index", self.meta_data_index_no_id
Example #9
    def union(self, other):
        if self.name != other.name:
            raise exceptions.RallyAssertionError(
                "Both document corpora must have the same name")
        if self is other:
            return self
        else:
            return DocumentCorpus(
                self.name, list(set(self.documents).union(other.documents)))
Example #10
    def receiveMsg_WakeupMessage(self, msg, sender):
        if msg.payload == MechanicActor.WAKEUP_RESET_RELATIVE_TIME:
            self.reset_relative_time()
        elif msg.payload == MechanicActor.WAKEUP_FLUSH_METRICS:
            logger.info("Flushing cluster-wide system metrics store.")
            self.metrics_store.flush(refresh=False)
            self.wakeupAfter(METRIC_FLUSH_INTERVAL_SECONDS, payload=MechanicActor.WAKEUP_FLUSH_METRICS)
        else:
            raise exceptions.RallyAssertionError("Unknown wakeup reason [{}]".format(msg.payload))
Example #11
    def partition(self, partition_index, total_partitions):
        chosen_indices = [idx for idx in self.indices if idx.matches(self.index_name)]
        if not chosen_indices:
            raise exceptions.RallyAssertionError("The provided index [%s] does not match any of the indices [%s]." %
                                                 (self.index_name, ",".join([str(i) for i in self.indices])))

        logger.info("Choosing indices [%s] for partition [%d] of [%d]." %
                    (",".join([str(i) for i in chosen_indices]), partition_index, total_partitions))
        return PartitionBulkIndexParamSource(chosen_indices, partition_index, total_partitions, self.action_metadata,
                                             self.batch_size, self.bulk_size, self.id_conflicts, self.pipeline, self._params)
Example #12
    def __call__(self, es, params):
        source_index = mandatory(params, "source-index", self)
        target_index = mandatory(params, "target-index", self)
        # we need to inject additional settings so we better copy the body
        target_body = deepcopy(mandatory(params, "target-body", self))
        shrink_node = params.get("shrink-node")
        # Choose a random data node if none is specified
        if not shrink_node:
            node_names = []
            # choose a random data node
            for node in es.nodes.info()["nodes"].values():
                if "data" in node["roles"]:
                    node_names.append(node["name"])
            if not node_names:
                raise exceptions.RallyAssertionError(
                    "Could not choose a suitable shrink-node automatically. Please specify it explicitly."
                )
            shrink_node = random.choice(node_names)
        self.logger.info("Using [%s] as shrink node.", shrink_node)
        self.logger.info("Preparing [%s] for shrinking.", source_index)
        # prepare index for shrinking
        es.indices.put_settings(
            index=source_index,
            body={
                "settings": {
                    "index.routing.allocation.require._name": shrink_node,
                    "index.blocks.write": "true"
                }
            },
            preserve_existing=True)

        self.logger.info("Waiting for relocation to finish for index [%s]...",
                         source_index)
        self._wait_for(es, source_index,
                       "shard relocation for index [{}]".format(source_index))
        self.logger.info("Shrinking [%s] to [%s].", source_index, target_index)
        if "settings" not in target_body:
            target_body["settings"] = {}
        target_body["settings"][
            "index.routing.allocation.require._name"] = None
        target_body["settings"]["index.blocks.write"] = None
        # kick off the shrink operation
        es.indices.shrink(index=source_index,
                          target=target_index,
                          body=target_body)

        self.logger.info("Waiting for shrink to finish for index [%s]...",
                         source_index)
        self._wait_for(es, target_index,
                       "shrink for index [{}]".format(target_index))
        self.logger.info("Shrinking [%s] to [%s] has finished.", source_index,
                         target_index)
        # ops_count is not really important for this operation...
        return 1, "ops"
Example #13
    def assert_doc_count(self):
        if self.expected_doc_count is not None:
            stats = self.cluster.indices_stats(index=self.indices,
                                               metric="_all",
                                               level="shards")
            actual_doc_count = stats["_all"]["primaries"]["docs"]["count"]
            if self.expected_doc_count != actual_doc_count:
                msg = "Wrong number of documents: expected %s but got %s. If you benchmark against an external cluster be sure to " \
                      "start with all indices empty." % (self.expected_doc_count, actual_doc_count)
                logger.error(msg)
                raise exceptions.RallyAssertionError(msg)
Example #14
    def union(self, other):
        """
        Creates a new corpus based on the current and the provided other corpus. This is not meant as a generic union
        of two arbitrary corpora but rather to unify the documents referenced by two instances of the same corpus. This
        is useful when two tasks reference different subsets of a corpus and a unified view (e.g. for downloading the
        appropriate document files) is required.

        :param other: The other corpus to unify with this one. Must have the same name and meta-data.
        :return: A document corpus instance with the same name and meta-data but with documents from both corpora.
        """
        if self.name != other.name:
            raise exceptions.RallyAssertionError(f"Corpora names differ: [{self.name}] and [{other.name}].")
        if self.meta_data != other.meta_data:
            raise exceptions.RallyAssertionError(f"Corpora meta-data differ: [{self.meta_data}] and [{other.meta_data}].")
        if self is other:
            return self
        else:
            return DocumentCorpus(
                name=self.name, documents=list(set(self.documents).union(other.documents)), meta_data=dict(self.meta_data)
            )
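
Because documents are collected via a set union, an entry referenced by both corpora appears only once in the result (the document objects must therefore be hashable). A standalone sketch of the same idea, with plain strings standing in for the document objects:

corpus_a = {"docs-1.json", "docs-2.json"}
corpus_b = {"docs-2.json", "docs-3.json"}

merged = corpus_a.union(corpus_b)
print(sorted(merged))  # ['docs-1.json', 'docs-2.json', 'docs-3.json']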
Example #15
    def _wait_for(self, es, idx, description):
        # wait a little bit before the first check
        time.sleep(3)
        result = self.cluster_health(es, params={
            "index": idx,
            "retries": sys.maxsize,
            "request-params": {
                "wait_for_no_relocating_shards": "true"
            }
        })
        if not result["success"]:
            raise exceptions.RallyAssertionError("Failed to wait for [{}].".format(description))
Example #16
def _do_wait(es, es_version, expected_cluster_status):
    reached_cluster_status = None
    relocating_shards = -1
    major, minor, patch, suffix = versions.components(es_version)
    if major < 5:
        use_wait_for_relocating_shards = True
    elif major == 5 and minor == 0 and patch == 0 and suffix and suffix.startswith("alpha"):
        use_wait_for_relocating_shards = True
    else:
        use_wait_for_relocating_shards = False

    for attempt in range(10):
        try:
            if use_wait_for_relocating_shards:
                result = es.cluster.health(
                    wait_for_status=expected_cluster_status,
                    wait_for_relocating_shards=0,
                    timeout="3s")
            else:
                result = es.cluster.health(
                    wait_for_status=expected_cluster_status,
                    timeout="3s",
                    params={"wait_for_no_relocating_shards": True})
        except (socket.timeout, elasticsearch.exceptions.ConnectionError):
            pass
        except elasticsearch.exceptions.TransportError as e:
            if e.status_code == 408:
                logger.info(
                    "Timed out waiting for cluster health status. Retrying shortly..."
                )
                time.sleep(0.5)
            else:
                raise e
        else:
            reached_cluster_status = result["status"]
            relocating_shards = result["relocating_shards"]
            logger.info("GOT: %s" % str(result))
            logger.info("ALLOC:\n%s" % es.cat.allocation(v=True))
            logger.info("RECOVERY:\n%s" % es.cat.recovery(v=True))
            logger.info("SHARDS:\n%s" % es.cat.shards(v=True))
            if reached_cluster_status == expected_cluster_status and relocating_shards == 0:
                return reached_cluster_status, relocating_shards
            else:
                time.sleep(0.5)
    if reached_cluster_status != expected_cluster_status:
        msg = "Cluster did not reach status [%s]. Last reached status: [%s]" % (
            expected_cluster_status, reached_cluster_status)
    else:
        msg = "Cluster reached expected status [%s] but there were [%d] relocating shards and we require zero relocating shards " \
              "(Use the /_cat/shards API to check which shards are relocating.)" % (reached_cluster_status, relocating_shards)
    logger.error(msg)
    raise exceptions.RallyAssertionError(msg)
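
versions.components splits a version string such as "5.0.0-alpha1" into major, minor, patch and an optional suffix, which is what the version checks above rely on. A hedged sketch of such a parser (Rally's actual helper may accept more formats):

import re

def components(version):
    # parse "major.minor.patch[-suffix]"; a sketch, not Rally's implementation
    match = re.match(r"^(\d+)\.(\d+)\.(\d+)(?:-(.+))?$", version)
    if not match:
        raise ValueError("version string [%s] does not match expected format" % version)
    major, minor, patch, suffix = match.groups()
    return int(major), int(minor), int(patch), suffix

print(components("5.0.0-alpha1"))  # (5, 0, 0, 'alpha1')
print(components("6.3.2"))         # (6, 3, 2, None)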
Example #17
    def send_to_children_and_transition(self, sender, msg, expected_status, new_status):
        """

        Sends the provided message to all child actors and immediately transitions to the new status.

        :param sender: The actor from which we forward this message (in case it is message forwarding). Otherwise our own address.
        :param msg: The message to send.
        :param expected_status: The status in which this actor should be upon calling this method.
        :param new_status: The new status.
        """
        if self.is_current_status_expected(expected_status):
            self.logger.info("Transitioning from [%s] to [%s].", self.status, new_status)
            self.status = new_status
            for m in filter(None, self.children):
                self.send(m, msg)
        else:
            raise exceptions.RallyAssertionError("Received [%s] from [%s] but we are in status [%s] instead of [%s]." %
                                                 (type(msg), sender, self.status, expected_status))
Example #18
    def used_corpora(self, t, params):
        corpora = []
        track_corpora_names = [corpus.name for corpus in t.corpora]
        corpora_names = params.get("corpora", track_corpora_names)
        if isinstance(corpora_names, str):
            corpora_names = [corpora_names]

        for corpus in t.corpora:
            if corpus.name in corpora_names:
                filtered_corpus = corpus.filter(source_format=track.Documents.SOURCE_FORMAT_BULK, target_indices=params.get("indices"))
                if filtered_corpus.number_of_documents(source_format=track.Documents.SOURCE_FORMAT_BULK) > 0:
                    corpora.append(filtered_corpus)

        # the track has corpora but none of them match
        if t.corpora and not corpora:
            raise exceptions.RallyAssertionError("The provided corpus %s does not match any of the corpora %s." %
                                                 (corpora_names, track_corpora_names))

        return corpora
Example #19
    def __next__(self):
        if self.conflicting_ids is not None:
            if self.conflict_probability and self.id_up_to > 0 and self.rand() <= self.conflict_probability:
                doc_id = self.conflicting_ids[self.randint(0, self.id_up_to - 1)]
                action = self.on_conflict
            else:
                if self.id_up_to >= len(self.conflicting_ids):
                    raise StopIteration()
                doc_id = self.conflicting_ids[self.id_up_to]
                self.id_up_to += 1
                action = "index"

            if action == "index":
                return "index", '{"index": {"_index": "%s", "_type": "%s", "_id": "%s"}}' % (self.index_name, self.type_name, doc_id)
            elif action == "update":
                return "update", '{"update": {"_index": "%s", "_type": "%s", "_id": "%s"}}' % (self.index_name, self.type_name, doc_id)
            else:
                raise exceptions.RallyAssertionError("Unknown action [{}]".format(action))
        else:
            return "index", '{"index": {"_index": "%s", "_type": "%s"}}' % (self.index_name, self.type_name)
Example #20
    def receiveMsg_WakeupMessage(self, msg, sender):
        if msg.payload == MechanicActor.WAKEUP_RESET_RELATIVE_TIME:
            self.reset_relative_time()
        else:
            raise exceptions.RallyAssertionError("Unknown wakeup reason [{}]".format(msg.payload))
Example #21
def _do_wait(es, expected_cluster_status, sleep=time.sleep):
    import elasticsearch
    from enum import Enum
    from functools import total_ordering

    @total_ordering
    class ClusterHealthStatus(Enum):
        UNKNOWN = 0
        RED = 1
        YELLOW = 2
        GREEN = 3

        def __lt__(self, other):
            if self.__class__ is other.__class__:
                return self.value < other.value
            return NotImplemented

    def status(v):
        try:
            return ClusterHealthStatus[v.upper()]
        except (KeyError, AttributeError):
            return ClusterHealthStatus.UNKNOWN

    reached_cluster_status = None
    relocating_shards = -1
    major, minor, patch, suffix = versions.components(
        es.info()["version"]["number"])
    if major < 5:
        use_wait_for_relocating_shards = True
    elif major == 5 and minor == 0 and patch == 0 and suffix and suffix.startswith("alpha"):
        use_wait_for_relocating_shards = True
    else:
        use_wait_for_relocating_shards = False

    max_attempts = 10
    for attempt in range(max_attempts):
        try:
            # Is this the last attempt? Then just retrieve the status
            if attempt + 1 == max_attempts:
                result = es.cluster.health()
            elif use_wait_for_relocating_shards:
                result = es.cluster.health(
                    wait_for_status=expected_cluster_status,
                    timeout="3s",
                    params={"wait_for_relocating_shards": 0})
            else:
                result = es.cluster.health(
                    wait_for_status=expected_cluster_status,
                    timeout="3s",
                    wait_for_no_relocating_shards=True)
        except (socket.timeout, elasticsearch.exceptions.ConnectionError):
            pass
        except elasticsearch.exceptions.TransportError as e:
            if e.status_code == 408:
                logger.info(
                    "Timed out waiting for cluster health status. Retrying shortly..."
                )
                sleep(0.5)
            else:
                raise e
        else:
            reached_cluster_status = result["status"]
            relocating_shards = result["relocating_shards"]
            logger.info("GOT: %s" % str(result))
            logger.info("ALLOC:\n%s" % es.cat.allocation(v=True))
            logger.info("RECOVERY:\n%s" % es.cat.recovery(v=True))
            logger.info("SHARDS:\n%s" % es.cat.shards(v=True))
            if status(reached_cluster_status) >= status(
                    expected_cluster_status) and relocating_shards == 0:
                return reached_cluster_status, relocating_shards
            else:
                sleep(0.5)
    if status(reached_cluster_status) < status(expected_cluster_status):
        msg = "Cluster did not reach status [%s]. Last reached status: [%s]" % (
            expected_cluster_status, reached_cluster_status)
    else:
        msg = "Cluster reached status [%s] which is equal or better than the expected status [%s] but there were [%d] relocating shards " \
              "and we require zero relocating shards (Use the /_cat/shards API to check which shards are relocating.)" % \
              (reached_cluster_status, expected_cluster_status, relocating_shards)
    logger.error(msg)
    raise exceptions.RallyAssertionError(msg)
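
The @total_ordering decorator derives the remaining comparison operators from __lt__ and the equality that Enum already provides, which is what makes the "at least as healthy as" check above work. A standalone sketch of the same pattern:

from enum import Enum
from functools import total_ordering

@total_ordering
class Status(Enum):
    UNKNOWN = 0
    RED = 1
    YELLOW = 2
    GREEN = 3

    def __lt__(self, other):
        if self.__class__ is other.__class__:
            return self.value < other.value
        return NotImplemented

print(Status.GREEN >= Status.YELLOW)  # True: green satisfies a yellow requirement
print(Status.RED >= Status.YELLOW)    # False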