def deploy(self, cluster_size, new_cluster):
        """Deploy a fresh cluster or attach to an existing one, then load its nodes.

        Args:
            cluster_size: Number of nodes handed to the deploy script. It is
                converted with ``str()`` so an ``int`` is accepted as well.
            new_cluster: When truthy, run the deploy script, wait 30 seconds,
                print the cluster status, load the initial nodes and correct
                the advertised listeners. Otherwise only print the status of
                the existing cluster and load the initial nodes.
        """
        # The blockade and docker-compose variants differ only in which
        # scripts they run, so pick the scripts once.
        if self.use_blockade:
            deploy_script = "../cluster/deploy-blockade-cluster.sh"
            status_script = "../cluster/cluster-status.sh"
        else:
            deploy_script = "../cluster/deploy-compose-cluster.sh"
            status_script = "../cluster/cluster-status-dc.sh"

        if new_cluster:
            # subprocess requires string arguments; str() keeps int sizes working.
            subprocess.call(
                ["bash", deploy_script, str(cluster_size), self.image_version])
            console_out("Waiting for cluster to establish itself...",
                        "TEST RUNNER")
            time.sleep(30)
            console_out("Cluster status:", "TEST RUNNER")
        else:
            console_out("Using existing cluster...", "TEST RUNNER")

        subprocess.call(["bash", status_script])
        self.load_initial_nodes()

        # Advertised listeners are only corrected for a freshly deployed cluster.
        if new_cluster:
            self.correct_advertised_listeners()
 def create_topic(self, broker, topic_name, replication_factor, partitions,
                  min_insync_reps, unclean_failover):
     """Create a topic by running the helper script for the current image.

     Returns True once the script has been invoked for a supported image.
     Returns False, after logging, when the image is not supported or when
     invoking the script raised.
     """
     scripts_by_image = {
         "confluent": "../cluster/cp-create-topic.sh",
         "wurstmeister": "../cluster/wm-create-topic.sh",
     }
     try:
         script = scripts_by_image.get(self.image_version)
         if script is None:
             raise ValueError("Non-supported Kafka image")

         # NOTE(review): the script's exit code is not inspected, so a
         # failing script still yields True - confirm this is intended.
         subprocess.call([
             "bash", script, broker, topic_name,
             str(replication_factor),
             str(partitions),
             str(min_insync_reps),
             str(unclean_failover),
         ])
     except Exception as e:
         console_out(f"Could not create topic: {e}", "TEST RUNNER")
         return False
     return True
Exemple #3
0
    def start_producing(self, topic, msg_count):
        """Produce up to ``msg_count`` messages, cycling through the keys.

        Args:
            topic: Base topic name. In "partitioned-sequence" mode each message
                goes to ``f"{topic}-{key}"``; otherwise it goes to ``topic``.
            msg_count: Maximum number of messages to produce.

        The loop stops early once ``self.terminate`` is set. Production is
        paused while more than ``self.in_flight_limit`` messages are pending
        acknowledgement.
        """
        console_out(f"{self.key_count} sequences of a possible {len(self.keys)} to be sent", self.get_actor())
        for _ in range(msg_count):
            if self.terminate:
                break

            self.producer.poll(0)

            key = self.keys[self.key_index]
            # Derive the target from the base topic on every iteration; the
            # parameter itself must not be overwritten, or the per-key suffixes
            # would pile up ("t-a", "t-a-b", ...).
            target_topic = topic
            if self.message_type == "partitioned-sequence":
                target_topic = f"{topic}-{key}"
                body = f"{key}={self.val}"
            elif self.message_type == "sequence":
                body = f"{key}={self.val}"
            else:
                # A UUID object has no encode(); use its string form.
                body = str(uuid.uuid4())

            # Back-pressure: wait for acknowledgements before sending more.
            while len(self.pending_ack) > self.in_flight_limit:
                time.sleep(0.2)
                if self.terminate:
                    break
                self.producer.poll(0)

            # Do not send another message once termination was requested
            # while waiting above.
            if self.terminate:
                break

            self.producer.produce(target_topic,
                                  value=body.encode('utf-8'),
                                  key=key,
                                  callback=self.delivery_report)
            self.pending_ack.add(body)
            self.curr_pos += 1

            # Advance to the next key; after a full cycle, bump the value.
            self.key_index += 1
            if self.key_index == self.key_count:
                self.key_index = 0
                self.val += 1
    def correct_advertised_listeners(self):
        """Run the image-specific listener-correction script for every initial node.

        Returns True when every node was processed, or False (after logging)
        as soon as one node fails or the image is not supported.
        """
        for broker in self.init_live_nodes:
            broker_ip = self.get_node_ip(broker)
            # The broker id is the single character right after "kafka".
            # NOTE(review): only one digit is read - confirm ids never exceed 9.
            id_pos = broker.index("kafka") + 5
            broker_id = int(broker[id_pos:id_pos + 1])
            port = self.get_node_port(broker)
            try:
                if self.image_version == "confluent":
                    command = [
                        "bash", "../cluster/cp-correct-adv-listener.sh",
                        broker, str(broker_id), broker_ip, port,
                    ]
                elif self.image_version == "wurstmeister":
                    # The wurstmeister script takes no port argument.
                    command = [
                        "bash", "../cluster/wm-correct-adv-listener.sh",
                        broker, str(broker_id), broker_ip,
                    ]
                else:
                    raise ValueError("Non-supported Kafka image")
                subprocess.call(command)
            except Exception as e:
                console_out(f"Could not correct advtertised listener: {e}",
                            "TEST RUNNER")
                return False

        return True
Exemple #5
0
    def run_benchmark(self, unique_conf, common_conf, playlist_entry, policies,
                      run_ordinal):
        """Run one benchmark via a shell script and record its outcome.

        The result is stored in ``self._benchmark_status`` under the key
        ``technology + node_number``: "started" before the script runs, then
        "success" when the script exits with 0 and "failed" otherwise.
        """
        status_id = unique_conf.technology + unique_conf.node_number

        federation_args = ""
        if common_conf.federation_enabled:
            # NOTE(review): `x` is not defined anywhere in this method, so this
            # line raises NameError when federation is enabled unless a
            # module-level `x` exists - confirm the intended offset.
            ds_node_number = int(unique_conf.node_number) + 100 + x
            ds_broker_ips = self.get_broker_ips(unique_conf.technology,
                                                ds_node_number,
                                                unique_conf.cluster_size,
                                                common_conf.run_tag)
            federation_args += f"--downstream-broker-hosts {ds_broker_ips}"

        # Default script; the eks/gke deployments switch to the k8s variant.
        script = "run-logged-aws-benchmark.sh"
        # TODO: make these contexts not use hard-coded regions and cluster names
        if unique_conf.deployment == "eks":
            context = f"{unique_conf.deployment_user}@benchmarking-eks.eu-west-1.eksctl.io"
            script = "run-logged-aws-k8s-benchmark.sh"
        elif unique_conf.deployment == "gke":
            context = f"gke_{unique_conf.deployment_user}_europe-west4-a_benchmarking-gke"
            script = "run-logged-aws-k8s-benchmark.sh"
        else:
            context = "none"

        cluster_name = f"rmq-{unique_conf.deployment}"

        self._benchmark_status[status_id] = "started"
        # The arguments are positional, so their order must match what the
        # script expects. common_conf.password appears twice in the list.
        exit_code = subprocess.call([
            "bash", script, unique_conf.node_number, common_conf.key_pair,
            unique_conf.technology, unique_conf.broker_version,
            unique_conf.instance, unique_conf.volume1_type,
            unique_conf.filesystem, common_conf.hosting, unique_conf.tenancy,
            common_conf.password, common_conf.postgres_url,
            common_conf.postgres_user, common_conf.postgres_pwd,
            playlist_entry.topology, common_conf.run_id, common_conf.username,
            common_conf.password, common_conf.run_tag, unique_conf.core_count,
            unique_conf.threads_per_core, unique_conf.config_tag,
            str(unique_conf.cluster_size), unique_conf.no_tcp_delay, policies,
            str(common_conf.override_step_seconds),
            str(common_conf.override_step_repeat),
            str(common_conf.override_step_msg_limit),
            common_conf.override_broker_hosts, unique_conf.pub_connect_to_node,
            unique_conf.con_connect_to_node,
            str(unique_conf.pub_heartbeat_sec),
            str(unique_conf.con_heartbeat_sec), common_conf.mode,
            str(common_conf.grace_period_sec), common_conf.warmUpSeconds,
            common_conf.checks,
            str(run_ordinal), common_conf.tags, common_conf.attempts,
            common_conf.influx_subpath,
            playlist_entry.get_topology_variables(),
            playlist_entry.get_policy_variables(), federation_args, context,
            cluster_name, unique_conf.memory_gb
        ])

        if exit_code != 0:
            console_out(self.actor,
                        f"Benchmark {unique_conf.node_number} failed")
            self._benchmark_status[status_id] = "failed"
        else:
            self._benchmark_status[status_id] = "success"
Exemple #6
0
    def run_background_load(self, unique_conf, common_conf, topology, policies,
                            step_seconds, step_repeat, delay_seconds):
        """Run the background-load script, optionally after an initial delay.

        Args:
            unique_conf: Per-cluster configuration (cluster size, node number,
                technology and broker version are read from it).
            common_conf: Shared configuration (password, key pair and run tag
                are read from it).
            topology: Topology argument passed to the script.
            policies: Policies argument; an empty string is sent as "none".
            step_seconds: Step duration, converted with ``str()``.
            step_repeat: Step repeat count, converted with ``str()``.
            delay_seconds: Seconds to sleep before starting; skipped when not
                positive.

        This blocks until the script exits; its exit code is not inspected.
        """
        if delay_seconds > 0:
            console_out(
                self.actor,
                f"Delaying start of background load by {delay_seconds} seconds for {unique_conf.node_number}"
            )
            time.sleep(delay_seconds)

        console_out(self.actor,
                    f"Starting background load for {unique_conf.node_number}")

        broker_user = "******"
        broker_password = common_conf.password

        # An empty string is replaced by an explicit "none" placeholder
        # before being handed to the script.
        if policies == "":
            policies = "none"

        subprocess.call([
            "bash", "run-background-load-aws.sh", broker_user, broker_password,
            str(unique_conf.cluster_size), common_conf.key_pair,
            unique_conf.node_number, policies,
            str(step_seconds),
            str(step_repeat), common_conf.run_tag, unique_conf.technology,
            topology, unique_conf.broker_version
        ])
Exemple #7
0
    def stop_start_consumer(self, con_index, hard_close):
        """Stop the consumer at ``con_index`` and start it again on a new thread.

        With toxiproxy enabled, the consumer's proxy is disabled, the consumer
        is hard-closed and the proxy is enabled again; ``hard_close`` is not
        consulted on that path. Otherwise ``hard_close`` selects between a hard
        close and a normal stop. Any exception is logged and not re-raised.
        """
        consumer = self.consumers[con_index]
        try:
            if not self.use_toxiproxy:
                console_out(
                    f"STOPPING CONSUMER {con_index+1} --------------------------------------",
                    self.actor)
                shut_down = (consumer.perform_hard_close
                             if hard_close else consumer.stop_consuming)
                shut_down()
            else:
                console_out(
                    f"SIMULATING CRASH OF CONSUMER {con_index+1} --------------------------------------",
                    self.actor)
                self.broker_manager.disable_consumer_proxy(
                    consumer.get_consumer_id())
                time.sleep(1)
                consumer.perform_hard_close()
                time.sleep(1)
                self.broker_manager.enable_consumer_proxy(
                    consumer.get_consumer_id())

            # Give the old consumer thread up to 15 seconds to finish.
            self.consumer_threads[con_index].join(15)

            consumer.connect()
            replacement = threading.Thread(target=consumer.consume)
            self.consumer_threads[con_index] = replacement
            replacement.start()
        except Exception as e:
            console_out_exception("Failed to stop/start consumer correctly", e,
                                  self.actor)
 def on_connection_closed(self, connection, reason):
     """Handle a closed connection: stop the ioloop now, or after 5 seconds.

     The channel reference is cleared in both cases. When the object is not
     stopping, the reason is logged and the ioloop stop is scheduled 5
     seconds later.
     """
     self.channel = None
     ioloop = self.connection.ioloop
     if not self.stopping:
         console_out(f"Connection closed. Reason: {reason}. Reopening in 5 seconds.", self.get_actor())
         ioloop.call_later(5, ioloop.stop)
         return
     ioloop.stop()
Exemple #9
0
    def parallel_deploy(self, configurations, common_conf):
        """Deploy (or update) every configuration in parallel and check results.

        One thread is started per entry in each configuration list. The thread
        runs ``update_single`` when ``common_conf.no_deploy`` is set and
        ``deploy_rabbitmq_cluster`` otherwise. After all threads have finished,
        every entry whose status in ``self._deploy_status`` is not "success"
        is logged. If any entry failed and ``no_deploy`` is not set,
        ``teardown_all`` is called once.

        Args:
            configurations: Mapping of config tag to a list of per-cluster
                configurations.
            common_conf: Shared configuration passed to every worker.
        """
        if common_conf.no_deploy:
            worker = self.update_single
        else:
            worker = self.deploy_rabbitmq_cluster

        d_threads = [
            threading.Thread(target=worker, args=(unique_conf, common_conf))
            for config_tag in configurations
            for unique_conf in configurations[config_tag]
        ]

        for dt in d_threads:
            dt.start()

        for dt in d_threads:
            dt.join()

        any_failed = False
        for config_tag in configurations:
            for unique_conf in configurations[config_tag]:
                status_id = unique_conf.technology + unique_conf.node_number
                if self._deploy_status[status_id] != "success":
                    console_out(self.actor, f"Deployment failed for node {unique_conf.technology}{unique_conf.node_number}")
                    any_failed = True

        # teardown_all already covers every configuration, so it is run at
        # most once instead of once per failed node.
        if any_failed and not common_conf.no_deploy:
            self.teardown_all(configurations, common_conf, False)
 def deploy_joinee(self, status_id, unique_conf, common_conf, node,
                   node_range_start, node_range_end):
     """Run the cluster-broker deploy script for one node in the "joinee" role.

     The script is executed from ``../deploy/aws``. On a non-zero exit code
     the failure is logged and ``self._deploy_status[status_id]`` is set to
     "failed"; on success the status is left untouched.
     """
     # Positional arguments: the order must match the script. Note that the
     # range end is passed before the range start.
     command = [
         "bash", "deploy-rmq-cluster-broker.sh",
         unique_conf.broker_version,
         unique_conf.core_count,
         unique_conf.filesystem,
         unique_conf.generic_unix_url,
         common_conf.influx_subpath,
         unique_conf.instance,
         common_conf.key_pair,
         common_conf.log_level,
         str(node),
         str(node_range_end),
         str(node_range_start),
         "joinee",
         common_conf.run_tag,
         common_conf.broker_sg,
         common_conf.subnet,
         unique_conf.tenancy,
         unique_conf.threads_per_core,
         unique_conf.vars_file,
         unique_conf.data_volume,
         unique_conf.logs_volume,
         unique_conf.quorum_volume,
         unique_conf.wal_volume,
         unique_conf.volume1_size,
         unique_conf.volume1_mountpoint,
         unique_conf.volume2_size,
         unique_conf.volume2_mountpoint,
         unique_conf.volume3_size,
         unique_conf.volume3_mountpoint,
     ]
     exit_code = subprocess.call(command, cwd="../deploy/aws")
     if exit_code == 0:
         return

     console_out(
         self.actor,
         f"deploy of joinee rabbitmq{node} failed with exit code {exit_code}"
     )
     self._deploy_status[status_id] = "failed"
Exemple #11
0
 def configure_large_msgs_direct(self, queue, count, dup_rate, msg_size):
     """Set up publishing of large messages directly to ``queue``.

     The payload is built by ``repeat_to_length`` from "1234567890" and
     ``msg_size``. The queue name becomes the routing key and the exchange
     list is a single empty string.
     """
     self.large_msg = self.repeat_to_length("1234567890", msg_size)
     self.routing_key, self.exchanges = queue, [""]
     console_out(f"Will publish large messages to queue {queue}",
                 self.get_actor())
     self.configure(count, dup_rate, "large-msgs")
 def start_consumers(self):
     """Start one thread per consumer and remember it in ``consumer_threads``."""
     # Consumers are numbered from 1 in the log output.
     for con_id, consumer in enumerate(self.consumers, start=1):
         worker = threading.Thread(target=consumer.start_consuming)
         worker.start()
         self.consumer_threads.append(worker)
         console_out(f"consumer {con_id} started", self.actor)
Exemple #13
0
    def restart_all_brokers(self, configurations, common_conf):
        """Restart every broker node of every configuration in parallel.

        One ``restart_broker`` thread is started per node. A configuration
        covers ``cluster_size`` consecutive node numbers beginning at its
        ``node_number``. After all threads have finished, every node whose
        entry in ``self._action_status`` is not "success" is logged. If any
        node failed and ``common_conf.no_deploy`` is not set, the deployer's
        ``teardown_all`` is called once.

        Args:
            configurations: Mapping of config tag to a list of per-cluster
                configurations.
            common_conf: Shared configuration passed to every worker.
        """
        r_threads = []
        for config_tag in configurations:
            console_out(self.actor, f"BROKER RESTART FOR configuration {config_tag}")
            for unique_conf in configurations[config_tag]:
                first_node = int(unique_conf.node_number)
                for n in range(unique_conf.cluster_size):
                    r_threads.append(
                        threading.Thread(target=self.restart_broker,
                                         args=(unique_conf.technology,
                                               str(first_node + n),
                                               common_conf)))

        for rt in r_threads:
            rt.start()

        for rt in r_threads:
            rt.join()

        any_failed = False
        for config_tag in configurations:
            for unique_conf in configurations[config_tag]:
                first_node = int(unique_conf.node_number)
                for n in range(unique_conf.cluster_size):
                    node = first_node + n
                    status_id = f"{unique_conf.technology}{node}"

                    if self._action_status[status_id] != "success":
                        console_out(self.actor, f"Broker restart failed for node {unique_conf.technology}{node}")
                        any_failed = True

        # teardown_all already covers every configuration, so it is run at
        # most once instead of once per failed node.
        if any_failed and not common_conf.no_deploy:
            self._deployer.teardown_all(configurations, common_conf, False)
Exemple #14
0
    def deploy_single(self, unique_conf, common_conf):
        """Run the single-broker deploy script and record the outcome.

        ``self._deploy_status`` is keyed by ``technology + node_number``. It is
        set to "started" before the script runs, then to "success" for exit
        code 0 and "failed" (after logging) otherwise. The script is executed
        from ``../deploy/aws``.
        """
        status_id = unique_conf.technology + unique_conf.node_number
        self._deploy_status[status_id] = "started"

        # The volume type is the second dash-separated part of unique_conf.volume.
        volume_type = unique_conf.volume.split("-")[1]

        # Positional arguments: the order must match the script.
        command = [
            "bash", "deploy-single-broker.sh",
            common_conf.ami,
            unique_conf.broker_version,
            unique_conf.core_count,
            unique_conf.filesystem,
            unique_conf.generic_unix_url,
            unique_conf.instance,
            common_conf.key_pair,
            common_conf.loadgen_instance,
            common_conf.loadgen_sg,
            common_conf.log_level,
            unique_conf.node_number,
            common_conf.run_tag,
            common_conf.broker_sg,
            common_conf.subnet,
            unique_conf.technology,
            unique_conf.tenancy,
            unique_conf.threads_per_core,
            unique_conf.vars_file,
            unique_conf.volume_size,
            volume_type,
        ]
        exit_code = subprocess.call(command, cwd="../deploy/aws")

        if exit_code == 0:
            self._deploy_status[status_id] = "success"
            return

        console_out(
            self.actor,
            f"deploy {unique_conf.node_number} failed with exit code {exit_code}"
        )
        self._deploy_status[status_id] = "failed"
Exemple #15
0
 def on_channel_closed(self, channel, reason):
     """Log the channel closure, drop the channel and close an open connection.

     The connection is closed only when the object is not stopping and the
     connection reports itself as open.
     """
     console_out(f"Channel {channel} was closed. Reason: {reason}",
                 self.get_actor())
     self._channel = None
     if self._stopping:
         return
     if self._connection.is_open:
         self._connection.close()
    def stop_one_broker(self, configurations, common_conf):
        """Stop one broker per configuration entry in parallel and check results.

        One ``stop_broker`` thread is started for the ``node_number`` of each
        entry. After all threads have finished, every entry whose status in
        ``self._action_status`` is not "success" is logged. If any entry
        failed and ``common_conf.no_deploy`` is not set, the deployer's
        ``teardown_all`` is called once.

        Args:
            configurations: Mapping of config tag to a list of per-cluster
                configurations.
            common_conf: Shared configuration passed to every worker.
        """
        r_threads = []
        for config_tag in configurations:
            console_out(self.actor,
                        f"BROKER SHUTDOWN FOR configuration {config_tag}")
            for unique_conf in configurations[config_tag]:
                r_threads.append(
                    threading.Thread(target=self.stop_broker,
                                     args=(unique_conf.technology,
                                           str(unique_conf.node_number),
                                           common_conf)))

        for rt in r_threads:
            rt.start()

        for rt in r_threads:
            rt.join()

        any_failed = False
        for config_tag in configurations:
            for unique_conf in configurations[config_tag]:
                status_id = f"{unique_conf.technology}{unique_conf.node_number}"
                if self._action_status[status_id] != "success":
                    console_out(
                        self.actor,
                        f"Broker shutdown failed for node {unique_conf.technology}{unique_conf.node_number}"
                    )
                    any_failed = True

        # teardown_all already covers every configuration, so it is run at
        # most once instead of once per failed node.
        if any_failed and not common_conf.no_deploy:
            self._deployer.teardown_all(configurations, common_conf, False)
 def start_publishers(self):
     """Start one thread per publisher and remember it in ``publisher_threads``."""
     # Publishers are numbered from 1 in the log output.
     for prod_id, publisher in enumerate(self.publishers, start=1):
         worker = threading.Thread(target=publisher.start_publishing)
         worker.start()
         self.publisher_threads.append(worker)
         console_out(f"Publisher {prod_id} started", self.actor)
Exemple #18
0
    def create_standard_queue(self, mgmt_node, queue_name, replication_factor):
        """Declare a durable queue and an "ha-queues" policy via the HTTP API.

        Two PUT requests are sent to port 15672 of the management node: one
        declaring the queue on ``rabbit@<mgmt_node>`` and one setting the
        "ha-queues" policy whose pattern is ``queue_name`` and whose
        "ha-params" is ``replication_factor``.

        Args:
            mgmt_node: Node name, used to look up the IP and as the queue's node.
            queue_name: Queue to declare; also used as the policy pattern.
            replication_factor: Integer (or integer-like string) sent as
                "ha-params".

        Returns:
            True when the policy request answered 201 or 204, else False.
            Any exception is logged and reported as False.
        """
        import json  # local import: build request bodies with proper escaping

        accepted = (201, 204)
        try:
            mgmt_node_ip = self.get_mgmt_node_ip(mgmt_node)
            api_root = 'http://' + mgmt_node_ip + ':15672/api'
            credentials = ('jack', 'jack')

            queue_body = json.dumps({
                "auto_delete": False,
                "durable": True,
                "arguments": {"x-single-active-consumer": False},
                "node": "rabbit@" + mgmt_node,
            })
            queue_resp = requests.put(api_root + '/queues/%2F/' + queue_name,
                                      data=queue_body,
                                      auth=credentials)
            # The return value still reflects only the policy request, but a
            # rejected queue declaration is no longer silent.
            if queue_resp.status_code not in accepted:
                console_out(
                    f"Queue declaration for {queue_name} returned status {queue_resp.status_code}",
                    "TEST_RUNNER")

            policy_body = json.dumps({
                "pattern": queue_name,
                "definition": {
                    "ha-mode": "exactly",
                    # int() keeps this a JSON number even for a string input.
                    "ha-params": int(replication_factor),
                    "ha-sync-mode": "automatic",
                },
                "priority": 0,
                "apply-to": "queues",
            })
            policy_resp = requests.put(api_root + '/policies/%2F/ha-queues',
                                       data=policy_body,
                                       auth=credentials)

            console_out(
                f"Created {queue_name} with response code {policy_resp.status_code}",
                "TEST_RUNNER")

            return policy_resp.status_code in accepted
        except Exception as e:
            console_out("Could not create queue. Will retry. " + str(e),
                        "TEST RUNNER")
            return False
Exemple #19
0
    def run_background_load(self, unique_conf, common_conf):
        """Launch the background-load script without waiting for it to finish.

        The script receives a comma-separated list of ``cluster_size``
        consecutive node numbers starting at ``unique_conf.node_number``.
        ``self._benchmark_status`` for ``technology + node_number`` is set to
        "started" before the process is spawned.
        """
        console_out(self.actor, f"Starting background load for {unique_conf.node_number}")
        status_id = unique_conf.technology + unique_conf.node_number

        broker_user = "******"
        broker_password = common_conf.password
        topology = common_conf.background_topology_file
        policies = common_conf.background_policies_file
        step_seconds = str(common_conf.background_step_seconds)
        step_repeat = str(common_conf.background_step_repeat)

        nodes = ",".join(
            str(int(unique_conf.node_number) + offset)
            for offset in range(int(unique_conf.cluster_size)))

        self._benchmark_status[status_id] = "started"
        command = [
            "bash", "run-background-load-aws.sh",
            broker_user,
            broker_password,
            str(unique_conf.cluster_size),
            common_conf.key_pair,
            unique_conf.node_number,
            nodes,
            policies,
            step_seconds,
            step_repeat,
            common_conf.run_tag,
            unique_conf.technology,
            topology,
            unique_conf.broker_version,
        ]
        # Popen returns immediately, so the load runs in the background.
        subprocess.Popen(command)
    def publish_msg_with_existing_conn(self, send_to_exchange, rk, body):
        """Publish one persistent text message on the current channel.

        ``pos_acks`` is incremented when the publish call returns normally,
        ``undeliverable`` on UnroutableError (logged on every 100th occurrence)
        and ``neg_acks`` on NackError.
        """
        # The mandatory flag mirrors whether confirms are in use.
        mandatory = bool(self.use_confirms)
        corr_id = str(uuid.uuid4())
        props = pika.BasicProperties(content_type='text/plain',
                                     delivery_mode=2,
                                     correlation_id=corr_id)

        try:
            self.channel.basic_publish(exchange=send_to_exchange,
                                       routing_key=rk,
                                       body=body,
                                       mandatory=mandatory,
                                       properties=props)
        except exceptions.UnroutableError:
            self.undeliverable += 1
            if self.undeliverable % 100 == 0:
                console_out(
                    f"{str(self.undeliverable)} messages could not be delivered",
                    self.get_actor())
        except exceptions.NackError:
            self.neg_acks += 1
        else:
            self.pos_acks += 1
Exemple #21
0
    def restart_broker(self, technology, node, common_conf):
        """Restart the broker container on one node over ``gcloud compute ssh``.

        Two remote commands run in sequence: ``rabbitmqctl -l stop_app`` inside
        the rabbitmq container, then ``docker restart --time 30`` of that
        container. ``self._action_status[technology + node]`` is set to
        "failed" as soon as either command returns non-zero, and to "success"
        when both succeed.
        """
        status_id = technology + node
        node_name = f"{common_conf.run_tag}-rmq{node}-server"

        def run_on_node(remote_command):
            # Execute one shell command on the node through gcloud's ssh wrapper.
            return subprocess.run(
                ["gcloud", "compute", "ssh", node_name, "--", remote_command])

        result = run_on_node(
            "docker exec $(docker container ls | awk '/rabbitmq/ { print $1 }') rabbitmqctl -l stop_app"
        )
        if result.returncode != 0:
            console_out(
                self.actor,
                f"Restart (1/2) of broker on node {node} failed with exit code {result.returncode}"
            )
            self._action_status[status_id] = "failed"
            return

        result = run_on_node(
            "docker restart --time 30 $(docker container ls | awk '/rabbitmq/ { print $1 }')"
        )
        if result.returncode != 0:
            console_out(
                self.actor,
                f"Restart (2/2) of broker on node {node} failed with exit code {result.returncode}"
            )
            self._action_status[status_id] = "failed"
            return

        self._action_status[status_id] = "success"
Exemple #22
0
 def reconnect(self):
     """Drop the current connection and channel, move to the next node, reconnect.

     Returns whatever ``self.connect()`` returns.
     """
     self.connection = self.channel = None
     console_out("Connection is closed. Opening new connection",
                 self.get_actor())
     # Ask the broker manager to advance this consumer to its next node.
     self.broker_manager.next_node(self.consumer_id)
     outcome = self.connect()
     return outcome
Exemple #23
0
 def on_undeliverable(self, channel, method, properties, body):
     """Count a returned (undeliverable) message and log every 100th one.

     The ``channel``, ``method``, ``properties`` and ``body`` arguments are
     accepted for the callback signature but not used. The body is no longer
     decoded, because the decoded text was never used and decoding could
     raise on a non-UTF-8 payload.
     """
     self.undeliverable += 1
     if self.undeliverable % 100 == 0:
         console_out(
             f"{str(self.undeliverable)} messages could not be delivered",
             self.get_actor())
 def reconnect(self):
     """Drop the current connection and channel, move to the next node, reconnect.

     Returns whatever ``self.connect()`` returns.
     """
     self.connection = self.channel = None
     console_out("Connection is closed. Opening new connection",
                 self.get_actor())
     self.next_node()
     outcome = self.connect()
     return outcome
def get_playlist_entries(playlist_file):
    """Load the playlist JSON file and build one PlaylistEntry per benchmark.

    Each item under "benchmarks" is resolved against "commonAttributes"
    through the field helper functions. Broker-action fields are read only
    when "hasBrokerActions" is truthy.

    Args:
        playlist_file: Path of the playlist JSON file.

    Returns:
        A list of PlaylistEntry objects in file order.

    Raises:
        SystemExit: With code 1, after logging, when a referenced topology
            file or a non-empty policy file does not exist.
    """
    # The context manager closes the file even if parsing fails.
    with open(playlist_file, "r", encoding="utf-8") as pl_file:
        playlist_json = json.load(pl_file)
    common_attr = playlist_json["commonAttributes"]

    playlist_entries = []

    # load topologies to run and check topology and policy files exist
    for playlist_entry in playlist_json['benchmarks']:
        entry = PlaylistEntry()
        entry.topology = get_entry_mandatory_field(playlist_entry, common_attr, "topology")
        entry.topology_variables = get_variables(playlist_entry, common_attr, "topologyVariables")
        entry.policy = get_entry_mandatory_field(playlist_entry, common_attr, "policy")
        entry.policy_variables = get_variables(playlist_entry, common_attr, "policyVariables")

        entry.has_broker_actions = get_entry_optional_field(playlist_entry, common_attr, "hasBrokerActions", False)
        if entry.has_broker_actions:
            entry.broker_action = get_entry_mandatory_field(playlist_entry, common_attr, "brokerAction")
            entry.trigger_type = get_entry_mandatory_field(playlist_entry, common_attr, "triggerType")
            entry.trigger_at = get_entry_mandatory_field(playlist_entry, common_attr, "triggerAt")

        if not os.path.exists("../benchmark/topologies/" + entry.topology):
            console_out("RUNNER", f"The topology file {entry.topology} does not exist")
            # SystemExit is what exit() raises, without relying on the site module.
            raise SystemExit(1)

        # An empty policy means "no policy file", so nothing is checked.
        if entry.policy and not os.path.exists("../benchmark/policies/" + entry.policy):
            console_out("RUNNER", f"The policy file {entry.policy} does not exist")
            raise SystemExit(1)

        playlist_entries.append(entry)

    return playlist_entries
Exemple #26
0
 def configure_hello_msgs_to_exchanges(self, exchanges, routing_key, count,
                                       dup_rate):
     """Set up publishing of "hello" messages to the given exchanges.

     Stores the exchanges and routing key, logs the target exchanges, then
     delegates to ``configure`` with the message type "hello".
     """
     self.exchanges, self.routing_key = exchanges, routing_key
     console_out(f"Will publish hello msgs to exchanges {exchanges}",
                 self.get_actor())
     self.configure(count, dup_rate, "hello")
Exemple #27
0
 def teardown(self, technology, node, run_tag, no_destroy):
     """Delete the server instance of one node unless ``no_destroy`` is set.

     The instance name is ``f"{run_tag}-rmq{node}-server"``. A failure while
     deleting is logged and otherwise ignored. ``technology`` is not used.
     """
     if no_destroy:
         console_out(self.actor, "No teardown as --no-destroy set to true")
         return

     instance_name = f"{run_tag}-rmq{node}-server"
     try:
         self.__delete_instance(instance_name)
     except Exception as e:
         console_out(self.actor, f"{e}, ignoring")
Exemple #28
0
 def on_channel_closed(self, channel, reply_code, reply_text):
     """Log the channel closure, drop the channel and close an open connection.

     The connection is closed only when the object is not stopping and the
     connection reports itself as open.
     """
     console_out(
         f"Channel {channel} was closed. Code: {reply_code} Text: {reply_text}",
         self.get_actor())
     self._channel = None
     if self._stopping:
         return
     if self._connection.is_open:
         self._connection.close()
Exemple #29
0
    def on_channel_open(self, channel):
        """Store the opened channel, reset publishing state and start publishing."""
        self._channel = channel
        self.add_on_channel_close_callback()
        console_out('Channel opened, publishing to commence', self.get_actor())

        # Start from a clean slate: ack tracking is reset and the sequence
        # number restarts at 0 before publishing begins.
        self.reset_ack_tracking()
        self.seq_no = 0
        self.start_publishing()
Exemple #30
0
def interuppt_handler(signum, frame):
    """Signal handler: request a stop, and exit on the second request.

    Sets the module-level ``stop_please`` flag and increments
    ``stop_requests``. Once at least two requests have been counted, the
    process exits with status -2.
    """
    global stop_please, stop_requests
    console_out("STOP REQUESTED", "TEST RUNNER")
    stop_please = True
    stop_requests += 1

    if stop_requests < 2:
        return
    sys.exit(-2)