Esempio n. 1
0
 def __init__(self, args, suffix):
     """Parse the common broker-deployment arguments for one configuration.

     args   -- parsed command-line arguments container.
     suffix -- per-configuration suffix appended to argument names, letting
               several configurations coexist on one command line.
     Mandatory args abort the run when missing; optional args fall back to
     the documented defaults.
     """
     self.suffix = suffix
     self.config_tag = get_mandatory_arg(args, "--config-tag", self.suffix)
     # Only rabbitmq is accepted here.
     self.technology = get_mandatory_arg_validated(args, "--technology",
                                                   self.suffix,
                                                   ["rabbitmq"])
     self.cluster_size = int(
         get_optional_arg(args, "--cluster-size", self.suffix, "1"))
     self.broker_version = get_mandatory_arg(args, "--version", self.suffix)
     self.volume_size = get_optional_arg(args, "--volume-size", self.suffix,
                                         "50")  # for GCP deployment only
     self.filesystem = get_mandatory_arg_validated(args, "--filesystem",
                                                   self.suffix,
                                                   ["ext4", "xfs"])
     self.tenancy = get_mandatory_arg_validated(args, "--tenancy",
                                                self.suffix,
                                                ["default", "dedicated"])
     self.core_count = get_mandatory_arg(args, "--core-count", self.suffix)
     self.threads_per_core = get_mandatory_arg(args, "--threads-per-core",
                                               self.suffix)
     # Default vars file is derived from the chosen technology.
     self.vars_file = get_optional_arg(
         args, "--vars-file", self.suffix,
         f".variables/{self.technology}-generic-vars.yml")
     self.no_tcp_delay = get_optional_arg(args, "--no-tcp-delay",
                                          self.suffix, "true")
     self.policies_file = get_optional_arg(args, "--policies-file",
                                           self.suffix, "none")
     # Which broker node publishers/consumers connect to.
     self.pub_connect_to_node = get_optional_arg_validated(
         args, "--pub-connect-to-node", self.suffix,
         ["roundrobin", "local", "non-local", "random"], "roundrobin")
     self.con_connect_to_node = get_optional_arg_validated(
         args, "--con-connect-to-node", self.suffix,
         ["roundrobin", "local", "non-local", "random"], "roundrobin")
     # -1 means "not assigned yet"; set later by the deployment logic.
     self.node_number = -1
def main():
    """Publish test messages to RabbitMQ: direct to a queue, to a list of
    exchanges, or to a single exchange.

    Exits with status 1 when --partitioned is combined with --queue, since
    partitioned sequences must be routed through an exchange.
    """
    args = get_args(sys.argv)

    connect_node = get_optional_arg(args, "--node", "rabbitmq1")
    exchange = get_optional_arg(args, "--ex", "")
    count = int(get_mandatory_arg(args, "--msgs"))
    state_count = int(get_mandatory_arg(args, "--keys"))
    dup_rate = float(get_optional_arg(args, "--dup-rate", "0"))
    routing_key = get_optional_arg(args, "--rk", "hello")
    queue = get_optional_arg(args, "--queue", None)
    partitioned = get_optional_arg(args, "--partitioned", "false")
    exchanges_arg = get_optional_arg(args, "--exchanges", "")

    message_type = "sequence"
    if partitioned == "true":
        # A fixed queue would bypass exchange routing and defeat
        # partitioning, so the combination is rejected.
        if queue is not None:
            print("Cannot set partitioning mode and set a queue. Must publish to an exchange")
            exit(1)
        message_type = "partitioned-sequence"

    live_nodes = get_live_nodes()

    publisher = RabbitPublisher("1", live_nodes, connect_node, 1000, 100, 100)

    if queue is not None:
        print("direct to queue publishing")
        publisher.publish_direct(queue, count, state_count, dup_rate, message_type)
    elif len(exchanges_arg) > 0:
        print("multi-exchange publishing")
        exchanges = exchanges_arg.split(",")
        publisher.publish_to_exchanges(exchanges, routing_key, count, state_count, dup_rate, message_type)
    else:
        print("single exchange publishing")
        publisher.publish(exchange, routing_key, count, state_count, dup_rate, message_type)
Esempio n. 3
0
    def __init__(self, args, suffix):
        """Parse instance/storage arguments on top of the base configuration."""
        super().__init__(args, suffix)

        # URL of the generic-unix broker build to deploy.
        self.generic_unix_url = get_mandatory_arg(
            args, "--generic-unix-url", self.suffix)
        # Instance type for the broker machines.
        self.instance = get_mandatory_arg(args, "--instance", self.suffix)
        # Backing storage; limited to the volume types the deployment
        # scripts understand.
        volume_choices = ["ebs-io1", "ebs-st1", "ebs-gp2", "local-nvme"]
        self.volume = get_mandatory_arg_validated(
            args, "--volume", self.suffix, volume_choices)
Esempio n. 4
0
    def __init__(self, args):
        """Parse GCP-specific deployment arguments on top of the base config."""
        super().__init__(args)

        self.gcp_project_id = get_mandatory_arg(args, "--gcp-project-id", "")
        # Optional networking/database settings; "" means not set.
        self.network = get_optional_arg(args, "--network", "", "")
        self.subnet = get_optional_arg(args, "--subnet", "", "")
        self.gcp_postgres_connection_name = get_optional_arg(args, "--gcp-postgres-connection-name", "", "")
        # Load-generator VM settings.
        self.loadgen_machine_type = get_mandatory_arg(args, "--loadgen-machine-type", "")
        self.loadgen_container_image = get_mandatory_arg(args, "--loadgen-container-image", "")
        # Marks this configuration as targeting GCP hosting.
        self.hosting = "gcp"
    def __init__(self, args, suffix):
        """Parse container/machine arguments on top of the base configuration."""
        super().__init__(args, suffix)

        self.container_image = get_mandatory_arg(args, "--container-image",
                                                 self.suffix)
        # Extra container environment; "" means none.
        self.container_env = get_optional_arg(args, "--container-env",
                                              self.suffix, "")
        self.machine_type = get_mandatory_arg(args, "--machine-type",
                                              self.suffix)
        # Disk type is restricted to the supported persistent-disk kinds.
        self.volume = get_mandatory_arg_validated(args, "--volume",
                                                  self.suffix,
                                                  ["pd-ssd", "pd-standard"])
Esempio n. 6
0
    def __init__(self, args, suffix):
        """Parse instance and up-to-three-volume storage arguments on top of
        the base configuration.

        Each of volume1..volume3 has its own size, type, iops-per-gb and
        mountpoint; the data/logs/quorum/wal targets then name which of the
        three volumes each data category lives on (all default to volume1).
        """
        super().__init__(args, suffix)

        self.generic_unix_url = get_mandatory_arg(args, "--generic-unix-url",
                                                  self.suffix)
        self.instance = get_mandatory_arg(args, "--instance", self.suffix)
        self.volume1_iops_per_gb = get_optional_arg(
            args, "--volume1-iops-per-gb", self.suffix,
            "50")  # only applicable to io1, else ignored
        self.volume2_iops_per_gb = get_optional_arg(
            args, "--volume2-iops-per-gb", self.suffix,
            "50")  # only applicable to io1, else ignored
        self.volume3_iops_per_gb = get_optional_arg(
            args, "--volume3-iops-per-gb", self.suffix,
            "50")  # only applicable to io1, else ignored
        # Sizes: a "0" size for volumes 2/3 indicates the volume is unused.
        self.volume1_size = get_optional_arg(args, "--volume1-size",
                                             self.suffix, "50")
        self.volume2_size = get_optional_arg(args, "--volume2-size",
                                             self.suffix, "0")
        self.volume3_size = get_optional_arg(args, "--volume3-size",
                                             self.suffix, "0")
        self.volume1_type = get_optional_arg_validated(
            args, "--volume1-type", self.suffix,
            ["ebs-io1", "ebs-st1", "ebs-sc1", "ebs-gp2", "local-nvme"],
            "ebs-gp2")
        self.volume2_type = get_optional_arg_validated(
            args, "--volume2-type", self.suffix,
            ["ebs-io1", "ebs-st1", "ebs-sc1", "ebs-gp2", "local-nvme"],
            "ebs-gp2")
        self.volume3_type = get_optional_arg_validated(
            args, "--volume3-type", self.suffix,
            ["ebs-io1", "ebs-st1", "ebs-sc1", "ebs-gp2", "local-nvme"],
            "ebs-gp2")
        self.volume1_mountpoint = get_optional_arg(args,
                                                   "--volume1-mountpoint",
                                                   self.suffix, "/volume1")
        self.volume2_mountpoint = get_optional_arg(args,
                                                   "--volume2-mountpoint",
                                                   self.suffix, "/volume2")
        self.volume3_mountpoint = get_optional_arg(args,
                                                   "--volume3-mountpoint",
                                                   self.suffix, "/volume3")
        # Which logical volume each broker data category is placed on.
        self.data_volume = get_optional_arg(args, "--data-volume", self.suffix,
                                            "volume1")
        self.logs_volume = get_optional_arg(args, "--logs-volume", self.suffix,
                                            "volume1")
        self.quorum_volume = get_optional_arg(args, "--quorum-volume",
                                              self.suffix, "volume1")
        self.wal_volume = get_optional_arg(args, "--wal-volume", self.suffix,
                                           "volume1")
def main():
    """Publish messages to RabbitMQ, then report the resulting queue length.

    Publishes either directly to a queue (--queue) or to an exchange with a
    routing key, then queries queue statistics for the final depth.
    """
    args = get_args(sys.argv)

    connect_node = get_optional_arg(args, "--node", "rabbitmq1")
    node_count = int(get_optional_arg(args, "--cluster-size", "3"))
    exchange = get_optional_arg(args, "--ex", "")
    count = int(get_mandatory_arg(args, "--msgs"))
    dup_rate = float(get_optional_arg(args, "--dup-rate", "0"))
    routing_key = get_optional_arg(args, "--rk", "hello")
    queue = get_optional_arg(args, "--queue", None)
    message_type = get_optional_arg(args, "--msg-type", "hello")

    publisher = RabbitPublisher(node_count, connect_node)
    stats = QueueStats('jack', 'jack', queue)

    try:
        if queue is not None:
            print("direct")
            publisher.publish_direct(queue, count, 1, dup_rate, message_type)
        else:
            publisher.publish(exchange, routing_key, count, 1, dup_rate,
                              message_type)

        queue_length = stats.get_queue_length(connect_node)
        print(f"Number of messages in queue: {queue_length}")

    # NOTE(review): only NameError is caught, which looks accidental given
    # the "Unexpected error" wording — most runtime failures (connection
    # errors etc.) will propagate. Confirm whether a broader exception type
    # was intended before widening it.
    except NameError as e:
        print(f"Unexpected error: {str(e)}")
Esempio n. 8
0
def main():
    """Start a group of consumers against an existing cluster and monitor
    the messages they receive until interrupted with Ctrl-C."""
    args = get_args(sys.argv)

    consumer_count = int(get_mandatory_arg(args, "--consumers"))
    topic = get_mandatory_arg(args, "--topic")
    print_mod = int(get_mandatory_arg(args, "--print-mod"))

    # f-prefixes removed: these strings contain no placeholders.
    console_out("Starting...", "TEST RUNNER")
    console_out("Cluster status:", "TEST RUNNER")
    subprocess.call(["bash", "../cluster/cluster-status.sh"])

    broker_manager = BrokerManager()
    broker_manager.load_initial_nodes()
    initial_nodes = broker_manager.get_initial_nodes()
    console_out(f"Initial nodes: {initial_nodes}", "TEST RUNNER")

    msg_monitor = MessageMonitor(print_mod)
    consumer_manager = ConsumerManager(broker_manager, msg_monitor,
                                       "TEST RUNNER", topic)
    consumer_manager.add_consumers(consumer_count, 1)

    # Message processing runs on its own thread so the main thread can sit
    # in the interruptible wait loop below.
    monitor_thread = threading.Thread(target=msg_monitor.process_messages)
    monitor_thread.start()

    consumer_manager.start_consumers()

    # Idle until the operator hits Ctrl-C.
    while True:
        try:
            time.sleep(1)
        except KeyboardInterrupt:
            break

    try:
        consumer_manager.stop_all_consumers()
        msg_monitor.stop_consuming()
        monitor_thread.join()
    except Exception as e:
        console_out("Failed to clean up test correctly: " + str(e),
                    "TEST RUNNER")
Esempio n. 9
0
    def __init__(self, args):
        """Parse AWS-specific deployment arguments on top of the base config."""
        super().__init__(args)

        # Machine images for standard and ARM instances.
        self.ami = get_mandatory_arg(args, "--ami", "")
        self.arm_ami = get_mandatory_arg(args, "--arm-ami", "")
        # Security-group / network identifiers for brokers and the load generator.
        self.broker_sg = get_mandatory_arg(args, "--broker-sg", "")
        self.loadgen_sg = get_mandatory_arg(args, "--loadgen-sg", "")
        self.loadgen_instance = get_mandatory_arg(args, "--loadgen-instance", "")
        self.subnet = get_mandatory_arg(args, "--subnet", "")
        self.key_pair = get_mandatory_arg(args, "--keypair", "")
        # Marks this configuration as targeting AWS hosting.
        self.hosting = "aws"
Esempio n. 10
0
    def __init__(self, args):
        """Parse the top-level benchmark/model run arguments.

        Assigns a fresh run id, reads run-control flags (deploy/destroy,
        repeats, parallelism, overrides), background-load settings, and the
        credentials used for the broker and the Postgres results store.
        """
        # Unique id for this run, used to correlate results.
        self.run_id = str(uuid.uuid4())
        self.tags = get_mandatory_arg(args, "--tags", "")
        self.mode = get_optional_arg_validated(args, "--mode", "",
                                               ["benchmark", "model"],
                                               "benchmark")
        self.config_count = int(
            get_optional_arg(args, "--config-count", "", "1"))
        # Lifecycle flags controlling instance reuse and teardown.
        self.new_instance_per_run = is_true(
            get_optional_arg(args, "--new-instance-per-run", "", "false"))
        self.no_destroy = is_true(
            get_optional_arg(args, "--no-destroy", "", "false"))
        self.no_deploy = is_true(
            get_optional_arg(args, "--no-deploy", "", "false"))
        self.run_tag = get_optional_arg(args, "--run-tag", "", "none")
        self.playlist_file = get_mandatory_arg(args, "--playlist-file", "")
        # Background-load configuration ("none"/0 disables it).
        self.background_policies_file = get_optional_arg(
            args, "--bg-policies-file", "", "none")
        self.background_topology_file = get_optional_arg(
            args, "--bg-topology-file", "", "none")
        self.background_delay = int(
            get_optional_arg(args, "--bg-delay", "", "0"))
        self.background_step_seconds = int(
            get_optional_arg(args, "--bg-step-seconds", "", "0"))
        self.background_step_repeat = int(
            get_optional_arg(args, "--bg-step-repeat", "", "0"))
        self.gap_seconds = int(get_mandatory_arg(args, "--gap-seconds", ""))
        self.repeat_count = int(get_optional_arg(args, "--repeat", "", "1"))
        self.parallel_count = int(get_optional_arg(args, "--parallel", "",
                                                   "1"))
        # Per-step overrides; 0 / "" means "no override".
        self.override_step_seconds = int(
            get_optional_arg(args, "--override-step-seconds", "", "0"))
        self.override_step_repeat = int(
            get_optional_arg(args, "--override-step-repeat", "", "0"))
        self.override_step_msg_limit = int(
            get_optional_arg(args, "--override-step-msg-limit", "", "0"))
        self.override_broker_hosts = get_optional_arg(
            args, "--override-broker-hosts", "", "")
        self.federation_enabled = is_true(
            get_optional_arg(args, "--federation-enabled", "", "false"))

        # NOTE(review): the username value appears masked/redacted in this
        # copy of the source — confirm against the original.
        self.username = "******"
        self.password = get_mandatory_arg(args, "--password", "")
        # Postgres results-store connection settings; the password is read
        # with the no-print variant so it is not echoed.
        self.postgres_url = get_mandatory_arg(args, "--postgres-jdbc-url", "")
        self.postgres_user = get_mandatory_arg(args, "--postgres-user", "")
        self.postgres_pwd = get_mandatory_arg_no_print(args,
                                                       "--postgres-password",
                                                       "")
        self.node_counter = int(
            get_optional_arg(args, "--start-node-num-from", "", "1"))
        self.log_level = get_optional_arg(args, "--log-level", "", "info")
def get_topic(new_cluster, args):
    """Resolve the topic for a test run.

    A brand-new cluster requires --new-topic. For an existing cluster the
    caller supplies either --new-topic or --existing-topic; when both are
    given the new topic wins, and when neither is given the process exits
    with status 1.

    Returns a (topic, is_new_topic) tuple.
    """
    if new_cluster:
        return get_mandatory_arg(args, "--new-topic"), True

    new_topic = get_optional_arg(args, "--new-topic", "")
    existing_topic = get_optional_arg(args, "--existing-topic", "")

    if new_topic == "" and existing_topic == "":
        console_out("You must provide a topic, either --new-topic or --existing-topic", "TEST_RUNNER")
        exit(1)

    if new_topic == "":
        return existing_topic, False
    return new_topic, True
def main():
    """Publish large test messages to RabbitMQ, direct to a queue or via an
    exchange with a routing key."""
    args = get_args(sys.argv)

    connect_node = get_optional_arg(args, "--node", "rabbitmq1")
    node_count = int(get_optional_arg(args, "--cluster-size", "3"))
    exchange = get_optional_arg(args, "--ex", "")
    count = int(get_mandatory_arg(args, "--msgs"))
    dup_rate = float(get_optional_arg(args, "--dup-rate", "0"))
    routing_key = get_optional_arg(args, "--rk", "hello")
    queue = get_optional_arg(args, "--queue", None)

    message_type = "large-msgs"

    publisher = RabbitPublisher(node_count, connect_node)

    if queue is not None:
        print("direct")
        publisher.publish_direct(queue, count, 1, dup_rate, message_type)
    else:
        publisher.publish(exchange, routing_key, count, 1, dup_rate,
                          message_type)
Esempio n. 13
0
    def __init__(self, args):
        """Parse the top-level benchmark/model run arguments (variant with
        background load handled via playlists on AWS).

        Assigns a fresh run id and reads run-control flags, background-load
        settings (GCP only), model-mode check selection, and credentials for
        the broker, Postgres results store and Influx metrics path.
        """
        # Unique id for this run, used to correlate results.
        self.run_id = str(uuid.uuid4())
        self.tags = get_mandatory_arg(args, "--tags", "")
        self.mode = get_optional_arg_validated(args, "--mode", "", ["benchmark","model"], "benchmark")
        self.config_count = int(get_optional_arg(args, "--config-count", "", "1"))
        # Lifecycle flags controlling instance reuse and teardown.
        self.new_instance_per_run = is_true(get_optional_arg(args, "--new-instance-per-run", "", "false"))
        self.no_destroy = is_true(get_optional_arg(args, "--no-destroy", "", "false"))
        self.no_deploy = is_true(get_optional_arg(args, "--no-deploy", "", "false"))
        self.restart_brokers = is_true(get_optional_arg(args, "--restart-brokers", "", "true"))
        self.run_tag = get_optional_arg(args, "--run-tag", "", "none")
        self.playlist_file = get_mandatory_arg(args, "--playlist-file", "")
        # note that for AWS, background load has been moved to playlists. TODO: do same for GCP
        self.background_policies_file = get_optional_arg(args, "--bg-policies-file", "", "none") # GCP only
        self.background_topology_file = get_optional_arg(args, "--bg-topology-file", "", "none") # GCP only
        self.background_delay = int(get_optional_arg(args, "--bg-delay-seconds", "", "0")) # GCP only
        self.background_step_seconds = int(get_optional_arg(args, "--bg-step-seconds", "", "0")) # GCP only
        self.background_step_repeat = int(get_optional_arg(args, "--bg-step-repeat", "", "0")) # GCP only
        self.gap_seconds = int(get_mandatory_arg(args, "--gap-seconds", ""))
        # NOTE(review): attribute is named *_ms but reads
        # --start-allowance-seconds (default "60") — confirm the unit.
        self.start_allowance_ms = int(get_optional_arg(args, "--start-allowance-seconds", "", "60"))
        self.repeat_count = int(get_optional_arg(args, "--repeat", "", "1"))
        self.parallel_count = int(get_optional_arg(args, "--parallel", "", "1"))
        # Per-step overrides; 0 / "" means "no override".
        self.override_step_seconds = int(get_optional_arg(args, "--override-step-seconds", "", "0"))
        self.override_step_repeat = int(get_optional_arg(args, "--override-step-repeat", "", "0"))
        self.override_step_msg_limit = int(get_optional_arg(args, "--override-step-msg-limit", "", "0"))
        self.override_broker_hosts = get_optional_arg(args, "--override-broker-hosts", "", "")
        self.federation_enabled = is_true(get_optional_arg(args, "--federation-enabled", "", "false"))
        self.attempts = get_optional_arg(args, "--attempts", "", "1")
        self.warmUpSeconds = get_optional_arg(args, "--warm-up-seconds", "", "0")


        # model mode only. Value: dataloss,duplicates,ordering,consumption,connectivity. Don't use ordering unless one consumer per queue.
        self.checks = get_optional_arg(args, "--checks", "", "dataloss,duplicates,connectivity")
        self.grace_period_sec = get_optional_arg(args, "--grace-period-sec", "", "60")

        # NOTE(review): the username value appears masked/redacted in this
        # copy of the source — confirm against the original.
        self.username = "******"
        self.password = get_mandatory_arg(args, "--password", "")
        # Postgres results-store connection settings; the password is read
        # with the no-print variant so it is not echoed.
        self.postgres_url = get_mandatory_arg(args, "--postgres-jdbc-url", "")
        self.postgres_user = get_mandatory_arg(args, "--postgres-user", "")
        self.postgres_pwd = get_mandatory_arg_no_print(args, "--postgres-password", "")
        self.node_counter = int(get_optional_arg(args, "--start-node-num-from", "", "1"))
        self.log_level = get_optional_arg(args, "--log-level", "", "info")
        self.influx_subpath = get_mandatory_arg(args, "--influx-subpath", "")
Esempio n. 14
0
        self.processing_ms_min = processing_ms_min
        self.processing_ms_max = processing_ms_max

        try:
            self.receive_channel.start_consuming()
        except KeyboardInterrupt:
            self.disconnect()
        except Exception as ex:
            template = "An exception of type {0} occurred. Arguments:{1!r}"
            message = template.format(type(ex).__name__, ex.args)
            print(message) 
                    
    def disconnect(self):
        """Close this consumer's connection to the broker."""
        self.connection.close()

# Script entry point: consume from one queue and republish to another,
# optionally simulating per-message processing time and deduplication.
args = get_args(sys.argv)

connect_node = get_optional_arg(args, "--node", "rabbitmq1")
queue = get_mandatory_arg(args, "--in-queue")
out_queue = get_mandatory_arg(args, "--out-queue")
prefetch = int(get_optional_arg(args, "--prefetch", "1"))
# Simulated per-message processing time range, in milliseconds.
processing_ms_min = int(get_optional_arg(args, "--min-ms", "0"))
processing_ms_max = int(get_optional_arg(args, "--max-ms", "0"))
dedup_enabled = get_optional_arg(args, "--dedup", "false") == "true"

print(f"Consuming queue: {queue} Writing to: {out_queue}")

consumer = RabbitConsumer()
consumer.connect(connect_node)
consumer.consume(queue, out_queue, prefetch, processing_ms_min, processing_ms_max, dedup_enabled)
def main():
    """Kafka idempotence soak test.

    For each of --tests runs: optionally deploy a fresh cluster, create a
    per-run topic, start a producer (idempotent or not) plus a chaos thread
    that kills the partition leader or its connections, produce for
    --run-minutes, then start consumers and compare the confirmed message
    set against what was received — reporting lost messages, duplicates
    and (for idempotent producers) ordering violations.
    """
    args = get_args(sys.argv)

    tests = int(get_mandatory_arg(args, "--tests"))
    run_minutes = int(get_mandatory_arg(args, "--run-minutes"))
    consumer_count = 1
    topic = get_mandatory_arg(args, "--topic")
    idempotence = is_true(get_mandatory_arg(args, "--idempotence"))
    partitions = 1

    cluster_size = get_optional_arg(args, "--cluster", "3")
    in_flight_max = int(get_optional_arg(args, "--in-flight-max", 10000))
    buffering_max = int(get_optional_arg(args, "--buffering-max-ms", 0))
    min_insync_reps = 1
    unclean_failover = "false"
    sequence_count = 1
    rep_factor = get_optional_arg(args, "--rep-factor", "3")
    acks_mode = get_optional_arg(args, "--acks-mode", "all")
    print_mod = int(get_optional_arg(args, "--print-mod", "0"))
    new_cluster = is_true(get_optional_arg(args, "--new-cluster", "true"))
    group_id = get_optional_arg(args, "--group-id", str(uuid.uuid1()))

    # Default print cadence scales with the in-flight window.
    if print_mod == 0:
        print_mod = in_flight_max * 3;

    for test_number in range(tests):

        print("")
        console_out(f"TEST RUN: {str(test_number)} with idempotence={idempotence}--------------------------", "TEST RUNNER")
        broker_manager = BrokerManager("confluent", True)
        
        if new_cluster:
            broker_manager.deploy(cluster_size, True)
            
        broker_manager.load_initial_nodes()
        initial_nodes = broker_manager.get_initial_nodes()
        console_out(f"Initial nodes: {initial_nodes}", "TEST RUNNER")
        broker_manager.correct_advertised_listeners()

        # Each run gets its own numbered topic.
        topic_name = topic + "_" + str(test_number)
        mgmt_node = broker_manager.get_random_init_node()
        console_out(f"Creating topic {topic_name} using node {mgmt_node}", "TEST RUNNER")
        broker_manager.create_topic(mgmt_node, topic_name, rep_factor, partitions, min_insync_reps, unclean_failover)
        
        time.sleep(10)

        msg_monitor = MessageMonitor(print_mod, True)
        chaos = ChaosExecutor(broker_manager)
        
        pub_node = broker_manager.get_random_init_node()
        producer = KafkaProducer(test_number, 1, broker_manager, acks_mode, in_flight_max, print_mod)
        
        if idempotence:
            producer.create_idempotent_producer(10000000, buffering_max)
        else:
            producer.create_producer(1000000, buffering_max)

        producer.configure_as_sequence(sequence_count)
        
        # Monitor and producer each run on their own thread.
        monitor_thread = threading.Thread(target=msg_monitor.process_messages)
        monitor_thread.start()
        
        pub_thread = threading.Thread(target=producer.start_producing,args=(topic_name, 1000000000))
        pub_thread.start()
        console_out("producer started", "TEST RUNNER")

        # Let the producer warm up before injecting chaos.
        init_wait_sec = 20
        console_out(f"Will start chaos and consumer actions in {init_wait_sec} seconds", "TEST RUNNER")
        time.sleep(init_wait_sec)

        chaos_thread = threading.Thread(target=chaos.start_kill_leader_or_connections,args=(topic_name, 0))
        chaos_thread.start()
        console_out("Chaos executor started", "TEST RUNNER")

        # Produce under chaos for the requested number of minutes.
        ctr = 1
        while ctr < run_minutes:
            time.sleep(60)
            console_out(f"Test at {ctr} minute mark, {run_minutes-ctr} minutes left", "TEST RUNNER")
            ctr += 1

        producer.stop_producing()

        try:
            chaos.stop_chaos_actions()
            chaos_thread.join()
            console_out(f"Chaos executor shutdown", "TEST RUNNER")
        except Exception as e:
            console_out("Failed to stop chaos cleanly: " + str(e), "TEST RUNNER")
        

        subprocess.call(["bash", "../cluster/cluster-status.sh"])
        time.sleep(60)
        
        # Consumers start only after production ends; wait (up to 300s) for
        # them to catch up with everything the producer had confirmed.
        consumer_manager = ConsumerManager(broker_manager, msg_monitor, "TEST RUNNER", topic_name, group_id)
        consumer_manager.add_consumers(consumer_count, test_number)
        consumer_manager.start_consumers()
        
        ctr = 0
        
        while ctr < 300:
            if msg_monitor.get_unique_count() >= producer.get_pos_ack_count() and len(producer.get_msg_set().difference(msg_monitor.get_msg_set())) == 0:
               break
            time.sleep(1)
            ctr += 1

        # Lost = confirmed by the broker but never received by a consumer.
        confirmed_set = producer.get_msg_set()
        lost_msgs = confirmed_set.difference(msg_monitor.get_msg_set())
        duplicates = msg_monitor.get_receive_count() - msg_monitor.get_unique_count()

        console_out("RESULTS------------------------------------", "TEST RUNNER")
        console_out(f"Confirmed count: {producer.get_pos_ack_count()} Received count: {msg_monitor.get_receive_count()} Unique received: {msg_monitor.get_unique_count()}", "TEST RUNNER")
        console_out(f"Duplication count: {duplicates}", "TEST RUNNER")

        success = True
        if len(lost_msgs) > 0:
            console_out(f"FAILED TEST: Lost messages: {len(lost_msgs)}", "TEST RUNNER")
            success = False

        # Ordering and duplicate violations only fail the test when the
        # producer was configured as idempotent.
        if idempotence and msg_monitor.get_out_of_order():
            success = False
            console_out(f"FAILED TEST: Received out-of-order messages", "TEST RUNNER")

        if idempotence and duplicates:
            success = False
            console_out(f"FAILED TEST: Duplicates", "TEST RUNNER")

        if success:
            console_out("TEST OK", "TEST RUNNER")

        console_out("RESULTS END------------------------------------", "TEST RUNNER")

        try:
            consumer_manager.stop_all_consumers()
            pub_thread.join()
            msg_monitor.stop_consuming()
            monitor_thread.join()
        except Exception as e:
            console_out("Failed to clean up test correctly: " + str(e), "TEST RUNNER")

        console_out(f"TEST {str(test_number )} COMPLETE", "TEST RUNNER")
Esempio n. 16
0
def sigterm_handler(_signo, _stack_frame):
    """SIGTERM handler: report the signal details, then exit with status 0."""
    message = "sigterm_handler executed, %s, %s" % (_signo, _stack_frame)
    print(message)
    sys.exit(0)


if __name__ == "__main__":
    # Exit cleanly (status 0) when the orchestrator sends SIGTERM.
    signal.signal(signal.SIGTERM, sigterm_handler)

    args = get_args(sys.argv)

    # Broker credentials and connection options.
    user = get_optional_arg(args, "--user", "test")
    password = get_optional_arg(args, "--password", "test")
    use_https = is_true(get_optional_arg(args, "--use-https", "false"))
    virtual_host = get_optional_arg(args, "--vhost", "%2f")
    # Default queue name is randomized so concurrent runs don't collide.
    queue = get_optional_arg(args, "--queue", f"q{random.randint(0, 100000)}")
    msg_count = int(get_mandatory_arg(args, "--msg-count"))
    print_mod = int(get_optional_arg(args, "--print-mod", "1000"))
    use_confirms = is_true(get_mandatory_arg(args, "--use-confirms"))
    use_amqproxy = is_true(get_mandatory_arg(args, "--use-amqproxy"))
    use_toxiproxy = is_true(get_mandatory_arg(args, "--use-toxiproxy"))
    mgmt_ip = get_mandatory_arg(args, "--mgmt-ip")
    mgmt_port = get_mandatory_arg(args, "--mgmt-port")
    broker_name = get_mandatory_arg(args, "--broker-name")

    # When going through amqproxy, connect to the proxy address and leave
    # the direct broker address unset.
    if use_amqproxy:
        amqproxy_ip = get_mandatory_arg(args, "--amqproxy-ip")
        amqproxy_port = get_mandatory_arg(args, "--amqproxy-port")
        broker_ip = ""
        broker_port = ""
    else:
        broker_ip = get_mandatory_arg(args, "--broker-ip")
Esempio n. 17
0
#!/usr/bin/env python
import pika
from pika import spec
import sys
import time
import subprocess
import datetime
import uuid
import random
from command_args import get_args, get_mandatory_arg, get_optional_arg

# Script setup: parse publishing arguments and initialize shared state used
# by the publish loop (which follows later in the file).
args = get_args(sys.argv)

connect_node = get_optional_arg(args, "--node", "rabbitmq1")
node_count = int(get_optional_arg(args, "--cluster-size", "3"))
exchange = get_mandatory_arg(args, "--ex")
count = int(get_mandatory_arg(args, "--msgs"))
state_count = int(get_mandatory_arg(args, "--keys"))
dup_rate = float(get_optional_arg(args, "--dup-rate", "0"))
# Total messages = messages per key times number of keys.
total = count * state_count

if state_count > 10:
    print("State count limit is 10")
    exit(1)

# Shared publishing state: shutdown flags and ack-progress tracking.
terminate = False
exit_triggered = False
last_ack_time = datetime.datetime.now()
last_ack = 0

node_names = []
def main():
    """Interactive Kafka consume/produce test.

    Deploys (or reuses) a cluster, creates the topic if needed, starts
    producers and consumers, then loops on operator commands to add or
    remove consumers until Ctrl-C. After a grace period for consumers to
    catch up, compares the confirmed message set against what was received
    and reports lost and out-of-order messages.
    """
    args = get_args(sys.argv)

    cluster_size = get_optional_arg(args, "--cluster", "3")
    new_cluster = is_true(get_mandatory_arg(args, "--new-cluster"))
    use_blockade = is_true(get_optional_arg(args, "--use-blockade", "true"))
    image_version = get_optional_arg(args, "--image-version", "confluent")
    
    consumer_count = int(get_optional_arg(args, "--consumers", "1"))
    group_id = get_optional_arg(args, "--group-id", str(uuid.uuid1()))
    grace_period_sec = int(get_optional_arg(args, "--grace-period-sec", "300"))
    topic, is_new_topic = get_topic(new_cluster, args)
        
    partitions = get_optional_arg(args, "--partitions", "3")
    rep_factor = get_optional_arg(args, "--rep-factor", "3")


    analyze = is_true(get_optional_arg(args, "--analyze", "true"))
    producer_count = int(get_optional_arg(args, "--producers", 1))
    in_flight_max = int(get_optional_arg(args, "--in-flight-max", 100))
    min_insync_reps = int(get_optional_arg(args, "--min-insync-replicas", "1"))
    unclean_failover = get_optional_arg(args, "--unclean-failover", "false")
    sequence_count = int(get_optional_arg(args, "--sequences", "1"))
    acks_mode = get_optional_arg(args, "--acks-mode", "all")
    print_mod = int(get_optional_arg(args, "--print-mod", "0"))

    # Default print cadence scales with the in-flight window.
    if print_mod == 0:
        print_mod = in_flight_max * 3;
    
    test_number = 1
    console_out(f"Starting...", "TEST RUNNER")

    broker_manager = BrokerManager(image_version, use_blockade)
    broker_manager.deploy(cluster_size, new_cluster)

    initial_nodes = broker_manager.get_initial_nodes()
    console_out(f"Initial nodes: {initial_nodes}", "TEST RUNNER")
    
    topic_name = topic
    
    # Only create the topic for a fresh cluster or an explicitly new topic.
    if new_cluster or is_new_topic:
        mgmt_node = broker_manager.get_random_init_node()
        console_out(f"Creating topic {topic_name} using node {mgmt_node}", "TEST RUNNER")
        broker_manager.create_topic(mgmt_node, topic_name, rep_factor, partitions, min_insync_reps, unclean_failover)
    
    time.sleep(10)

    msg_monitor = MessageMonitor(print_mod, analyze)
    
    prod_manager = ProducerManager(broker_manager, "TEST RUNNER", topic_name)
    prod_manager.add_producers(producer_count, test_number, acks_mode, in_flight_max, print_mod, sequence_count)

    consumer_manager = ConsumerManager(broker_manager, msg_monitor, "TEST RUNNER", topic_name, group_id)
    consumer_manager.add_consumers(consumer_count, test_number)

    # Message processing runs on its own thread.
    monitor_thread = threading.Thread(target=msg_monitor.process_messages)
    monitor_thread.start()
    
    # Consumers get a head start before producers begin.
    consumer_manager.start_consumers()
    time.sleep(30)
    prod_manager.start_producers()
    

    # Interactive loop: operator adds/removes consumers; Ctrl-C stops
    # producing and moves on to the catch-up grace period.
    while True:
        try:
            command = input("a=add consumer, r=remove consumer - then hit enter")
            if command == "a":
                consumer_manager.add_consumer_and_start_consumer(test_number)
            elif command == "r":
                consumer_manager.stop_and_remove_consumer()
            else:
                console_out("Unknown command", "TEST_RUNNER")
        except KeyboardInterrupt:
            console_out("Stopping producer. Starting grace period for consumers to catch up.", "TEST_RUNNER")
            prod_manager.stop_all_producers()
            break

    # Wait (up to grace_period_sec) for consumers to receive everything
    # the producers had confirmed; a second Ctrl-C ends the wait early.
    if producer_count > 0:
        try:
            ctr = 0
            while ctr < grace_period_sec:
                if msg_monitor.get_unique_count() >= prod_manager.get_total_pos_ack_count() and len(prod_manager.get_total_msg_set().difference(msg_monitor.get_msg_set())) == 0:
                    break
                time.sleep(1)
                ctr += 1
        except KeyboardInterrupt:
            console_out("Grace period ended", "TEST RUNNER")

    # Lost = confirmed by the broker but never received by a consumer.
    confirmed_set = prod_manager.get_total_msg_set()
    lost_msgs = confirmed_set.difference(msg_monitor.get_msg_set())

    console_out("RESULTS------------------------------------", "TEST RUNNER")
    console_out(f"Confirmed count: {prod_manager.get_total_pos_ack_count()} Received count: {msg_monitor.get_receive_count()} Unique received: {msg_monitor.get_unique_count()}", "TEST RUNNER")

    if analyze:
        success = True
        if len(lost_msgs) > 0:
            console_out(f"FAILED TEST: Lost messages: {len(lost_msgs)}", "TEST RUNNER")
            success = False

        if msg_monitor.get_out_of_order() == True:
            success = False
            console_out(f"FAILED TEST: Received out-of-order messages", "TEST RUNNER")

        if success:
            console_out("TEST OK", "TEST RUNNER")
    console_out("RESULTS END------------------------------------", "TEST RUNNER")

    try:
        consumer_manager.stop_all_consumers()
        msg_monitor.stop_consuming()
        monitor_thread.join()
        prod_manager.stop_all_producers()
    except Exception as e:
        console_out("Failed to clean up test correctly: " + str(e), "TEST RUNNER")

    console_out(f"TEST {str(test_number )} COMPLETE", "TEST RUNNER")
def main():
    """Kafka resilience test: run a single end-to-end message-loss and
    ordering check against a (new or existing) cluster.

    Starts one sequence producer and N consumers on a topic, publishes
    until the operator presses Ctrl-C, then gives consumers a grace
    period to catch up. Finally compares the producer's confirmed
    message set against the monitor's received set and reports
    OK / FAILED (lost messages, out-of-order delivery).
    """
    args = get_args(sys.argv)

    # Mandatory arguments.
    new_cluster = get_mandatory_arg(args, "--new-cluster")
    consumer_count = int(get_mandatory_arg(args, "--consumers"))
    grace_period_sec = int(get_mandatory_arg(args, "--grace-period-sec"))
    topic = get_mandatory_arg(args, "--topic")
    partitions = get_mandatory_arg(args, "--partitions")

    # Optional arguments with defaults.
    cluster_size = get_optional_arg(args, "--cluster", "3")
    in_flight_max = int(get_optional_arg(args, "--in-flight-max", 100))
    min_insync_reps = int(get_optional_arg(args, "--min-insync-replicas", "1"))
    unclean_failover = get_optional_arg(args, "--unclean-failover", "false")
    sequence_count = int(get_optional_arg(args, "--sequences", "1"))
    rep_factor = get_optional_arg(args, "--rep-factor", "3")
    acks_mode = get_optional_arg(args, "--acks-mode", "all")
    print_mod = int(get_optional_arg(args, "--print-mod", "0"))

    # 0 means "derive the progress-print interval from the in-flight window".
    if print_mod == 0:
        print_mod = in_flight_max * 3

    test_number = 1
    console_out("Starting...", "TEST RUNNER")

    if new_cluster.upper() == "TRUE":
        subprocess.call(
            ["bash", "../automated/setup-test-run.sh", cluster_size, "3.8"])
        console_out("Waiting for cluster...", "TEST RUNNER")
        time.sleep(30)
    else:
        console_out("Using existing cluster...", "TEST RUNNER")

    console_out("Cluster status:", "TEST RUNNER")
    subprocess.call(["bash", "../cluster/cluster-status.sh"])

    broker_manager = BrokerManager()
    broker_manager.load_initial_nodes()
    initial_nodes = broker_manager.get_initial_nodes()
    console_out(f"Initial nodes: {initial_nodes}", "TEST RUNNER")
    broker_manager.correct_advertised_listeners()

    topic_name = topic
    mgmt_node = broker_manager.get_random_init_node()
    console_out(f"Creating topic {topic_name} using node {mgmt_node}",
                "TEST RUNNER")
    broker_manager.create_topic(mgmt_node, topic_name, rep_factor, partitions,
                                min_insync_reps, unclean_failover)

    # Give the brokers time to settle after topic creation.
    time.sleep(10)

    msg_monitor = MessageMonitor(print_mod)
    consumer_manager = ConsumerManager(broker_manager, msg_monitor,
                                       "TEST RUNNER", topic_name)

    producer = KafkaProducer(test_number, 1, broker_manager, acks_mode,
                             in_flight_max, print_mod)
    producer.create_producer()
    producer.configure_as_sequence(sequence_count)
    consumer_manager.add_consumers(consumer_count, test_number)

    monitor_thread = threading.Thread(target=msg_monitor.process_messages)
    monitor_thread.start()

    consumer_manager.start_consumers()

    pub_thread = threading.Thread(target=producer.start_producing,
                                  args=(topic_name, 10000000))
    pub_thread.start()
    console_out("producer started", "TEST RUNNER")

    # Publish until the operator interrupts with Ctrl-C.
    while True:
        try:
            time.sleep(1)
        except KeyboardInterrupt:
            # Tag normalized to "TEST RUNNER" to match the rest of this runner.
            console_out(
                "Stopping producer. Starting grace period for consumers to catch up.",
                "TEST RUNNER")
            producer.stop_producing()
            break

    # Grace period: wait (up to grace_period_sec seconds) until the monitor
    # has seen every message the producer got a positive ack for.
    # Ctrl-C ends the grace period early.
    try:
        ctr = 0
        while ctr < grace_period_sec:
            if msg_monitor.get_unique_count() >= producer.get_pos_ack_count(
            ) and len(producer.get_msg_set().difference(
                    msg_monitor.get_msg_set())) == 0:
                break
            time.sleep(1)
            ctr += 1
    except KeyboardInterrupt:
        console_out("Grace period ended", "TEST RUNNER")

    # A message is "lost" if the broker confirmed it but no consumer saw it.
    confirmed_set = producer.get_msg_set()
    lost_msgs = confirmed_set.difference(msg_monitor.get_msg_set())

    console_out("RESULTS------------------------------------", "TEST RUNNER")
    console_out(
        f"Confirmed count: {producer.get_pos_ack_count()} Received count: {msg_monitor.get_receive_count()} Unique received: {msg_monitor.get_unique_count()}",
        "TEST RUNNER")

    success = True
    if len(lost_msgs) > 0:
        console_out(f"FAILED TEST: Lost messages: {len(lost_msgs)}",
                    "TEST RUNNER")
        success = False

    if msg_monitor.get_out_of_order():
        success = False
        console_out("FAILED TEST: Received out-of-order messages",
                    "TEST RUNNER")

    if success:
        console_out("TEST OK", "TEST RUNNER")

    console_out("RESULTS END------------------------------------",
                "TEST RUNNER")

    # Best-effort shutdown; failures here must not mask the test result.
    try:
        consumer_manager.stop_all_consumers()
        msg_monitor.stop_consuming()
        monitor_thread.join()
        pub_thread.join()
    except Exception as e:
        console_out("Failed to clean up test correctly: " + str(e),
                    "TEST RUNNER")

    console_out(f"TEST {test_number} COMPLETE", "TEST RUNNER")
def main():
    """RabbitMQ quorum-queue resilience test.

    For each test run: deploys a cluster, creates a quorum queue
    (optionally single-active-consumer), starts one publisher and one
    consumer, executes a fixed number of chaos actions at random
    intervals, repairs the cluster, then checks the confirmed message
    set against what the consumer received (message loss / ordering).
    """
    print("quorum-queue-test.py")
    args = get_args(sys.argv)

    count = -1  # no limit on the publish sequence length

    # Mandatory arguments.
    tests = int(get_mandatory_arg(args, "--tests"))
    actions = int(get_mandatory_arg(args, "--actions"))
    grace_period_sec = int(get_mandatory_arg(args, "--grace-period-sec"))
    queue = get_mandatory_arg(args, "--queue")
    sac_enabled = is_true(get_mandatory_arg(args, "--sac"))

    # Optional arguments with defaults.
    in_flight_max = int(get_optional_arg(args, "--in-flight-max", 10))
    cluster_size = get_optional_arg(args, "--cluster", "3")
    chaos_mode = get_optional_arg(args, "--chaos-mode", "mixed")
    chaos_min_interval = int(
        get_optional_arg(args, "--chaos-min-interval", "30"))
    chaos_max_interval = int(
        get_optional_arg(args, "--chaos-max-interval", "120"))
    prefetch = int(get_optional_arg(args, "--pre-fetch", "10"))
    rmq_version = get_optional_arg_validated(args, "--rmq-version", "3.8-beta",
                                             ["3.7", "3.8-beta", "3.8-alpha"])

    for test_number in range(1, tests + 1):

        print("")
        console_out(
            f"TEST RUN: {str(test_number)} of {tests}--------------------------",
            "TEST RUNNER")
        setup_complete = False

        # Deploy a cluster and create the queue. If the queue cannot be
        # created after 20 attempts, redeploy the cluster and try again.
        while not setup_complete:
            broker_manager = BrokerManager()
            broker_manager.deploy(cluster_size, True, rmq_version, False)
            initial_nodes = broker_manager.get_initial_nodes()

            console_out(f"Initial nodes: {initial_nodes}", "TEST RUNNER")

            print_mod = in_flight_max * 5
            queue_name = queue + "_" + str(test_number)

            mgmt_node = broker_manager.get_random_init_node()
            queue_created = False
            qc_ctr = 0
            while not queue_created and qc_ctr < 20:
                qc_ctr += 1
                if sac_enabled:
                    queue_created = broker_manager.create_quorum_sac_queue(
                        mgmt_node, queue_name, cluster_size, 0)
                else:
                    queue_created = broker_manager.create_quorum_queue(
                        mgmt_node, queue_name, cluster_size, 0)

                if queue_created:
                    setup_complete = True
                else:
                    time.sleep(5)

        # Let the freshly created queue settle.
        time.sleep(10)

        msg_monitor = MessageMonitor("qqt", test_number, print_mod, True,
                                     False)
        publisher = RabbitPublisher(1, test_number, broker_manager,
                                    in_flight_max, 120, print_mod)
        publisher.configure_sequence_direct(queue_name, count, 0, 1)
        consumer_manager = ConsumerManager(broker_manager, msg_monitor,
                                           "TEST RUNNER", False)
        consumer_manager.add_consumers(1, test_number, queue_name, prefetch)

        chaos = ChaosExecutor(initial_nodes)

        # Optionally restrict the kinds of chaos inflicted.
        if chaos_mode == "partitions":
            chaos.only_partitions()
        elif chaos_mode == "nodes":
            chaos.only_kill_nodes()

        monitor_thread = threading.Thread(target=msg_monitor.process_messages)
        monitor_thread.start()

        consumer_manager.start_consumers()

        pub_thread = threading.Thread(target=publisher.start_publishing)
        pub_thread.start()
        console_out("publisher started", "TEST RUNNER")

        # Execute the requested number of chaos actions at random intervals.
        for action_num in range(1, actions + 1):
            wait_sec = random.randint(chaos_min_interval, chaos_max_interval)
            console_out(f"waiting for {wait_sec} seconds before next action",
                        "TEST RUNNER")
            time.sleep(wait_sec)

            console_out(
                f"execute chaos action {str(action_num)}/{actions} of test {str(test_number)}",
                "TEST RUNNER")
            chaos.execute_chaos_action()
            subprocess.call(["bash", "../cluster/cluster-status.sh"])

        # Let the cluster run broken for a minute, then repair it.
        time.sleep(60)
        console_out("repairing cluster", "TEST RUNNER")
        chaos.repair()
        console_out("repaired cluster", "TEST RUNNER")

        publisher.stop_publishing()

        console_out("starting grace period for consumer to catch up",
                    "TEST RUNNER")
        ctr = 0

        # Wait until either all confirmed messages were consumed, or the
        # grace period has elapsed AND no message has arrived in the last
        # 15 seconds (i.e. the consumer has gone quiet).
        while True:
            # NOTE: despite the name this is a timedelta, not milliseconds.
            ms_since_last_msg = datetime.datetime.now(
            ) - msg_monitor.get_last_msg_time()
            if msg_monitor.get_unique_count() >= publisher.get_pos_ack_count(
            ) and len(publisher.get_msg_set().difference(
                    msg_monitor.get_msg_set())) == 0:
                break
            elif ctr > grace_period_sec and ms_since_last_msg.total_seconds(
            ) > 15:
                break
            time.sleep(1)
            ctr += 1

        # A message is "lost" if the broker confirmed it but the consumer
        # never received it.
        confirmed_set = publisher.get_msg_set()
        lost_msgs = confirmed_set.difference(msg_monitor.get_msg_set())

        console_out("RESULTS------------------------------------",
                    "TEST RUNNER")

        if len(lost_msgs) > 0:
            console_out(f"Lost messages count: {len(lost_msgs)}",
                        "TEST RUNNER")
            for msg in lost_msgs:
                console_out(f"Lost message: {msg}", "TEST RUNNER")

        console_out(
            f"Confirmed count: {publisher.get_pos_ack_count()} Received count: {msg_monitor.get_receive_count()} Unique received: {msg_monitor.get_unique_count()}",
            "TEST RUNNER")
        success = True

        if msg_monitor.get_out_of_order():
            console_out("FAILED TEST: OUT OF ORDER MESSAGES", "TEST RUNNER")
            success = False

        if len(lost_msgs) > 0:
            console_out("FAILED TEST: LOST MESSAGES", "TEST RUNNER")
            success = False

        if success:
            console_out("TEST OK", "TEST RUNNER")

        console_out("RESULTS END------------------------------------",
                    "TEST RUNNER")

        # Best-effort shutdown. Also stop the monitor and join its thread
        # (the other runners in this file do this); without it a monitor
        # thread leaks on every test iteration.
        try:
            consumer_manager.stop_all_consumers()
            msg_monitor.stop_consuming()
            monitor_thread.join()
            pub_thread.join()
        except Exception as e:
            console_out("Failed to clean up test correctly: " + str(e),
                        "TEST RUNNER")

        console_out(f"TEST {test_number} COMPLETE", "TEST RUNNER")
# --- Example n. 21 (scrape artifact; original marker "Esempio n. 21", score 0) ---
def main():
    """RabbitMQ single-active-consumer (SAC) resilience test.

    For each test run: optionally provisions a cluster, creates a
    (SAC or plain) queue, starts a publisher and N consumers, then runs
    background chaos actions and random consumer actions for a fixed
    number of minutes. Afterwards it resumes all consumers, waits a
    grace period, and verifies that every confirmed message was consumed
    and that no out-of-order delivery occurred.
    """

    #signal.signal(signal.SIGINT, interuppt_handler)
    args = get_args(sys.argv)

    count = -1  # no limit on the publish sequence length

    # Mandatory arguments.
    tests = int(get_mandatory_arg(args, "--tests"))
    run_minutes = int(get_mandatory_arg(args, "--run-minutes"))
    consumer_count = int(get_mandatory_arg(args, "--consumers"))
    grace_period_sec = int(get_mandatory_arg(args, "--grace-period-sec"))
    queue = get_mandatory_arg(args, "--queue")
    queue_type = get_mandatory_arg(args, "--queue-type")
    sac = get_mandatory_arg(args, "--sac")

    # Optional arguments with defaults. Renamed from `chaos` to avoid being
    # shadowed by the ChaosExecutor instance created further down.
    publisher_count = int(get_optional_arg(args, "--publishers", "1"))
    print_mod = int(get_optional_arg(args, "--print-mod", "0"))
    new_cluster = get_optional_arg(args, "--new-cluster", "true")
    in_flight_max = int(get_optional_arg(args, "--in-flight-max", "10"))
    sequence_count = int(get_optional_arg(args, "--sequences", "1"))
    cluster_size = get_optional_arg(args, "--cluster", "3")
    chaos_actions_arg = get_optional_arg(args, "--chaos-actions", "true")
    chaos_mode = get_optional_arg(args, "--chaos-mode", "mixed")
    chaos_min_interval = int(get_optional_arg(args, "--chaos-min-interval", "60"))
    chaos_max_interval = int(get_optional_arg(args, "--chaos-max-interval", "120"))
    consumer_actions = get_optional_arg(args, "--consumer-actions", "true")
    con_action_min_interval = int(get_optional_arg(args, "--consumer-min-interval", "20"))
    con_action_max_interval = int(get_optional_arg(args, "--consumer-max-interval", "60"))

    # 0 means "derive the progress-print interval from the in-flight window".
    if print_mod == 0:
        print_mod = in_flight_max * 5

    include_chaos = True
    if chaos_actions_arg.upper() == "FALSE":
        include_chaos = False

    include_con_actions = True
    if consumer_actions.upper() == "FALSE":
        include_con_actions = False

    sac_enabled = True
    if sac.upper() == "FALSE":
        sac_enabled = False

    for test_number in range(tests):

        print("")
        console_out(f"TEST RUN: {str(test_number)} --------------------------", "TEST RUNNER")
        if new_cluster.upper() == "TRUE":
            subprocess.call(["bash", "../automated/setup-test-run.sh", cluster_size, "3.8"])
            console_out("Waiting for cluster...", "TEST RUNNER")
            time.sleep(30)

        console_out("Cluster status:", "TEST RUNNER")
        subprocess.call(["bash", "../cluster/cluster-status.sh"])

        broker_manager = BrokerManager()
        broker_manager.load_initial_nodes()
        initial_nodes = broker_manager.get_initial_nodes()
        console_out(f"Initial nodes: {initial_nodes}", "TEST RUNNER")

        queue_name = queue + "_" + str(test_number)
        mgmt_node = broker_manager.get_random_init_node()
        queue_created = False

        # Retry queue creation indefinitely (e.g. while the cluster boots).
        while not queue_created:
            if sac_enabled:
                queue_created = broker_manager.create_sac_queue(mgmt_node, queue_name, cluster_size, queue_type)
            else:
                queue_created = broker_manager.create_queue(mgmt_node, queue_name, cluster_size, queue_type)

            if not queue_created:
                time.sleep(5)

        # Let the freshly created queue settle.
        time.sleep(10)

        msg_monitor = MessageMonitor(print_mod)
        # NOTE(review): `stats` is never read in this function; kept because
        # the QueueStats constructor's side effects are unknown.
        stats = QueueStats('jack', 'jack', queue_name)
        chaos = ChaosExecutor(initial_nodes)

        # Optionally restrict the kinds of chaos inflicted.
        if chaos_mode == "partitions":
            chaos.only_partitions()
        elif chaos_mode == "nodes":
            chaos.only_kill_nodes()

        consumer_manager = ConsumerManager(broker_manager, msg_monitor, "TEST RUNNER")

        pub_node = broker_manager.get_random_init_node()
        publisher = RabbitPublisher(f"PUBLISHER(Test:{test_number} Id:P1)", initial_nodes, pub_node, in_flight_max, 120, print_mod)
        consumer_manager.add_consumers(consumer_count, test_number, queue_name)

        monitor_thread = threading.Thread(target=msg_monitor.process_messages)
        monitor_thread.start()

        consumer_manager.start_consumers()

        # Only a single direct publisher is supported by this runner.
        if publisher_count == 1:
            pub_thread = threading.Thread(target=publisher.publish_direct,args=(queue_name, count, sequence_count, 0, "sequence"))
            pub_thread.start()
            console_out("publisher started", "TEST RUNNER")

        # Let the system reach a steady state before disrupting it.
        if include_con_actions or include_chaos:
            init_wait_sec = 20
            console_out(f"Will start chaos and consumer actions in {init_wait_sec} seconds", "TEST RUNNER")
            time.sleep(init_wait_sec)

        if include_chaos:
            chaos_thread = threading.Thread(target=chaos.start_random_single_action_and_repair,args=(chaos_min_interval,chaos_max_interval))
            chaos_thread.start()
            console_out("Chaos executor started", "TEST RUNNER")

        if include_con_actions:
            consumer_action_thread = threading.Thread(target=consumer_manager.start_random_consumer_actions,args=(con_action_min_interval, con_action_max_interval))
            consumer_action_thread.start()
            console_out("Consumer actions started", "TEST RUNNER")

        # Main test loop: tick once per second until the run time elapses,
        # the global stop flag is set, or the operator presses Ctrl-C.
        ctr = 0
        run_seconds = run_minutes * 60
        while ctr < run_seconds and not stop_please:
            try:
                time.sleep(1)
                ctr += 1

                if ctr % 60 == 0:
                    console_out(f"Test at {int(ctr/60)} minute mark, {int((run_seconds-ctr)/60)} minutes left", "TEST RUNNER")
            except KeyboardInterrupt:
                console_out(f"Test forced to stop at {int(ctr/60)} minute mark, {int((run_seconds-ctr)/60)} minutes left", "TEST RUNNER")
                break

        # Stop the background disruption threads.
        try:
            chaos.stop_random_single_action_and_repair()
            consumer_manager.stop_random_consumer_actions()

            if include_chaos:
                chaos_thread.join()

            if include_con_actions:
                consumer_action_thread.join()
        except Exception as e:
            console_out("Failed to stop chaos cleanly: " + str(e), "TEST RUNNER")

        console_out("Resuming consumers", "TEST RUNNER")
        consumer_manager.resume_all_consumers()

        if publisher_count == 1:
            publisher.stop(True)

        console_out("starting grace period for consumer to catch up", "TEST RUNNER")
        ctr = 0

        # Wait (up to grace_period_sec) until the monitor has seen every
        # message the publisher got a positive ack for.
        while ctr < grace_period_sec:
            if msg_monitor.get_unique_count() >= publisher.get_pos_ack_count() and len(publisher.get_msg_set().difference(msg_monitor.get_msg_set())) == 0:
                break
            time.sleep(1)
            ctr += 1

        # Messages confirmed by the broker but never consumed.
        confirmed_set = publisher.get_msg_set()
        not_consumed_msgs = confirmed_set.difference(msg_monitor.get_msg_set())

        console_out("RESULTS ----------------------------------------", "TEST RUNNER")
        console_out(f"Confirmed count: {publisher.get_pos_ack_count()} Received count: {msg_monitor.get_receive_count()} Unique received: {msg_monitor.get_unique_count()}", "TEST RUNNER")

        success = True
        if len(not_consumed_msgs) > 0:
            console_out(f"FAILED TEST: Potential failure to promote Waiting to Active. Not consumed count: {len(not_consumed_msgs)}", "TEST RUNNER")
            success = False

        if msg_monitor.get_out_of_order():
            success = False
            console_out("FAILED TEST: Received out-of-order messages", "TEST RUNNER")

        if success:
            console_out("TEST OK", "TEST RUNNER")

        console_out("RESULTS END ------------------------------------", "TEST RUNNER")

        # Best-effort shutdown; failures here must not mask the test result.
        try:
            consumer_manager.stop_all_consumers()

            if publisher_count == 1:
                pub_thread.join()
            msg_monitor.stop_consuming()
            monitor_thread.join()
        except Exception as e:
            console_out("Failed to clean up test correctly: " + str(e), "TEST RUNNER")

        console_out(f"TEST {test_number} COMPLETE", "TEST RUNNER")
# --- Example n. 22 (scrape artifact; original marker "Esempio n. 22", score 0) ---
def main():
    """RabbitMQ randomized resilience test (random-test.py).

    For each test run: deploys or reuses a cluster, creates a mirrored or
    quorum queue (optionally single-active-consumer), starts up to one
    publisher and N consumers, runs background chaos and random consumer
    actions for a fixed number of minutes, then verifies that every
    confirmed message was consumed and delivery order held. Per-test
    results are accumulated and a summary is printed at the end.
    """
    print("random-test.py")
    #signal.signal(signal.SIGINT, interuppt_handler)
    args = get_args(sys.argv)

    count = -1  # no limit on the publish sequence length

    # Mandatory arguments.
    test_name = get_mandatory_arg(args, "--test-name")
    tests = int(get_mandatory_arg(args, "--tests"))
    run_minutes = int(get_mandatory_arg(args, "--run-minutes"))
    consumer_count = int(get_mandatory_arg(args, "--consumers"))
    prefetch = int(get_optional_arg(args, "--pre-fetch", "10"))
    grace_period_sec = int(get_mandatory_arg(args, "--grace-period-sec"))
    queue = get_mandatory_arg(args, "--queue")
    queue_type = get_mandatory_arg(args, "--queue-type")
    analyze = is_true(get_optional_arg(args, "--analyze", "true"))

    # Max-length limit only applies to quorum queues.
    if queue_type == "quorum":
        qq_max_length = int(get_optional_arg(args, "--qq-max-length", "0"))

    sac_enabled = is_true(get_mandatory_arg(args, "--sac"))
    log_messages = is_true(get_optional_arg(args, "--log-msgs", "false"))

    # NOTE(review): only 0 or 1 publishers are supported — the publisher is
    # created when publisher_count == 1 but referenced when > 0, so values
    # above 1 would fail. Behaviour for 0 and 1 is unchanged here.
    publisher_count = int(get_optional_arg(args, "--publishers", "1"))
    if publisher_count > 0:
        in_flight_max = int(get_optional_arg(args, "--in-flight-max", "10"))
        print_mod = int(
            get_optional_arg(args, "--print-mod", f"{in_flight_max * 5}"))
        sequence_count = int(get_optional_arg(args, "--sequences", "1"))
    else:
        print_mod = int(get_optional_arg(args, "--print-mod", "1000"))

    new_cluster = is_true(get_optional_arg(args, "--new-cluster", "true"))
    cluster_size = get_optional_arg(args, "--cluster", "3")
    rmq_version = get_optional_arg_validated(args, "--rmq-version", "3.8-beta",
                                             ["3.7", "3.8-beta", "3.8-alpha"])
    stop_mode = get_optional_arg_validated(args, "--stop-mode", "crash",
                                           ["crash", "close", "cancel"])

    # "crash" kills consumer connections via toxiproxy; "close" closes the
    # channel hard; "cancel" (default path) cancels the consumer politely.
    use_toxiproxy = False
    consumer_hard_close = False
    if stop_mode == "crash":
        use_toxiproxy = True
    elif stop_mode == "close":
        consumer_hard_close = True

    include_chaos = is_true(get_optional_arg(args, "--chaos-actions", "true"))
    if include_chaos:
        chaos_mode = get_optional_arg(args, "--chaos-mode", "mixed")
        chaos_min_interval = int(
            get_optional_arg(args, "--chaos-min-interval", "60"))
        chaos_max_interval = int(
            get_optional_arg(args, "--chaos-max-interval", "120"))

    include_con_actions = is_true(
        get_optional_arg(args, "--consumer-actions", "true"))
    if include_con_actions:
        con_action_min_interval = int(
            get_optional_arg(args, "--consumer-min-interval", "20"))
        con_action_max_interval = int(
            get_optional_arg(args, "--consumer-max-interval", "60"))

    # Consumer actions need a consumer_manager, which only exists when
    # consumer_count > 0; without this guard the thread creation below
    # raised NameError for "--consumers 0 --consumer-actions true".
    run_con_actions = include_con_actions and consumer_count > 0

    failed_test_log = list()
    failed_tests = set()

    for test_number in range(tests):

        print("")
        # Assumes logs/<test_name> already exists (no "-p"); verify caller.
        subprocess.call(["mkdir", f"logs/{test_name}/{str(test_number)}"])
        console_out(f"TEST RUN: {str(test_number)} --------------------------",
                    "TEST RUNNER")
        broker_manager = BrokerManager()
        broker_manager.deploy(cluster_size, new_cluster, rmq_version,
                              use_toxiproxy)
        initial_nodes = broker_manager.get_initial_nodes()
        console_out(f"Initial nodes: {initial_nodes}", "TEST RUNNER")

        queue_name = queue + "_" + str(test_number)
        mgmt_node = broker_manager.get_random_init_node()
        queue_created = False

        # Retry queue creation indefinitely (e.g. while the cluster boots).
        while not queue_created:
            if queue_type == "mirrored":
                if sac_enabled:
                    queue_created = broker_manager.create_standard_sac_queue(
                        mgmt_node, queue_name, cluster_size)
                else:
                    queue_created = broker_manager.create_standard_queue(
                        mgmt_node, queue_name, cluster_size)
            elif queue_type == "quorum":
                if sac_enabled:
                    queue_created = broker_manager.create_quorum_sac_queue(
                        mgmt_node, queue_name, cluster_size, qq_max_length)
                else:
                    queue_created = broker_manager.create_quorum_queue(
                        mgmt_node, queue_name, cluster_size, qq_max_length)

            if not queue_created:
                time.sleep(5)

        # Let the freshly created queue settle.
        time.sleep(10)

        msg_monitor = MessageMonitor(test_name, test_number, print_mod,
                                     analyze, log_messages)
        chaos = ChaosExecutor(initial_nodes)

        # Optionally restrict the kinds of chaos inflicted.
        if include_chaos:
            if chaos_mode == "partitions":
                chaos.only_partitions()
            elif chaos_mode == "nodes":
                chaos.only_kill_nodes()

        monitor_thread = threading.Thread(target=msg_monitor.process_messages)
        monitor_thread.start()

        if consumer_count > 0:
            consumer_manager = ConsumerManager(broker_manager, msg_monitor,
                                               "TEST RUNNER", use_toxiproxy)
            consumer_manager.add_consumers(consumer_count, test_number,
                                           queue_name, prefetch)
            consumer_manager.start_consumers()

        if publisher_count == 1:
            publisher = RabbitPublisher(1, test_number, broker_manager,
                                        in_flight_max, 120, print_mod)
            publisher.configure_sequence_direct(queue_name, count, 0,
                                                sequence_count)

            pub_thread = threading.Thread(target=publisher.start_publishing)
            pub_thread.start()
            console_out("publisher started", "TEST RUNNER")

        # Let the system reach a steady state before disrupting it.
        if run_con_actions or include_chaos:
            init_wait_sec = 20
            console_out(
                f"Will start chaos and consumer actions in {init_wait_sec} seconds",
                "TEST RUNNER")
            time.sleep(init_wait_sec)

        if include_chaos:
            chaos_thread = threading.Thread(
                target=chaos.start_random_single_action_and_repair,
                args=(chaos_min_interval, chaos_max_interval))
            chaos_thread.start()
            console_out("Chaos executor started", "TEST RUNNER")

        if run_con_actions:
            consumer_action_thread = threading.Thread(
                target=consumer_manager.start_random_consumer_actions,
                args=(con_action_min_interval, con_action_max_interval,
                      consumer_hard_close))
            consumer_action_thread.start()
            console_out("Consumer actions started", "TEST RUNNER")

        # Main test loop: tick once per second until the run time elapses,
        # the global stop flag is set, or the operator presses Ctrl-C.
        ctr = 0
        run_seconds = run_minutes * 60
        while ctr < run_seconds and not stop_please:
            try:
                time.sleep(1)
                ctr += 1

                if ctr % 60 == 0:
                    console_out(
                        f"Test at {int(ctr/60)} minute mark, {int((run_seconds-ctr)/60)} minutes left",
                        "TEST RUNNER")
            except KeyboardInterrupt:
                console_out(
                    f"Test forced to stop at {int(ctr/60)} minute mark, {int((run_seconds-ctr)/60)} minutes left",
                    "TEST RUNNER")
                break

        # Stop the background disruption threads (bounded joins so a stuck
        # thread cannot hang the run).
        try:
            chaos.stop_random_single_action_and_repair()

            if consumer_count > 0:
                consumer_manager.stop_random_consumer_actions()

            if include_chaos:
                chaos_thread.join(30)

            if run_con_actions:
                consumer_action_thread.join(30)
        except Exception as e:
            console_out("Failed to stop chaos cleanly: " + str(e),
                        "TEST RUNNER")

        if publisher_count > 0:
            publisher.stop_publishing()

        if consumer_count > 0:
            console_out("Resuming consumers", "TEST RUNNER")
            consumer_manager.resume_all_consumers()

            console_out("Starting grace period for consumer to catch up",
                        "TEST RUNNER")
            ctr = 0

            # Wait (up to grace_period_sec) until the monitor has seen every
            # message the publisher got a positive ack for. Ctrl-C ends the
            # grace period early.
            try:
                while ctr < grace_period_sec:
                    if publisher_count > 0 and msg_monitor.get_unique_count(
                    ) >= publisher.get_pos_ack_count() and len(
                            publisher.get_msg_set().difference(
                                msg_monitor.get_msg_set())) == 0:
                        break
                    time.sleep(1)
                    ctr += 1
            except KeyboardInterrupt:
                console_out("Grace period ended", "TEST RUNNER")

        console_out("RESULTS ----------------------------------------",
                    "TEST RUNNER")
        if publisher_count > 0:
            # Messages confirmed by the broker but never consumed.
            confirmed_set = publisher.get_msg_set()
            not_consumed_msgs = confirmed_set.difference(
                msg_monitor.get_msg_set())
            console_out(
                f"Confirmed count: {publisher.get_pos_ack_count()} Received count: {msg_monitor.get_receive_count()} Unique received: {msg_monitor.get_unique_count()}",
                "TEST RUNNER")
        else:
            not_consumed_msgs = set()
            console_out(
                f"Received count: {msg_monitor.get_receive_count()} Unique received: {msg_monitor.get_unique_count()}",
                "TEST RUNNER")

        success = True
        if consumer_count > 0:
            if len(not_consumed_msgs) > 0:
                if sac_enabled:
                    console_out(
                        f"FAILED TEST: Potential message loss or failure of consumers to consume or failure to promote Waiting to Active. Not consumed count: {len(not_consumed_msgs)}",
                        "TEST RUNNER")
                else:
                    console_out(
                        f"FAILED TEST: Potential message loss or failure of consumers to consume. Not consumed count: {len(not_consumed_msgs)}",
                        "TEST RUNNER")
                failed_test_log.append(
                    f"Test {test_number} FAILURE: Potential Message Loss. {len(not_consumed_msgs)} messsages."
                )
                failed_tests.add(test_number)

                # Print at most 500 suspected-lost messages.
                lost_ctr = 0
                sorted_msgs = list(not_consumed_msgs)
                sorted_msgs.sort()
                for msg in sorted_msgs:
                    console_out(f"Lost? {msg}", "TEST RUNNER")
                    lost_ctr += 1
                    if lost_ctr > 500:
                        console_out("More than 500, truncated list",
                                    "TEST RUNNER")
                        break

                success = False

            if msg_monitor.get_out_of_order():
                success = False
                console_out("FAILED TEST: Received out-of-order messages",
                            "TEST RUNNER")
                failed_test_log.append(
                    f"Test {test_number} FAILURE: Received out-of-order messages"
                )
                failed_tests.add(test_number)

        if success:
            console_out("TEST OK", "TEST RUNNER")

        console_out("RESULTS END ------------------------------------",
                    "TEST RUNNER")

        # Best-effort shutdown with bounded joins.
        try:
            if consumer_count > 0:
                consumer_manager.stop_all_consumers()

            if publisher_count == 1:
                pub_thread.join(30)
            msg_monitor.stop_consuming()
            monitor_thread.join(30)
        except Exception as e:
            console_out_exception("Failed to clean up test correctly.", e,
                                  "TEST RUNNER")

        broker_manager.zip_log_files(test_name, test_number)
        console_out(f"TEST {test_number} COMPLETE", "TEST RUNNER")

    # Final summary across all test runs.
    console_out("", "TEST RUNNER")
    console_out("SUMMARY", "TEST RUNNER")
    console_out(f"OK {tests - len(failed_tests)} FAIL {len(failed_tests)}",
                "TEST RUNNER")
    for line in failed_test_log:
        console_out(line, "TEST RUNNER")

    console_out("TEST RUN COMPLETE", "TEST RUNNER")
# --- Example n. 23 (scrape artifact; original marker "Esempio n. 23", score 0) ---
def main():
    """Run a series of Kafka chaos tests.

    For each of --tests runs: provision a fresh cluster, create a topic,
    start a sequence producer and --consumers consumers, run random chaos
    and consumer actions for --run-minutes, then compare the confirmed
    message set against the consumed set to report message loss and
    out-of-order delivery.
    """
    args = get_args(sys.argv)

    tests = int(get_mandatory_arg(args, "--tests"))
    run_minutes = int(get_mandatory_arg(args, "--run-minutes"))
    consumer_count = int(get_mandatory_arg(args, "--consumers"))
    grace_period_sec = int(get_mandatory_arg(args, "--grace-period-sec"))
    topic = get_mandatory_arg(args, "--topic")
    partitions = get_mandatory_arg(args, "--partitions")

    cluster_size = get_optional_arg(args, "--cluster", "3")
    in_flight_max = int(get_optional_arg(args, "--in-flight-max", "100"))
    min_insync_reps = int(get_optional_arg(args, "--min-insync-replicas", "1"))
    unclean_failover = get_optional_arg(args, "--unclean-failover", "false")
    sequence_count = int(get_optional_arg(args, "--sequences", "1"))
    rep_factor = get_optional_arg(args, "--rep-factor", "3")
    acks_mode = get_optional_arg(args, "--acks-mode", "all")
    print_mod = int(get_optional_arg(args, "--print-mod", "0"))

    # 0 means "derive the print modulus from the in-flight limit".
    if print_mod == 0:
        print_mod = in_flight_max * 3

    for test_number in range(tests):

        print("")
        console_out(f"TEST RUN: {str(test_number)} --------------------------",
                    "TEST RUNNER")
        # Fresh cluster per test run.
        subprocess.call(
            ["bash", "../automated/setup-test-run.sh", cluster_size, "3.8"])
        console_out("Waiting for cluster...", "TEST RUNNER")
        time.sleep(30)
        console_out("Cluster status:", "TEST RUNNER")
        subprocess.call(["bash", "../cluster/cluster-status.sh"])

        broker_manager = BrokerManager()
        broker_manager.load_initial_nodes()
        initial_nodes = broker_manager.get_initial_nodes()
        console_out(f"Initial nodes: {initial_nodes}", "TEST RUNNER")
        broker_manager.correct_advertised_listeners()

        # One topic per test run to avoid cross-test interference.
        topic_name = topic + "_" + str(test_number)
        mgmt_node = broker_manager.get_random_init_node()
        console_out(f"Creating topic {topic_name} using node {mgmt_node}",
                    "TEST RUNNER")
        broker_manager.create_topic(mgmt_node, topic_name, rep_factor,
                                    partitions, min_insync_reps,
                                    unclean_failover)

        time.sleep(10)

        msg_monitor = MessageMonitor(print_mod)
        chaos = ChaosExecutor(broker_manager)
        consumer_manager = ConsumerManager(broker_manager, msg_monitor,
                                           "TEST RUNNER", topic_name)

        producer = KafkaProducer(test_number, 1, broker_manager, acks_mode,
                                 in_flight_max, print_mod)
        producer.create_producer()
        producer.configure_as_sequence(sequence_count)
        consumer_manager.add_consumers(consumer_count, test_number)

        monitor_thread = threading.Thread(target=msg_monitor.process_messages)
        monitor_thread.start()

        consumer_manager.start_consumers()

        pub_thread = threading.Thread(target=producer.start_producing,
                                      args=(topic_name, 10000000))
        pub_thread.start()
        console_out("producer started", "TEST RUNNER")

        init_wait_sec = 20
        console_out(
            f"Will start chaos and consumer actions in {init_wait_sec} seconds",
            "TEST RUNNER")
        time.sleep(init_wait_sec)

        chaos_thread = threading.Thread(
            target=chaos.start_random_single_action_and_repair, args=(120, ))
        chaos_thread.start()
        console_out("Chaos executor started", "TEST RUNNER")

        consumer_action_thread = threading.Thread(
            target=consumer_manager.start_random_consumer_actions,
            args=(60, 61))
        consumer_action_thread.start()
        console_out("Consumer actions started", "TEST RUNNER")

        ctr = 0
        while ctr < run_minutes:
            time.sleep(60)
            console_out(
                f"Test at {ctr} minute mark, {run_minutes-ctr} minutes left",
                "TEST RUNNER")
            ctr += 1

        try:
            chaos.stop_random_single_action_and_repair()
            consumer_manager.stop_random_consumer_actions()
            chaos_thread.join()
            consumer_action_thread.join()
        except Exception as e:
            console_out("Failed to stop chaos cleanly: " + str(e),
                        "TEST RUNNER")

        console_out("Resuming consumers", "TEST RUNNER")
        consumer_manager.resume_all_consumers()

        # BUG FIX: the rest of this function referenced an undefined name
        # 'publisher'; the producer created above is bound to 'producer'.
        producer.stop(True)
        console_out("starting grace period for consumer to catch up",
                    "TEST RUNNER")
        ctr = 0

        # Wait (up to the grace period) until every confirmed message has
        # been consumed at least once.
        while ctr < grace_period_sec:
            if msg_monitor.get_unique_count() >= producer.get_pos_ack_count(
            ) and len(producer.get_msg_set().difference(
                    msg_monitor.get_msg_set())) == 0:
                break
            time.sleep(1)
            ctr += 1

        confirmed_set = producer.get_msg_set()
        lost_msgs = confirmed_set.difference(msg_monitor.get_msg_set())

        console_out("RESULTS------------------------------------",
                    "TEST RUNNER")
        console_out(
            f"Confirmed count: {producer.get_pos_ack_count()} Received count: {msg_monitor.get_receive_count()} Unique received: {msg_monitor.get_unique_count()}",
            "TEST RUNNER")

        success = True
        if len(lost_msgs) > 0:
            console_out(f"FAILED TEST: Lost messages: {len(lost_msgs)}",
                        "TEST RUNNER")
            success = False

        if msg_monitor.get_out_of_order() == True:
            success = False
            console_out("FAILED TEST: Received out-of-order messages",
                        "TEST RUNNER")

        if success:
            console_out("TEST OK", "TEST RUNNER")

        console_out("RESULTS END------------------------------------",
                    "TEST RUNNER")

        try:
            consumer_manager.stop_all_consumers()
            pub_thread.join()
            msg_monitor.stop_consuming()
            monitor_thread.join()
        except Exception as e:
            console_out("Failed to clean up test correctly: " + str(e),
                        "TEST RUNNER")

        console_out(f"TEST {str(test_number)} COMPLETE", "TEST RUNNER")
Esempio n. 24
0
#!/usr/bin/env python
import pika
import sys
import time
import subprocess
import datetime
import threading
from command_args import get_args, get_mandatory_arg, get_optional_arg
from MultiTopicConsumer import MultiTopicConsumer


def get_node_ip(node_name):
    """Return the IP address of *node_name*, as printed by the cluster helper script."""
    cmd = "bash ../cluster/get-node-ip.sh " + node_name
    proc = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE)
    stdout, _ = proc.communicate()
    # The script prints the IP followed by a newline; drop all newlines.
    return stdout.decode('ascii').replace('\n', '')


# Script entry: connect a multi-topic consumer to one node and consume forever.
args = get_args(sys.argv)
# Node to connect to; defaults to the first broker.
connect_node = get_optional_arg(args, "--node", "rabbitmq1")  # was positional sys.argv[1]
ip = get_node_ip(connect_node)

queue = get_mandatory_arg(args, "--queue")
# Comma-separated list of exchanges to bind the queue to.
exchanges = get_mandatory_arg(args, "--exchanges").split(',')

consumer = MultiTopicConsumer(True)
consumer.connect(ip)
consumer.declare(queue, exchanges)
consumer.consume()  # blocks, consuming messages until interrupted

def run_benchmark(topology, technology, version, config_file, run_id):
    """Run the logged local benchmark script for a single topology."""
    cmd = ["bash", "run-logged-local-benchmark.sh",
           topology, technology, version, config_file, run_id]
    subprocess.call(cmd)


# Script entry: run a benchmark for every topology listed in the playlist file.
args = get_args(sys.argv)
topologies_root = get_mandatory_arg(args, "--topologies-root")
playlist_file = get_mandatory_arg(args, "--playlist-file")
config_file = get_mandatory_arg(args, "--config-file")
technology = get_mandatory_arg(args, "--technology")
version = get_mandatory_arg(args, "--version")
gap_seconds = int(get_mandatory_arg(args, "--gap-seconds"))
# One run id shared by all benchmarks in this playlist.
# NOTE(review): uuid is not imported in the visible import block above --
# confirm the original script imports it at the top of the file.
run_id = str(uuid.uuid4())

print(f"Run ID: {run_id}")

pl_file = open(playlist_file, "r")

for line in pl_file:
    topology = line.replace("\n", "")

    run_benchmark(topologies_root + "/" + topology, technology, version,
    output, error = process.communicate()
    ip = output.decode('ascii').replace('\n', '')
    return ip


def get_live_nodes():
    """Return the names of the currently-live cluster nodes.

    Runs the list-live-nodes.sh helper, which prints the node names as a
    single space-separated line; empty and whitespace-only entries are
    dropped. Returns an empty list when the script prints nothing.
    """
    bash_command = "bash ../cluster/list-live-nodes.sh"
    process = subprocess.Popen(bash_command.split(), stdout=subprocess.PIPE)
    output, error = process.communicate()
    nodes_line = output.decode('ascii').replace('\n', '')
    # str.split() with no separator splits on runs of whitespace and drops
    # empty tokens, replacing the manual filter loop.
    return nodes_line.split()

# Script entry: attach one consumer to an existing queue (no declaration).
args = get_args(sys.argv)
connect_node = get_optional_arg(args, "--node", "rabbitmq1") #sys.argv[1]
queue = get_mandatory_arg(args, "--queue")
#exchanges = get_mandatory_arg(args, "--exchanges").split(',')

live_nodes = get_live_nodes()
# NOTE(review): positional argument meanings inferred from the call site only
# (id, candidate nodes, flag, prefetch, preferred node) -- confirm against
# MultiTopicConsumer's constructor.
consumer = MultiTopicConsumer("1", live_nodes, True, 100, connect_node)
consumer.connect()
#consumer.declare(queue, exchanges)
consumer.set_queue(queue)
consumer.consume()  # blocks, consuming messages until interrupted


Esempio n. 27
0
def main():
    """Chaos-test a RabbitMQ single-active-consumer (SAC) queue.

    For each of --tests runs: provision a fresh cluster, create a SAC
    queue, start one publisher and --consumers consumers, run random chaos
    and consumer actions for --run-minutes, then compare the confirmed
    message set with the consumed set to detect messages that were never
    consumed (a potential failed Waiting->Active promotion) and
    out-of-order delivery.
    """
    args = get_args(sys.argv)

    node_count = 3  # NOTE(review): unused in this function
    count = -1  # no limit
    tests = int(get_mandatory_arg(args, "--tests"))
    run_minutes = int(get_mandatory_arg(args, "--run-minutes"))
    consumer_count = int(get_mandatory_arg(args, "--consumers"))
    in_flight_max = int(get_optional_arg(args, "--in-flight-max", 10))
    grace_period_sec = int(get_mandatory_arg(args, "--grace-period-sec"))
    cluster_size = get_optional_arg(args, "--cluster", "3")
    queue = get_mandatory_arg(args, "--queue")
    queue_type = get_mandatory_arg(args, "--queue-type")

    message_type = "sequence"  # NOTE(review): unused; the literal is passed below

    for test_number in range(tests):

        print("")
        console_out(f"TEST RUN: {str(test_number)} --------------------------",
                    "TEST RUNNER")
        # Fresh cluster per test run.
        subprocess.call(
            ["bash", "../automated/setup-test-run.sh", cluster_size, "3.8"])
        console_out(f"Waiting for cluster...", "TEST RUNNER")
        time.sleep(30)
        console_out(f"Cluster status:", "TEST RUNNER")
        subprocess.call(["bash", "../cluster/cluster-status.sh"])

        broker_manager = BrokerManager()
        broker_manager.load_initial_nodes()
        initial_nodes = broker_manager.get_initial_nodes()
        console_out(f"Initial nodes: {initial_nodes}", "TEST RUNNER")

        print_mod = 5000
        # One queue per test run to avoid cross-test interference.
        queue_name = queue + "_" + str(test_number)
        mgmt_node = broker_manager.get_random_init_node()
        queue_created = False

        # Retry queue creation until the cluster accepts it.
        while queue_created == False:
            queue_created = broker_manager.create_sac_queue(
                mgmt_node, queue_name, cluster_size, queue_type)
            if queue_created == False:
                time.sleep(5)

        time.sleep(10)

        msg_monitor = MessageMonitor(print_mod)
        stats = QueueStats('jack', 'jack', queue_name)  # NOTE(review): unused
        chaos = ChaosExecutor(initial_nodes)
        consumer_manager = ConsumerManager(broker_manager, msg_monitor,
                                           "TEST RUNNER")

        pub_node = broker_manager.get_random_init_node()
        publisher = RabbitPublisher(str(test_number), initial_nodes, pub_node,
                                    in_flight_max, 120, print_mod)
        consumer_manager.add_consumers(consumer_count, test_number, queue_name)

        monitor_thread = threading.Thread(target=msg_monitor.process_messages)
        monitor_thread.start()

        consumer_manager.start_consumers()

        # Publish an unbounded (count == -1) monotonic sequence.
        pub_thread = threading.Thread(target=publisher.publish_direct,
                                      args=(queue_name, count, 1, 0,
                                            "sequence"))
        pub_thread.start()
        console_out("publisher started", "TEST RUNNER")

        init_wait_sec = 20
        console_out(
            f"Will start chaos and consumer actions in {init_wait_sec} seconds",
            "TEST RUNNER")
        time.sleep(init_wait_sec)

        chaos_thread = threading.Thread(
            target=chaos.start_random_single_action_and_repair, args=(90, ))
        chaos_thread.start()
        console_out("Chaos executor started", "TEST RUNNER")

        consumer_action_thread = threading.Thread(
            target=consumer_manager.start_random_consumer_actions,
            args=(5, 30))
        consumer_action_thread.start()
        console_out("Consumer actions started", "TEST RUNNER")

        ctr = 0
        while ctr < run_minutes:
            time.sleep(60)
            ctr += 1
            console_out(
                f"Test at {ctr} minute mark, {run_minutes-ctr} minutes left",
                "TEST RUNNER")

        try:
            chaos.stop_random_single_action_and_repair()
            consumer_manager.stop_random_consumer_actions()
            chaos_thread.join()
            consumer_action_thread.join()
        except Exception as e:
            console_out("Failed to stop chaos cleanly: " + str(e),
                        "TEST RUNNER")

        console_out("Resuming consumers", "TEST RUNNER")
        consumer_manager.resume_all_consumers()

        publisher.stop(True)
        console_out("starting grace period for consumer to catch up",
                    "TEST RUNNER")
        ctr = 0

        # Wait (up to the grace period) until every confirmed message has
        # been consumed at least once.
        while ctr < grace_period_sec:
            if msg_monitor.get_unique_count() >= publisher.get_pos_ack_count(
            ) and len(publisher.get_msg_set().difference(
                    msg_monitor.get_msg_set())) == 0:
                break
            time.sleep(1)
            ctr += 1

        confirmed_set = publisher.get_msg_set()
        not_consumed_msgs = confirmed_set.difference(msg_monitor.get_msg_set())

        console_out("RESULTS------------------------------------",
                    "TEST RUNNER")
        console_out(
            f"Confirmed count: {publisher.get_pos_ack_count()} Received count: {msg_monitor.get_receive_count()} Unique received: {msg_monitor.get_unique_count()}",
            "TEST RUNNER")

        success = True
        if len(not_consumed_msgs) > 0:
            console_out(
                f"FAILED TEST: Potential failure to promote Waiting to Active. Not consumed count: {len(not_consumed_msgs)}",
                "TEST RUNNER")
            success = False

        if msg_monitor.get_out_of_order() == True:
            success = False
            console_out(f"FAILED TEST: Received out-of-order messages",
                        "TEST RUNNER")

        if success:
            console_out("TEST OK", "TEST RUNNER")

        console_out("RESULTS END------------------------------------",
                    "TEST RUNNER")

        try:
            consumer_manager.stop_all_consumers()
            pub_thread.join()
            msg_monitor.stop_consuming()
            monitor_thread.join()
        except Exception as e:
            console_out("Failed to clean up test correctly: " + str(e),
                        "TEST RUNNER")

        console_out(f"TEST {str(test_number )} COMPLETE", "TEST RUNNER")
Esempio n. 28
0
def main():
    """Interactive publish/consume test against a RabbitMQ cluster.

    Deploys (or reuses) a cluster, creates the requested queue type,
    starts publishers and consumers, then accepts keyboard commands to
    add/remove consumers until interrupted. On Ctrl-C the publishers stop,
    a grace period lets consumers drain, and message loss / ordering are
    reported when --analyze is enabled.
    """
    print("publish-consume.py")
    args = get_args(sys.argv)

    # cluster
    new_cluster = is_true(
        get_optional_arg_validated(args, "--new-cluster", "false",
                                   ["true", "false"]))
    if new_cluster:
        cluster_size = int(get_mandatory_arg(args, "--cluster-size"))
    else:
        cluster_size = int(get_optional_arg(args, "--cluster-size", "3"))

    rmq_version = get_optional_arg_validated(args, "--rmq-version", "3.8-beta",
                                             ["3.7", "3.8-beta", "3.8-alpha"])

    # queues and exchanges
    exchanges = as_list(get_optional_arg(args, "--exchanges", ""))
    queue_name = get_mandatory_arg(args, "--queue")
    queue_type = get_optional_arg_validated(args, "--queue-type", "mirrored",
                                            ["mirrored", "quorum"])
    qq_max_length = int(get_optional_arg(args, "--qq-max-length", "0"))
    rep_factor = int(get_optional_arg(args, "--rep-factor", str(cluster_size)))
    sac_enabled = is_true(
        get_optional_arg_validated(args, "--sac", "false", ["true", "false"]))

    # SAC and quorum queues require RabbitMQ 3.8+.
    if rmq_version == "3.7":
        if sac_enabled:
            console_out("Cannot use SAC mode with RabbitMQ 3.7", "TEST RUNNER")
            exit(1)

        if queue_type == "quorum":
            console_out("Cannot use quorum queues with RabbitMQ 3.7",
                        "TEST RUNNER")
            exit(1)

    # publisher
    publisher_count = int(get_optional_arg(args, "--publishers", "1"))
    pub_mode = get_optional_arg_validated(args, "--pub-mode", "direct",
                                          ["direct", "exchange"])
    msg_mode = get_optional_arg_validated(
        args, "--msg-mode", "sequence",
        ["sequence", "partitioned-sequence", "large-msgs", "hello"])
    count = int(get_mandatory_arg(args, "--msgs"))
    dup_rate = float(get_optional_arg(args, "--dup-rate", "0"))
    sequence_count = int(get_optional_arg(args, "--sequences", 1))
    in_flight_max = int(get_optional_arg(args, "--in-flight-max", 10))

    # consumers
    consumer_count = int(get_optional_arg(args, "--consumers", "1"))
    prefetch = int(get_optional_arg(args, "--pre-fetch", "10"))
    analyze = is_true(
        get_optional_arg_validated(args, "--analyze", "true",
                                   ["true", "false"]))

    print_mod = get_optional_arg(args, "--print-mod", in_flight_max * 5)

    broker_manager = BrokerManager()
    broker_manager.deploy(cluster_size, new_cluster, rmq_version, False)

    mgmt_node = broker_manager.get_random_init_node()
    queue_created = False
    # Retry queue creation until the cluster accepts it.
    while queue_created == False:
        if queue_type == "mirrored":
            if sac_enabled:
                queue_created = broker_manager.create_standard_sac_queue(
                    mgmt_node, queue_name, rep_factor)
            else:
                queue_created = broker_manager.create_standard_queue(
                    mgmt_node, queue_name, rep_factor)
        elif queue_type == "quorum":
            if sac_enabled:
                queue_created = broker_manager.create_quorum_sac_queue(
                    mgmt_node, queue_name, rep_factor, qq_max_length)
            else:
                queue_created = broker_manager.create_quorum_queue(
                    mgmt_node, queue_name, rep_factor, qq_max_length)

        if queue_created == False:
            time.sleep(5)

    broker_manager.declare_exchanges(queue_name, exchanges)

    time.sleep(10)

    if consumer_count > 0:
        msg_monitor = MessageMonitor("pub-con", 1, print_mod, analyze, False)
        consumer_manager = ConsumerManager(broker_manager, msg_monitor,
                                           "TEST RUNNER", False)
        consumer_manager.add_consumers(consumer_count, 1, queue_name, prefetch)

        monitor_thread = threading.Thread(target=msg_monitor.process_messages)
        monitor_thread.start()

        consumer_manager.start_consumers()

    if publisher_count > 0:
        pub_manager = PublisherManager(broker_manager, 1, "TEST RUNNER",
                                       publisher_count, in_flight_max,
                                       print_mod)

        if pub_mode == "direct":
            if msg_mode == "sequence":
                pub_manager.add_sequence_direct_publishers(
                    queue_name, count, dup_rate, sequence_count)
            # BUG FIX: the next two branches previously tested pub_mode
            # (always "direct" inside this branch), so --msg-mode large-msgs
            # silently fell through to the hello branch and
            # partitioned-sequence was never rejected. Test msg_mode, as the
            # exchange branch below does.
            elif msg_mode == "partitioned-sequence":
                print("Cannot use partitioned sequence mode with direct mode")
                exit(1)
            elif msg_mode == "large-msgs":
                msg_size = int(get_mandatory_arg(args, "--msg-size"))
                pub_manager.add_large_msgs_direct_publishers(
                    queue_name, count, dup_rate, msg_size)
            else:
                pub_manager.add_hello_msgs_direct_publishers(
                    queue_name, count, dup_rate)
        elif pub_mode == "exchange":
            if len(exchanges) == 0:
                console_out("No exchanges provided", "TEST RUNNER")
                exit(1)

            if msg_mode == "sequence":
                pub_manager.add_sequence_to_exchanges_publishers(
                    exchanges, "", count, dup_rate, sequence_count)
            elif msg_mode == "partitioned-sequence":
                pub_manager.add_partitioned_sequence_to_exchanges_publishers(
                    exchanges, count, dup_rate, sequence_count)
            elif msg_mode == "large-msgs":
                msg_size = int(get_mandatory_arg(args, "--msg-size"))
                pub_manager.add_large_msgs_to_exchanges_publishers(
                    exchanges, "", count, dup_rate, msg_size)
            else:
                pub_manager.add_hello_msgs_to_exchanges_publishers(
                    exchanges, "", count, dup_rate)

        pub_manager.start_publishers()

    # Interactive loop: manage consumers until Ctrl-C.
    while True:
        try:
            console_out(
                "Press + to add a consumer, - to remove a consumer, ! to remove the active consumer (SAC only)",
                "TEST_RUNNER")
            input_str = input()
            if input_str == "+":
                consumer_manager.add_consumer_and_start_consumer(
                    1, queue_name, prefetch)
            elif input_str == "-":
                consumer_manager.stop_and_remove_oldest_consumer()
            else:
                consumer_manager.stop_and_remove_specfic_consumer(input_str)
        except KeyboardInterrupt:
            if publisher_count > 0:
                console_out(
                    "Stopping publishers. Starting grace period for consumers to catch up.",
                    "TEST_RUNNER")
                pub_manager.stop_all_publishers()
            break

    if publisher_count > 0 and consumer_count > 0:
        try:
            # Grace period: wait (max 300s) until every confirmed message
            # has been consumed at least once.
            ctr = 0
            while ctr < 300:
                if msg_monitor.get_unique_count(
                ) >= pub_manager.get_total_pos_ack_count() and len(
                        pub_manager.get_total_msg_set().difference(
                            msg_monitor.get_msg_set())) == 0:
                    break
                time.sleep(1)
                ctr += 1
        except KeyboardInterrupt:
            console_out("Grace period ended", "TEST RUNNER")

        confirmed_set = pub_manager.get_total_msg_set()
        lost_msgs = confirmed_set.difference(msg_monitor.get_msg_set())

        console_out("RESULTS------------------------------------",
                    "TEST RUNNER")
        console_out(
            f"Confirmed count: {pub_manager.get_total_pos_ack_count()} Received count: {msg_monitor.get_receive_count()} Unique received: {msg_monitor.get_unique_count()}",
            "TEST RUNNER")

        if analyze:
            success = True
            if len(lost_msgs) > 0:
                console_out(f"FAILED TEST: Lost messages: {len(lost_msgs)}",
                            "TEST RUNNER")
                success = False

            if msg_monitor.get_out_of_order() == True:
                success = False
                console_out("FAILED TEST: Received out-of-order messages",
                            "TEST RUNNER")

            if success:
                console_out("TEST OK", "TEST RUNNER")

    elif publisher_count > 0:
        console_out("RESULTS------------------------------------",
                    "TEST RUNNER")
        console_out(
            f"Confirmed count: {pub_manager.get_total_pos_ack_count()}",
            "TEST RUNNER")
    elif consumer_count > 0:
        console_out("RESULTS------------------------------------",
                    "TEST RUNNER")
        console_out(
            f"Received count: {msg_monitor.get_receive_count()} Unique received: {msg_monitor.get_unique_count()}",
            "TEST RUNNER")

    console_out("RESULTS END------------------------------------",
                "TEST RUNNER")

    try:
        if consumer_count > 0:
            consumer_manager.stop_all_consumers()
            msg_monitor.stop_consuming()
            monitor_thread.join(10)
    except Exception as e:
        console_out("Failed to clean up test correctly: " + str(e),
                    "TEST RUNNER")

    console_out("TEST 1 COMPLETE", "TEST RUNNER")
Esempio n. 29
0
def main():
    """Chaos-test a RabbitMQ queue with one publisher and one consumer.

    For each of --tests runs: provision a fresh cluster, create the queue
    (SAC or regular per --sac), start the publisher and consumer, execute
    --actions random chaos actions at random intervals, repair the
    cluster, then after a grace period compare the confirmed message set
    with the consumed set to report lost and out-of-order messages.
    """
    args = get_args(sys.argv)

    count = -1  # no publish limit; the publisher runs until stopped
    tests = int(get_mandatory_arg(args, "--tests"))
    actions = int(get_mandatory_arg(args, "--actions"))
    in_flight_max = int(get_optional_arg(args, "--in-flight-max", 10))
    grace_period_sec = int(get_mandatory_arg(args, "--grace-period-sec"))
    cluster_size = get_optional_arg(args, "--cluster", "3")
    queue = get_mandatory_arg(args, "--queue")
    sac = get_mandatory_arg(args, "--sac")
    chaos_mode = get_optional_arg(args, "--chaos-mode", "mixed")
    chaos_min_interval = int(
        get_optional_arg(args, "--chaos-min-interval", "30"))
    chaos_max_interval = int(
        get_optional_arg(args, "--chaos-max-interval", "120"))
    queue_type = get_mandatory_arg(args, "--queue-type")

    # Any value other than "false" (case-insensitive) enables SAC.
    sac_enabled = sac.upper() != "FALSE"

    for test_number in range(tests):

        print("")
        console_out(f"TEST RUN: {str(test_number)} --------------------------",
                    "TEST RUNNER")
        # Fresh cluster per test run.
        subprocess.call(
            ["bash", "../automated/setup-test-run.sh", cluster_size, "3.8"])
        console_out("Waiting for cluster...", "TEST RUNNER")
        time.sleep(30)
        console_out("Cluster status:", "TEST RUNNER")
        subprocess.call(["bash", "../cluster/cluster-status.sh"])

        broker_manager = BrokerManager()
        broker_manager.load_initial_nodes()
        initial_nodes = broker_manager.get_initial_nodes()

        console_out(f"Initial nodes: {initial_nodes}", "TEST RUNNER")

        pub_node = broker_manager.get_random_init_node()
        con_node = broker_manager.get_random_init_node()
        console_out(f"publish to: {pub_node}", "TEST RUNNER")
        console_out(f"consume from: {con_node}", "TEST RUNNER")

        print_mod = in_flight_max * 5
        # One queue per test run to avoid cross-test interference.
        queue_name = queue + "_" + str(test_number)

        mgmt_node = broker_manager.get_random_init_node()
        queue_created = False
        # Retry queue creation until the cluster accepts it.
        while queue_created == False:
            if sac_enabled:
                queue_created = broker_manager.create_sac_queue(
                    mgmt_node, queue_name, cluster_size, queue_type)
            else:
                queue_created = broker_manager.create_queue(
                    mgmt_node, queue_name, cluster_size, queue_type)
            if queue_created == False:
                time.sleep(5)

        time.sleep(10)

        msg_monitor = MessageMonitor(print_mod)
        publisher = RabbitPublisher(f"PUBLISHER(Test:{test_number} Id:P1)",
                                    initial_nodes, pub_node, in_flight_max,
                                    120, print_mod)
        consumer_manager = ConsumerManager(broker_manager, msg_monitor,
                                           "TEST RUNNER")
        consumer_manager.add_consumers(1, test_number, queue_name)

        stats = QueueStats('jack', 'jack', queue_name)  # NOTE(review): unused
        chaos = ChaosExecutor(initial_nodes)

        if chaos_mode == "partitions":
            chaos.only_partitions()
        elif chaos_mode == "nodes":
            chaos.only_kill_nodes()

        monitor_thread = threading.Thread(target=msg_monitor.process_messages)
        monitor_thread.start()

        consumer_manager.start_consumers()

        # Publish an unbounded (count == -1) monotonic sequence.
        pub_thread = threading.Thread(target=publisher.publish_direct,
                                      args=(queue_name, count, 1, 0,
                                            "sequence"))
        pub_thread.start()
        console_out("publisher started", "TEST RUNNER")

        for action_num in range(0, actions):
            wait_sec = random.randint(chaos_min_interval, chaos_max_interval)
            console_out(f"waiting for {wait_sec} seconds before next action",
                        "TEST RUNNER")
            time.sleep(wait_sec)

            console_out(
                f"execute chaos action {str(action_num)} of test {str(test_number)}",
                "TEST RUNNER")
            chaos.execute_chaos_action()
            subprocess.call(["bash", "../cluster/cluster-status.sh"])

        time.sleep(60)
        console_out("repairing cluster", "TEST RUNNER")
        chaos.repair()
        console_out("repaired cluster", "TEST RUNNER")

        publisher.stop(True)

        console_out("starting grace period for consumer to catch up",
                    "TEST RUNNER")
        ctr = 0

        # Wait (up to the grace period) until every confirmed message has
        # been consumed at least once.
        while ctr < grace_period_sec:
            if msg_monitor.get_unique_count() >= publisher.get_pos_ack_count(
            ) and len(publisher.get_msg_set().difference(
                    msg_monitor.get_msg_set())) == 0:
                break
            time.sleep(1)
            ctr += 1

        confirmed_set = publisher.get_msg_set()
        lost_msgs = confirmed_set.difference(msg_monitor.get_msg_set())

        console_out("RESULTS------------------------------------",
                    "TEST RUNNER")

        if len(lost_msgs) > 0:
            console_out(f"Lost messages count: {len(lost_msgs)}",
                        "TEST RUNNER")
            for msg in lost_msgs:
                console_out(f"Lost message: {msg}", "TEST RUNNER")

        console_out(
            f"Confirmed count: {publisher.get_pos_ack_count()} Received count: {msg_monitor.get_receive_count()} Unique received: {msg_monitor.get_unique_count()}",
            "TEST RUNNER")
        success = True

        if msg_monitor.get_out_of_order() == True:
            console_out("FAILED TEST: OUT OF ORDER MESSAGES", "TEST RUNNER")
            success = False

        if len(lost_msgs) > 0:
            console_out("FAILED TEST: LOST MESSAGES", "TEST RUNNER")
            success = False

        if success == True:
            console_out("TEST OK", "TEST RUNNER")

        console_out("RESULTS END------------------------------------",
                    "TEST RUNNER")

        try:
            consumer_manager.stop_all_consumers()
            # BUG FIX: previously joined an undefined 'con_thread', raising a
            # NameError that the except below swallowed, so pub_thread was
            # never joined and the monitor thread was never stopped. Clean up
            # as the sibling test runners do.
            pub_thread.join()
            msg_monitor.stop_consuming()
            monitor_thread.join()
        except Exception as e:
            console_out("Failed to clean up test correctly: " + str(e),
                        "TEST RUNNER")

        console_out(f"TEST {str(test_number)} COMPLETE", "TEST RUNNER")
#!/usr/bin/env python
import pika
from pika import spec
import sys
import time
import subprocess
import datetime
import uuid
import random
from command_args import get_args, get_mandatory_arg, get_optional_arg

args = get_args(sys.argv)

connect_node = get_optional_arg(args, "--node", "rabbitmq1")
node_count = int(get_optional_arg(args, "--cluster-size", "3"))
queue = get_mandatory_arg(args, "--queue")
count = int(get_mandatory_arg(args, "--msgs"))
state_count = int(get_mandatory_arg(args, "--keys"))
dup_rate = float(get_optional_arg(args, "--dup-rate", "0"))
total = count * state_count

if state_count > 10:
    print("Key count limit is 10")
    exit(1)

terminate = False
exit_triggered = False
last_ack_time = datetime.datetime.now()
last_ack = 0

node_names = []