def add_hello_msgs_direct_publishers(self, queue_name, count, dup_rate):
     """Register one direct-to-queue "hello" publisher per publisher slot.

     Creates ``self.publisher_count`` RabbitPublisher objects with ids
     starting at 1, configures each one through
     ``configure_hello_msgs_direct`` and appends it to ``self.publishers``.

     Args:
         queue_name: forwarded to ``configure_hello_msgs_direct``.
         count: forwarded to ``configure_hello_msgs_direct``.
         dup_rate: forwarded to ``configure_hello_msgs_direct``.
     """
     for slot in range(self.publisher_count):
         # Publisher ids are 1-based.
         new_publisher = RabbitPublisher(
             slot + 1, self.test_number, self.broker_manager,
             self.in_flight_max, 120, self.print_mod)
         new_publisher.configure_hello_msgs_direct(queue_name, count, dup_rate)
         self.publishers.append(new_publisher)
 def add_hello_msgs_to_exchanges_publishers(self, exchanges, routing_key,
                                            count, dup_rate):
     """Register one exchange-targeting "hello" publisher per publisher slot.

     Creates ``self.publisher_count`` RabbitPublisher objects with ids
     starting at 1, configures each one through
     ``configure_hello_msgs_to_exchanges`` and appends it to
     ``self.publishers``.

     Args:
         exchanges: forwarded to ``configure_hello_msgs_to_exchanges``.
         routing_key: forwarded to ``configure_hello_msgs_to_exchanges``.
         count: forwarded to ``configure_hello_msgs_to_exchanges``.
         dup_rate: forwarded to ``configure_hello_msgs_to_exchanges``.
     """
     for slot in range(self.publisher_count):
         # Publisher ids are 1-based.
         new_publisher = RabbitPublisher(
             slot + 1, self.test_number, self.broker_manager,
             self.in_flight_max, 120, self.print_mod)
         new_publisher.configure_hello_msgs_to_exchanges(
             exchanges, routing_key, count, dup_rate)
         self.publishers.append(new_publisher)
 def add_partitioned_sequence_to_exchanges_publishers(
         self, exchanges, count, dup_rate, sequence_count):
     """Register one partitioned-sequence publisher per publisher slot.

     Creates ``self.publisher_count`` RabbitPublisher objects with ids
     starting at 1, configures each one through
     ``configure_partitioned_sequence_to_exchanges`` and appends it to
     ``self.publishers``.

     Args:
         exchanges: forwarded to the publisher configuration call.
         count: forwarded to the publisher configuration call.
         dup_rate: forwarded to the publisher configuration call.
         sequence_count: forwarded to the publisher configuration call.
     """
     for slot in range(self.publisher_count):
         # Publisher ids are 1-based.
         new_publisher = RabbitPublisher(
             slot + 1, self.test_number, self.broker_manager,
             self.in_flight_max, 120, self.print_mod)
         new_publisher.configure_partitioned_sequence_to_exchanges(
             exchanges, count, dup_rate, sequence_count)
         self.publishers.append(new_publisher)
예제 #4
0
def main():
    """Publish sequence messages to a queue, one exchange or several exchanges.

    Command-line options (read through get_args / get_mandatory_arg /
    get_optional_arg):
        --msgs         mandatory, parsed as int; the message count.
        --keys         mandatory, parsed as int; passed to the publish call
                       as its state-count argument.
        --node         node to connect to (default "rabbitmq1").
        --ex           exchange for single-exchange publishing (default "").
        --dup-rate     parsed as float (default "0").
        --rk           routing key (default "hello").
        --queue        when given, publish directly to this queue.
        --partitioned  "true" switches the message type from "sequence" to
                       "partitioned-sequence"; rejected together with --queue.
        --exchanges    comma-separated exchange names; when non-empty (and
                       no --queue is given) all of them are published to.

    Exits with status 1 when --partitioned is "true" and --queue is set.
    """
    args = get_args(sys.argv)

    connect_node = get_optional_arg(args, "--node", "rabbitmq1")
    exchange = get_optional_arg(args, "--ex", "")
    count = int(get_mandatory_arg(args, "--msgs"))
    state_count = int(get_mandatory_arg(args, "--keys"))
    dup_rate = float(get_optional_arg(args, "--dup-rate", "0"))
    routing_key = get_optional_arg(args, "--rk", "hello")
    queue = get_optional_arg(args, "--queue", None)
    partitioned = get_optional_arg(args, "--partitioned", "false")
    exchanges_arg = get_optional_arg(args, "--exchanges", "")

    message_type = "sequence"
    if partitioned == "true":
        if queue is not None:
            print("Cannot set partitioning mode and set a queue. Must publish to an exchange")
            # sys.exit instead of the exit() helper: the latter is injected
            # by the site module for interactive use and is not guaranteed
            # to exist in every interpreter setup.
            sys.exit(1)
        message_type = "partitioned-sequence"

    live_nodes = get_live_nodes()

    publisher = RabbitPublisher("1", live_nodes, connect_node, 1000, 100, 100)

    # Destination precedence: explicit queue, then exchange list, then the
    # single exchange given by --ex.
    if queue is not None:
        print("direct to queue publishing")
        publisher.publish_direct(queue, count, state_count, dup_rate, message_type)
    elif exchanges_arg:
        print("multi-exchange publishing")
        exchanges = exchanges_arg.split(",")
        publisher.publish_to_exchanges(exchanges, routing_key, count, state_count, dup_rate, message_type)
    else:
        print("single exchange publishing")
        publisher.publish(exchange, routing_key, count, state_count, dup_rate, message_type)
예제 #5
0
def main():
    """Publish messages, then print the length of the target queue.

    Command-line options (read through get_args / get_mandatory_arg /
    get_optional_arg):
        --msgs          mandatory, parsed as int; the message count.
        --node          node to connect to (default "rabbitmq1").
        --cluster-size  parsed as int (default "3"); passed to RabbitPublisher.
        --ex            exchange used when no --queue is given (default "").
        --dup-rate      parsed as float (default "0").
        --rk            routing key (default "hello").
        --queue         when given, publish directly to this queue.
        --msg-type      message type passed to the publish call
                        (default "hello").
    """
    args = get_args(sys.argv)

    connect_node = get_optional_arg(args, "--node", "rabbitmq1")
    node_count = int(get_optional_arg(args, "--cluster-size", "3"))
    exchange = get_optional_arg(args, "--ex", "")
    count = int(get_mandatory_arg(args, "--msgs"))
    dup_rate = float(get_optional_arg(args, "--dup-rate", "0"))
    routing_key = get_optional_arg(args, "--rk", "hello")
    queue = get_optional_arg(args, "--queue", None)
    message_type = get_optional_arg(args, "--msg-type", "hello")

    publisher = RabbitPublisher(node_count, connect_node)
    # NOTE(review): queue may be None here when --queue is omitted; confirm
    # QueueStats / get_queue_length cope with that.
    stats = QueueStats('jack', 'jack', queue)

    try:
        if queue is not None:
            print("direct")
            publisher.publish_direct(queue, count, 1, dup_rate, message_type)
        else:
            publisher.publish(exchange, routing_key, count, 1, dup_rate,
                              message_type)

        queue_length = stats.get_queue_length(connect_node)
        print(f"Number of message in queue: {queue_length}")

    # NOTE(review): only NameError is handled although the message says
    # "Unexpected error"; left as-is because widening the handler would
    # change which failures terminate the script - confirm the intent.
    except NameError as e:
        print(f"Unexpected error: {str(e)}")
예제 #6
0
def main():
    """Publish messages and print the publisher's final count if aborted.

    Command-line options (read through get_args / get_mandatory_arg /
    get_optional_arg):
        --msgs          mandatory, parsed as int; the message count.
        --node          node to connect to (default "rabbitmq1").
        --cluster-size  parsed as int (default "3"); passed to RabbitPublisher.
        --ex            exchange used when no --queue is given (default "").
        --dup-rate      parsed as float (default "0").
        --rk            routing key (default "hello").
        --queue         when given, publish directly to this queue.
        --msg-type      message type passed to the publish call
                        (default "hello").

    Any exception raised while publishing (including KeyboardInterrupt) is
    caught; the final stats are printed and the function returns normally.
    """
    args = get_args(sys.argv)

    connect_node = get_optional_arg(args, "--node", "rabbitmq1")
    node_count = int(get_optional_arg(args, "--cluster-size", "3"))
    exchange = get_optional_arg(args, "--ex", "")
    count = int(get_mandatory_arg(args, "--msgs"))
    dup_rate = float(get_optional_arg(args, "--dup-rate", "0"))
    routing_key = get_optional_arg(args, "--rk", "hello")
    queue = get_optional_arg(args, "--queue", None)
    message_type = get_optional_arg(args, "--msg-type", "hello")

    publisher = RabbitPublisher(node_count, connect_node)

    try:
        if queue is not None:
            print("direct")
            publisher.publish_direct(queue, count, 1, dup_rate, message_type)
        else:
            publisher.publish(exchange, routing_key, count, 1, dup_rate,
                              message_type)
    except BaseException as e:
        # Deliberately as wide as a bare ``except:`` so that Ctrl-C still
        # ends with the final stats. Anything other than a keyboard
        # interrupt is reported first instead of being silently hidden.
        if not isinstance(e, KeyboardInterrupt):
            print(f"Publishing failed: {type(e).__name__}: {e}")
        print("Publishing aborted, final stats:")
        print(publisher.print_final_count())
예제 #7
0
def main():
    """Run repeated chaos tests on one queue and report lost/out-of-order messages.

    Command-line options (read through get_args / get_mandatory_arg /
    get_optional_arg):
        --tests               mandatory, int; number of test runs.
        --actions             mandatory, int; chaos actions per test run.
        --grace-period-sec    mandatory, int; maximum seconds to wait for the
                              consumer to catch up after publishing stops.
        --queue               mandatory; queue name prefix, the test number
                              is appended as "_<n>".
        --sac                 mandatory; any value other than "false"
                              (case-insensitive) creates the queue through
                              create_sac_queue instead of create_queue.
        --queue-type          mandatory; passed to the queue creation call.
        --in-flight-max       int (default 10); passed to RabbitPublisher.
        --cluster             cluster size (default "3").
        --chaos-mode          "partitions" or "nodes" restricts the chaos
                              executor; anything else leaves it unrestricted
                              (default "mixed").
        --chaos-min-interval  int seconds (default "30").
        --chaos-max-interval  int seconds (default "120").

    Per test run the function sets up the cluster via the helper shell
    scripts, creates the queue (retrying every 5 seconds), starts a message
    monitor, one consumer and one publisher, executes the chaos actions at
    random intervals, repairs the cluster, stops the publisher, waits for the
    consumer to catch up and prints the results.
    """
    args = get_args(sys.argv)

    count = -1  # no limit
    tests = int(get_mandatory_arg(args, "--tests"))
    actions = int(get_mandatory_arg(args, "--actions"))
    in_flight_max = int(get_optional_arg(args, "--in-flight-max", 10))
    grace_period_sec = int(get_mandatory_arg(args, "--grace-period-sec"))
    cluster_size = get_optional_arg(args, "--cluster", "3")
    queue = get_mandatory_arg(args, "--queue")
    sac = get_mandatory_arg(args, "--sac")
    chaos_mode = get_optional_arg(args, "--chaos-mode", "mixed")
    chaos_min_interval = int(
        get_optional_arg(args, "--chaos-min-interval", "30"))
    chaos_max_interval = int(
        get_optional_arg(args, "--chaos-max-interval", "120"))
    message_type = "sequence"
    queue_type = get_mandatory_arg(args, "--queue-type")

    # Only an explicit "false" (any letter case) disables SAC.
    sac_enabled = sac.upper() != "FALSE"

    for test_number in range(tests):

        print("")
        console_out(f"TEST RUN: {test_number} --------------------------",
                    "TEST RUNNER")
        subprocess.call(
            ["bash", "../automated/setup-test-run.sh", cluster_size, "3.8"])
        console_out("Waiting for cluster...", "TEST RUNNER")
        time.sleep(30)
        console_out("Cluster status:", "TEST RUNNER")
        subprocess.call(["bash", "../cluster/cluster-status.sh"])

        broker_manager = BrokerManager()
        broker_manager.load_initial_nodes()
        initial_nodes = broker_manager.get_initial_nodes()

        console_out(f"Initial nodes: {initial_nodes}", "TEST RUNNER")

        pub_node = broker_manager.get_random_init_node()
        con_node = broker_manager.get_random_init_node()
        console_out(f"publish to: {pub_node}", "TEST RUNNER")
        console_out(f"consume from: {con_node}", "TEST RUNNER")

        print_mod = in_flight_max * 5
        queue_name = queue + "_" + str(test_number)

        # Retry queue creation every 5 seconds until the broker reports
        # success.
        mgmt_node = broker_manager.get_random_init_node()
        queue_created = False
        while queue_created == False:
            if sac_enabled:
                queue_created = broker_manager.create_sac_queue(
                    mgmt_node, queue_name, cluster_size, queue_type)
            else:
                queue_created = broker_manager.create_queue(
                    mgmt_node, queue_name, cluster_size, queue_type)
            if queue_created == False:
                time.sleep(5)

        time.sleep(10)

        msg_monitor = MessageMonitor(print_mod)
        publisher = RabbitPublisher(f"PUBLISHER(Test:{test_number} Id:P1)",
                                    initial_nodes, pub_node, in_flight_max,
                                    120, print_mod)
        consumer_manager = ConsumerManager(broker_manager, msg_monitor,
                                           "TEST RUNNER")
        consumer_manager.add_consumers(1, test_number, queue_name)

        # Not used further in this function; construction kept as-is.
        stats = QueueStats('jack', 'jack', queue_name)
        chaos = ChaosExecutor(initial_nodes)

        if chaos_mode == "partitions":
            chaos.only_partitions()
        elif chaos_mode == "nodes":
            chaos.only_kill_nodes()

        monitor_thread = threading.Thread(target=msg_monitor.process_messages)
        monitor_thread.start()

        consumer_manager.start_consumers()

        pub_thread = threading.Thread(target=publisher.publish_direct,
                                      args=(queue_name, count, 1, 0,
                                            message_type))
        pub_thread.start()
        console_out("publisher started", "TEST RUNNER")

        for action_num in range(0, actions):
            wait_sec = random.randint(chaos_min_interval, chaos_max_interval)
            console_out(f"waiting for {wait_sec} seconds before next action",
                        "TEST RUNNER")
            time.sleep(wait_sec)

            console_out(
                f"execute chaos action {action_num} of test {test_number}",
                "TEST RUNNER")
            chaos.execute_chaos_action()
            subprocess.call(["bash", "../cluster/cluster-status.sh"])

        time.sleep(60)
        console_out("repairing cluster", "TEST RUNNER")
        chaos.repair()
        console_out("repaired cluster", "TEST RUNNER")

        publisher.stop(True)

        console_out("starting grace period for consumer to catch up",
                    "TEST RUNNER")
        ctr = 0

        # Poll once per second until every confirmed message has been seen
        # by the monitor or the grace period is used up.
        while ctr < grace_period_sec:
            if msg_monitor.get_unique_count() >= publisher.get_pos_ack_count(
            ) and len(publisher.get_msg_set().difference(
                    msg_monitor.get_msg_set())) == 0:
                break
            time.sleep(1)
            ctr += 1

        # Lost = in the publisher's message set but not in the monitor's.
        confirmed_set = publisher.get_msg_set()
        lost_msgs = confirmed_set.difference(msg_monitor.get_msg_set())

        console_out("RESULTS------------------------------------",
                    "TEST RUNNER")

        if len(lost_msgs) > 0:
            console_out(f"Lost messages count: {len(lost_msgs)}",
                        "TEST RUNNER")
            for msg in lost_msgs:
                console_out(f"Lost message: {msg}", "TEST RUNNER")

        console_out(
            f"Confirmed count: {publisher.get_pos_ack_count()} Received count: {msg_monitor.get_receive_count()} Unique received: {msg_monitor.get_unique_count()}",
            "TEST RUNNER")
        success = True

        if msg_monitor.get_out_of_order() == True:
            console_out("FAILED TEST: OUT OF ORDER MESSAGES", "TEST RUNNER")
            success = False

        if len(lost_msgs) > 0:
            console_out("FAILED TEST: LOST MESSAGES", "TEST RUNNER")
            success = False

        if success == True:
            console_out("TEST OK", "TEST RUNNER")

        console_out("RESULTS END------------------------------------",
                    "TEST RUNNER")

        try:
            consumer_manager.stop_all_consumers()
            # The former ``con_thread.join()`` referenced a name that is
            # never defined in this function, so it always raised NameError
            # and the publisher thread was never joined.
            pub_thread.join()
            # Stop the monitor thread too so it does not outlive this test
            # run; the wait is bounded so a stuck monitor cannot hang the
            # runner.
            msg_monitor.stop_consuming()
            monitor_thread.join(30)
        except Exception as e:
            console_out("Failed to clean up test correctly: " + str(e),
                        "TEST RUNNER")

        console_out(f"TEST {test_number} COMPLETE", "TEST RUNNER")
예제 #8
0
def main():
    """Run timed chaos and consumer-action tests against a SAC queue.

    Command-line options (read through get_args / get_mandatory_arg /
    get_optional_arg):
        --tests             mandatory, int; number of test runs.
        --run-minutes       mandatory, int; minutes each test keeps running
                            once chaos and consumer actions have started.
        --consumers         mandatory, int; consumers added per test.
        --grace-period-sec  mandatory, int; maximum seconds to wait for the
                            consumers to catch up after publishing stops.
        --queue             mandatory; queue name prefix, the test number is
                            appended as "_<n>".
        --queue-type        mandatory; passed to create_sac_queue.
        --in-flight-max     int (default 10); passed to RabbitPublisher.
        --cluster           cluster size (default "3").

    Each run sets up the cluster through the helper shell scripts, creates
    the queue (retrying every 5 seconds), starts monitor, consumers and
    publisher, lets the chaos executor and random consumer actions run for
    the requested time, then stops everything and prints the results.
    """
    runner = "TEST RUNNER"
    cli = get_args(sys.argv)

    msg_limit = -1  # no limit
    test_total = int(get_mandatory_arg(cli, "--tests"))
    minutes_to_run = int(get_mandatory_arg(cli, "--run-minutes"))
    consumers_per_test = int(get_mandatory_arg(cli, "--consumers"))
    max_in_flight = int(get_optional_arg(cli, "--in-flight-max", 10))
    grace_seconds = int(get_mandatory_arg(cli, "--grace-period-sec"))
    cluster = get_optional_arg(cli, "--cluster", "3")
    queue_prefix = get_mandatory_arg(cli, "--queue")
    kind_of_queue = get_mandatory_arg(cli, "--queue-type")

    for test_no in range(test_total):

        print("")
        console_out(f"TEST RUN: {test_no} --------------------------", runner)
        subprocess.call(
            ["bash", "../automated/setup-test-run.sh", cluster, "3.8"])
        console_out("Waiting for cluster...", runner)
        time.sleep(30)
        console_out("Cluster status:", runner)
        subprocess.call(["bash", "../cluster/cluster-status.sh"])

        brokers = BrokerManager()
        brokers.load_initial_nodes()
        nodes = brokers.get_initial_nodes()
        console_out(f"Initial nodes: {nodes}", runner)

        report_every = 5000
        target_queue = queue_prefix + "_" + str(test_no)
        admin_node = brokers.get_random_init_node()

        # Keep asking for the queue until creation stops reporting False,
        # pausing 5 seconds after each failed attempt.
        while True:
            created = brokers.create_sac_queue(admin_node, target_queue,
                                               cluster, kind_of_queue)
            if created == False:
                time.sleep(5)
                continue
            break

        time.sleep(10)

        monitor = MessageMonitor(report_every)
        stats = QueueStats('jack', 'jack', target_queue)
        chaos_runner = ChaosExecutor(nodes)
        consumers = ConsumerManager(brokers, monitor, runner)

        publish_node = brokers.get_random_init_node()
        publisher = RabbitPublisher(str(test_no), nodes, publish_node,
                                    max_in_flight, 120, report_every)
        consumers.add_consumers(consumers_per_test, test_no, target_queue)

        monitor_worker = threading.Thread(target=monitor.process_messages)
        monitor_worker.start()

        consumers.start_consumers()

        publish_worker = threading.Thread(
            target=publisher.publish_direct,
            args=(target_queue, msg_limit, 1, 0, "sequence"))
        publish_worker.start()
        console_out("publisher started", runner)

        warmup_sec = 20
        console_out(
            f"Will start chaos and consumer actions in {warmup_sec} seconds",
            runner)
        time.sleep(warmup_sec)

        chaos_worker = threading.Thread(
            target=chaos_runner.start_random_single_action_and_repair,
            args=(90, ))
        chaos_worker.start()
        console_out("Chaos executor started", runner)

        actions_worker = threading.Thread(
            target=consumers.start_random_consumer_actions, args=(5, 30))
        actions_worker.start()
        console_out("Consumer actions started", runner)

        # One progress line per elapsed minute.
        for minute in range(1, minutes_to_run + 1):
            time.sleep(60)
            console_out(
                f"Test at {minute} minute mark, {minutes_to_run-minute} minutes left",
                runner)

        try:
            chaos_runner.stop_random_single_action_and_repair()
            consumers.stop_random_consumer_actions()
            chaos_worker.join()
            actions_worker.join()
        except Exception as stop_error:
            console_out("Failed to stop chaos cleanly: " + str(stop_error),
                        runner)

        console_out("Resuming consumers", runner)
        consumers.resume_all_consumers()

        publisher.stop(True)
        console_out("starting grace period for consumer to catch up", runner)

        # Check once per second, for at most grace_seconds checks, whether
        # every message in the publisher's set has reached the monitor.
        for _ in range(grace_seconds):
            if monitor.get_unique_count() >= publisher.get_pos_ack_count():
                outstanding = publisher.get_msg_set().difference(
                    monitor.get_msg_set())
                if len(outstanding) == 0:
                    break
            time.sleep(1)

        confirmed = publisher.get_msg_set()
        unconsumed = confirmed.difference(monitor.get_msg_set())

        console_out("RESULTS------------------------------------", runner)
        console_out(
            f"Confirmed count: {publisher.get_pos_ack_count()} Received count: {monitor.get_receive_count()} Unique received: {monitor.get_unique_count()}",
            runner)

        passed = True
        if len(unconsumed) > 0:
            console_out(
                f"FAILED TEST: Potential failure to promote Waiting to Active. Not consumed count: {len(unconsumed)}",
                runner)
            passed = False

        if monitor.get_out_of_order() == True:
            passed = False
            console_out("FAILED TEST: Received out-of-order messages", runner)

        if passed:
            console_out("TEST OK", runner)

        console_out("RESULTS END------------------------------------", runner)

        try:
            consumers.stop_all_consumers()
            publish_worker.join()
            monitor.stop_consuming()
            monitor_worker.join()
        except Exception as cleanup_error:
            console_out(
                "Failed to clean up test correctly: " + str(cleanup_error),
                runner)

        console_out(f"TEST {test_no} COMPLETE", runner)
def main():
    """Run repeated chaos tests against a quorum queue and report message loss.

    Command-line options (read through get_args / get_mandatory_arg /
    get_optional_arg / get_optional_arg_validated):
        --tests               mandatory, int; number of test runs.
        --actions             mandatory, int; chaos actions per test run.
        --grace-period-sec    mandatory, int; see the grace period below.
        --queue               mandatory; queue name prefix, the test number
                              is appended as "_<n>".
        --sac                 mandatory; interpreted by is_true, selects
                              create_quorum_sac_queue over
                              create_quorum_queue.
        --in-flight-max       int (default 10); passed to RabbitPublisher.
        --cluster             cluster size (default "3").
        --chaos-mode          "partitions" or "nodes" restricts the chaos
                              executor (default "mixed").
        --chaos-min-interval  int seconds (default "30").
        --chaos-max-interval  int seconds (default "120").
        --pre-fetch           int (default "10"); passed to add_consumers.
        --rmq-version         one of "3.7", "3.8-beta", "3.8-alpha"
                              (default "3.8-beta").

    Per test run a cluster is deployed and the queue created (redeploying
    after 20 failed creation attempts), then one publisher and one consumer
    run while chaos actions execute at random intervals. After the cluster
    is repaired and publishing stops, the runner waits until either every
    confirmed message was received, or more than grace_period_sec seconds
    passed and no message arrived for over 15 seconds.
    """
    print("quorum-queue-test.py")
    args = get_args(sys.argv)

    count = -1  # no limit
    tests = int(get_mandatory_arg(args, "--tests"))
    actions = int(get_mandatory_arg(args, "--actions"))
    in_flight_max = int(get_optional_arg(args, "--in-flight-max", 10))
    grace_period_sec = int(get_mandatory_arg(args, "--grace-period-sec"))
    cluster_size = get_optional_arg(args, "--cluster", "3")
    queue = get_mandatory_arg(args, "--queue")
    sac_enabled = is_true(get_mandatory_arg(args, "--sac"))
    chaos_mode = get_optional_arg(args, "--chaos-mode", "mixed")
    chaos_min_interval = int(
        get_optional_arg(args, "--chaos-min-interval", "30"))
    chaos_max_interval = int(
        get_optional_arg(args, "--chaos-max-interval", "120"))
    prefetch = int(get_optional_arg(args, "--pre-fetch", "10"))
    rmq_version = get_optional_arg_validated(args, "--rmq-version", "3.8-beta",
                                             ["3.7", "3.8-beta", "3.8-alpha"])

    # Neither value depends on the deployment attempt, so compute them once.
    print_mod = in_flight_max * 5

    for test_number in range(1, tests + 1):

        print("")
        console_out(
            f"TEST RUN: {test_number} of {tests}--------------------------",
            "TEST RUNNER")
        queue_name = queue + "_" + str(test_number)
        setup_complete = False

        # Deploy a cluster and try to create the queue; after 20 failed
        # creation attempts start over with a fresh deployment.
        while not setup_complete:
            broker_manager = BrokerManager()
            broker_manager.deploy(cluster_size, True, rmq_version, False)
            initial_nodes = broker_manager.get_initial_nodes()

            console_out(f"Initial nodes: {initial_nodes}", "TEST RUNNER")

            mgmt_node = broker_manager.get_random_init_node()
            queue_created = False
            qc_ctr = 0
            while queue_created == False and qc_ctr < 20:
                qc_ctr += 1
                if sac_enabled:
                    queue_created = broker_manager.create_quorum_sac_queue(
                        mgmt_node, queue_name, cluster_size, 0)
                else:
                    queue_created = broker_manager.create_quorum_queue(
                        mgmt_node, queue_name, cluster_size, 0)

                if queue_created:
                    setup_complete = True
                else:
                    time.sleep(5)

        time.sleep(10)

        msg_monitor = MessageMonitor("qqt", test_number, print_mod, True,
                                     False)
        publisher = RabbitPublisher(1, test_number, broker_manager,
                                    in_flight_max, 120, print_mod)
        publisher.configure_sequence_direct(queue_name, count, 0, 1)
        consumer_manager = ConsumerManager(broker_manager, msg_monitor,
                                           "TEST RUNNER", False)
        consumer_manager.add_consumers(1, test_number, queue_name, prefetch)

        chaos = ChaosExecutor(initial_nodes)

        if chaos_mode == "partitions":
            chaos.only_partitions()
        elif chaos_mode == "nodes":
            chaos.only_kill_nodes()

        monitor_thread = threading.Thread(target=msg_monitor.process_messages)
        monitor_thread.start()

        consumer_manager.start_consumers()

        pub_thread = threading.Thread(target=publisher.start_publishing)
        pub_thread.start()
        console_out("publisher started", "TEST RUNNER")

        for action_num in range(1, actions + 1):
            wait_sec = random.randint(chaos_min_interval, chaos_max_interval)
            console_out(f"waiting for {wait_sec} seconds before next action",
                        "TEST RUNNER")
            time.sleep(wait_sec)

            console_out(
                f"execute chaos action {action_num}/{actions} of test {test_number}",
                "TEST RUNNER")
            chaos.execute_chaos_action()
            subprocess.call(["bash", "../cluster/cluster-status.sh"])

        time.sleep(60)
        console_out("repairing cluster", "TEST RUNNER")
        chaos.repair()
        console_out("repaired cluster", "TEST RUNNER")

        publisher.stop_publishing()

        console_out("starting grace period for consumer to catch up",
                    "TEST RUNNER")
        ctr = 0

        while True:
            # A timedelta, not milliseconds.
            since_last_msg = datetime.datetime.now(
            ) - msg_monitor.get_last_msg_time()
            if msg_monitor.get_unique_count() >= publisher.get_pos_ack_count(
            ) and len(publisher.get_msg_set().difference(
                    msg_monitor.get_msg_set())) == 0:
                # Everything confirmed has been received.
                break
            elif ctr > grace_period_sec and since_last_msg.total_seconds(
            ) > 15:
                # Grace period used up and the consumer has gone quiet.
                break
            time.sleep(1)
            ctr += 1

        # Lost = in the publisher's message set but not in the monitor's.
        confirmed_set = publisher.get_msg_set()
        lost_msgs = confirmed_set.difference(msg_monitor.get_msg_set())

        console_out("RESULTS------------------------------------",
                    "TEST RUNNER")

        if len(lost_msgs) > 0:
            console_out(f"Lost messages count: {len(lost_msgs)}",
                        "TEST RUNNER")
            for msg in lost_msgs:
                console_out(f"Lost message: {msg}", "TEST RUNNER")

        console_out(
            f"Confirmed count: {publisher.get_pos_ack_count()} Received count: {msg_monitor.get_receive_count()} Unique received: {msg_monitor.get_unique_count()}",
            "TEST RUNNER")
        success = True

        if msg_monitor.get_out_of_order() == True:
            console_out("FAILED TEST: OUT OF ORDER MESSAGES", "TEST RUNNER")
            success = False

        if len(lost_msgs) > 0:
            console_out("FAILED TEST: LOST MESSAGES", "TEST RUNNER")
            success = False

        if success == True:
            console_out("TEST OK", "TEST RUNNER")

        console_out("RESULTS END------------------------------------",
                    "TEST RUNNER")

        try:
            consumer_manager.stop_all_consumers()
            pub_thread.join()
            # A new monitor thread is started for every test; stop it so
            # the threads do not pile up across runs. The wait is bounded
            # so a stuck monitor cannot hang the runner.
            msg_monitor.stop_consuming()
            monitor_thread.join(30)
        except Exception as e:
            console_out("Failed to clean up test correctly: " + str(e),
                        "TEST RUNNER")

        console_out(f"TEST {test_number} COMPLETE", "TEST RUNNER")
예제 #10
0
def main():
    """Run randomized chaos / consumer-action tests and print a summary.

    Command-line options (read through get_args / get_mandatory_arg /
    get_optional_arg / get_optional_arg_validated):
        --test-name              mandatory; used for the log directory and
                                 passed to MessageMonitor / zip_log_files.
        --tests                  mandatory, int; number of test runs.
        --run-minutes            mandatory, int; run time per test.
        --consumers              mandatory, int; consumers per test
                                 (0 runs without consumers).
        --grace-period-sec       mandatory, int; maximum catch-up wait.
        --queue                  mandatory; queue name prefix.
        --queue-type             mandatory; "mirrored" or "quorum".
        --sac                    mandatory; interpreted by is_true.
        --pre-fetch              int (default "10").
        --analyze                is_true (default "true").
        --qq-max-length          int (default "0"); quorum queues only.
        --log-msgs               is_true (default "false").
        --publishers             int (default "1"); any value above 0
                                 starts a single publisher.
        --in-flight-max          int (default "10").
        --print-mod              int (default in-flight-max * 5, or 1000
                                 without a publisher).
        --sequences              int (default "1").
        --new-cluster            is_true (default "true").
        --cluster                cluster size (default "3").
        --rmq-version            "3.7", "3.8-beta" or "3.8-alpha".
        --stop-mode              "crash" (use toxiproxy), "close" (hard
                                 close consumers) or "cancel".
        --chaos-actions          is_true (default "true").
        --chaos-mode / --chaos-min-interval / --chaos-max-interval
        --consumer-actions       is_true (default "true"); ignored when
                                 there are no consumers.
        --consumer-min-interval / --consumer-max-interval

    Exits with status 1 when --queue-type is not supported.
    """
    print("random-test.py")
    #signal.signal(signal.SIGINT, interuppt_handler)
    args = get_args(sys.argv)

    count = -1  # no limit
    test_name = get_mandatory_arg(args, "--test-name")
    tests = int(get_mandatory_arg(args, "--tests"))
    run_minutes = int(get_mandatory_arg(args, "--run-minutes"))
    consumer_count = int(get_mandatory_arg(args, "--consumers"))
    prefetch = int(get_optional_arg(args, "--pre-fetch", "10"))
    grace_period_sec = int(get_mandatory_arg(args, "--grace-period-sec"))
    queue = get_mandatory_arg(args, "--queue")
    queue_type = get_mandatory_arg(args, "--queue-type")
    analyze = is_true(get_optional_arg(args, "--analyze", "true"))

    # Any other queue type would never be created and the creation loop
    # below would spin forever, so reject it up front.
    if queue_type not in ("mirrored", "quorum"):
        console_out(
            f"Unsupported --queue-type: {queue_type}. Must be mirrored or quorum",
            "TEST RUNNER")
        sys.exit(1)

    if queue_type == "quorum":
        qq_max_length = int(get_optional_arg(args, "--qq-max-length", "0"))

    sac_enabled = is_true(get_mandatory_arg(args, "--sac"))
    log_messages = is_true(get_optional_arg(args, "--log-msgs", "false"))

    publisher_count = int(get_optional_arg(args, "--publishers", "1"))
    # Single flag for every publisher-related branch. Creation used to
    # test ``== 1`` while the later uses tested ``> 0``, which raised
    # NameError for counts above 1.
    has_publisher = publisher_count > 0
    if has_publisher:
        in_flight_max = int(get_optional_arg(args, "--in-flight-max", "10"))
        print_mod = int(
            get_optional_arg(args, "--print-mod", f"{in_flight_max * 5}"))
        sequence_count = int(get_optional_arg(args, "--sequences", "1"))
    else:
        print_mod = int(get_optional_arg(args, "--print-mod", "1000"))

    if publisher_count > 1:
        console_out(
            "Only one publisher is supported by this runner, starting a single publisher",
            "TEST RUNNER")

    new_cluster = is_true(get_optional_arg(args, "--new-cluster", "true"))
    cluster_size = get_optional_arg(args, "--cluster", "3")
    rmq_version = get_optional_arg_validated(args, "--rmq-version", "3.8-beta",
                                             ["3.7", "3.8-beta", "3.8-alpha"])
    stop_mode = get_optional_arg_validated(args, "--stop-mode", "crash",
                                           ["crash", "close", "cancel"])

    use_toxiproxy = stop_mode == "crash"
    consumer_hard_close = stop_mode == "close"

    include_chaos = is_true(get_optional_arg(args, "--chaos-actions", "true"))
    if include_chaos:
        chaos_mode = get_optional_arg(args, "--chaos-mode", "mixed")
        chaos_min_interval = int(
            get_optional_arg(args, "--chaos-min-interval", "60"))
        chaos_max_interval = int(
            get_optional_arg(args, "--chaos-max-interval", "120"))

    include_con_actions = is_true(
        get_optional_arg(args, "--consumer-actions", "true"))
    if include_con_actions and consumer_count <= 0:
        # No ConsumerManager exists without consumers, so consumer
        # actions would fail with NameError; skip them instead.
        include_con_actions = False
    if include_con_actions:
        con_action_min_interval = int(
            get_optional_arg(args, "--consumer-min-interval", "20"))
        con_action_max_interval = int(
            get_optional_arg(args, "--consumer-max-interval", "60"))

    failed_test_log = list()
    failed_tests = set()

    for test_number in range(tests):

        print("")
        subprocess.call(["mkdir", f"logs/{test_name}/{str(test_number)}"])
        console_out(f"TEST RUN: {test_number} --------------------------",
                    "TEST RUNNER")
        broker_manager = BrokerManager()
        broker_manager.deploy(cluster_size, new_cluster, rmq_version,
                              use_toxiproxy)
        initial_nodes = broker_manager.get_initial_nodes()
        console_out(f"Initial nodes: {initial_nodes}", "TEST RUNNER")

        queue_name = queue + "_" + str(test_number)
        mgmt_node = broker_manager.get_random_init_node()
        queue_created = False

        # Retry queue creation every 5 seconds until it succeeds.
        while queue_created == False:
            if queue_type == "mirrored":
                if sac_enabled:
                    queue_created = broker_manager.create_standard_sac_queue(
                        mgmt_node, queue_name, cluster_size)
                else:
                    queue_created = broker_manager.create_standard_queue(
                        mgmt_node, queue_name, cluster_size)
            elif queue_type == "quorum":
                if sac_enabled:
                    queue_created = broker_manager.create_quorum_sac_queue(
                        mgmt_node, queue_name, cluster_size, qq_max_length)
                else:
                    queue_created = broker_manager.create_quorum_queue(
                        mgmt_node, queue_name, cluster_size, qq_max_length)

            if queue_created == False:
                time.sleep(5)

        time.sleep(10)

        msg_monitor = MessageMonitor(test_name, test_number, print_mod,
                                     analyze, log_messages)
        chaos = ChaosExecutor(initial_nodes)

        if include_chaos:
            if chaos_mode == "partitions":
                chaos.only_partitions()
            elif chaos_mode == "nodes":
                chaos.only_kill_nodes()

        monitor_thread = threading.Thread(target=msg_monitor.process_messages)
        monitor_thread.start()

        if consumer_count > 0:
            consumer_manager = ConsumerManager(broker_manager, msg_monitor,
                                               "TEST RUNNER", use_toxiproxy)
            consumer_manager.add_consumers(consumer_count, test_number,
                                           queue_name, prefetch)
            consumer_manager.start_consumers()

        if has_publisher:
            publisher = RabbitPublisher(1, test_number, broker_manager,
                                        in_flight_max, 120, print_mod)
            publisher.configure_sequence_direct(queue_name, count, 0,
                                                sequence_count)

            pub_thread = threading.Thread(target=publisher.start_publishing)
            pub_thread.start()
            console_out("publisher started", "TEST RUNNER")

        if include_con_actions or include_chaos:
            init_wait_sec = 20
            console_out(
                f"Will start chaos and consumer actions in {init_wait_sec} seconds",
                "TEST RUNNER")
            time.sleep(init_wait_sec)

        if include_chaos:
            chaos_thread = threading.Thread(
                target=chaos.start_random_single_action_and_repair,
                args=(chaos_min_interval, chaos_max_interval))
            chaos_thread.start()
            console_out("Chaos executor started", "TEST RUNNER")

        if include_con_actions:
            consumer_action_thread = threading.Thread(
                target=consumer_manager.start_random_consumer_actions,
                args=(con_action_min_interval, con_action_max_interval,
                      consumer_hard_close))
            consumer_action_thread.start()
            console_out("Consumer actions started", "TEST RUNNER")

        ctr = 0
        run_seconds = run_minutes * 60
        # NOTE(review): stop_please is not assigned in this function; it is
        # presumably a module-level flag - confirm it is defined.
        while ctr < run_seconds and not stop_please:
            try:
                time.sleep(1)
                ctr += 1

                if ctr % 60 == 0:
                    console_out(
                        f"Test at {int(ctr/60)} minute mark, {int((run_seconds-ctr)/60)} minutes left",
                        "TEST RUNNER")
            except KeyboardInterrupt:
                # Ctrl-C ends the run phase early but still goes through
                # the shutdown and reporting below.
                console_out(
                    f"Test forced to stop at {int(ctr/60)} minute mark, {int((run_seconds-ctr)/60)} minutes left)",
                    "TEST RUNNER")
                break

        try:
            chaos.stop_random_single_action_and_repair()

            if consumer_count > 0:
                consumer_manager.stop_random_consumer_actions()

            if include_chaos:
                chaos_thread.join(30)

            if include_con_actions:
                consumer_action_thread.join(30)
        except Exception as e:
            console_out("Failed to stop chaos cleanly: " + str(e),
                        "TEST RUNNER")

        if has_publisher:
            publisher.stop_publishing()

        if consumer_count > 0:
            console_out("Resuming consumers", "TEST RUNNER")
            consumer_manager.resume_all_consumers()

            console_out("Starting grace period for consumer to catch up",
                        "TEST RUNNER")
            ctr = 0

            try:
                # Poll once per second until every confirmed message was
                # received or the grace period is over.
                while ctr < grace_period_sec:
                    if has_publisher and msg_monitor.get_unique_count(
                    ) >= publisher.get_pos_ack_count() and len(
                            publisher.get_msg_set().difference(
                                msg_monitor.get_msg_set())) == 0:
                        break
                    time.sleep(1)
                    ctr += 1
            except KeyboardInterrupt:
                console_out("Grace period ended", "TEST RUNNER")

        console_out("RESULTS ----------------------------------------",
                    "TEST RUNNER")
        if has_publisher:
            confirmed_set = publisher.get_msg_set()
            not_consumed_msgs = confirmed_set.difference(
                msg_monitor.get_msg_set())
            console_out(
                f"Confirmed count: {publisher.get_pos_ack_count()} Received count: {msg_monitor.get_receive_count()} Unique received: {msg_monitor.get_unique_count()}",
                "TEST RUNNER")
        else:
            not_consumed_msgs = set()
            console_out(
                f"Received count: {msg_monitor.get_receive_count()} Unique received: {msg_monitor.get_unique_count()}",
                "TEST RUNNER")

        success = True
        if consumer_count > 0:
            if len(not_consumed_msgs) > 0:
                if sac_enabled:
                    console_out(
                        f"FAILED TEST: Potential message loss or failure of consumers to consume or failure to promote Waiting to Active. Not consumed count: {len(not_consumed_msgs)}",
                        "TEST RUNNER")
                else:
                    console_out(
                        f"FAILED TEST: Potential message loss or failure of consumers to consume. Not consumed count: {len(not_consumed_msgs)}",
                        "TEST RUNNER")
                failed_test_log.append(
                    f"Test {test_number} FAILURE: Potential Message Loss. {len(not_consumed_msgs)} messsages."
                )
                failed_tests.add(test_number)

                # List the unconsumed messages in sorted order, cutting the
                # listing off once more than 500 have been printed.
                for lost_ctr, msg in enumerate(sorted(not_consumed_msgs),
                                               start=1):
                    console_out(f"Lost? {msg}", "TEST RUNNER")
                    if lost_ctr > 500:
                        console_out("More than 500, truncated list",
                                    "TEST RUNNER")
                        break

                success = False

            if msg_monitor.get_out_of_order() == True:
                success = False
                console_out("FAILED TEST: Received out-of-order messages",
                            "TEST RUNNER")
                failed_test_log.append(
                    f"Test {test_number} FAILURE: Received out-of-order messages"
                )
                failed_tests.add(test_number)

        if success:
            console_out("TEST OK", "TEST RUNNER")

        console_out("RESULTS END ------------------------------------",
                    "TEST RUNNER")

        try:
            if consumer_count > 0:
                consumer_manager.stop_all_consumers()

            if has_publisher:
                pub_thread.join(30)
            msg_monitor.stop_consuming()
            monitor_thread.join(30)
        except Exception as e:
            console_out_exception("Failed to clean up test correctly.", e,
                                  "TEST RUNNER")

        broker_manager.zip_log_files(test_name, test_number)
        console_out(f"TEST {test_number} COMPLETE", "TEST RUNNER")

    console_out("", "TEST RUNNER")
    console_out("SUMMARY", "TEST RUNNER")
    console_out(f"OK {tests - len(failed_tests)} FAIL {len(failed_tests)}",
                "TEST RUNNER")
    for line in failed_test_log:
        console_out(line, "TEST RUNNER")

    console_out("TEST RUN COMPLETE", "TEST RUNNER")
예제 #11
0
def main():
    """Run a series of chaos tests against a single queue and report the results.

    Command-line arguments (read via ``get_mandatory_arg`` / ``get_optional_arg``):

    Mandatory:
        --tests             number of test iterations to run
        --run-minutes       how long each iteration publishes/consumes
        --consumers         number of consumers handed to the ConsumerManager
        --grace-period-sec  maximum seconds to wait for consumers to catch up
        --queue             queue name prefix; "_<test number>" is appended
        --queue-type        passed through to the queue creation call
        --sac               "false" (any case) creates a plain queue via
                            ``create_queue``; anything else uses
                            ``create_sac_queue``

    Optional (default):
        --publishers ("1")            the publisher thread is only started
                                      when this is exactly 1
        --print-mod ("0")             0 means ``in_flight_max * 5``
        --new-cluster ("true")        "true" runs the cluster setup script
                                      before every iteration
        --in-flight-max ("10")
        --sequences ("1")
        --cluster ("3")               cluster size, kept as a string
        --chaos-actions ("true")      "false" disables the chaos thread
        --chaos-mode ("mixed")        "partitions" or "nodes" restrict the
                                      chaos executor; other values leave it
                                      unrestricted
        --chaos-min-interval ("60"), --chaos-max-interval ("120")
        --consumer-actions ("true")   "false" disables random consumer actions
        --consumer-min-interval ("20"), --consumer-max-interval ("60")

    For each iteration the function reports the messages that were confirmed
    to the publisher but never seen by the message monitor, and whether any
    messages were received out of order.
    """
    # NOTE(review): the SIGINT handler registration below was already disabled
    # in the original. `stop_please` (read in the run loop) is a free variable
    # here, so it must be provided at module scope.
    #signal.signal(signal.SIGINT, interuppt_handler)
    args = get_args(sys.argv)

    count = -1  # no limit
    tests = int(get_mandatory_arg(args, "--tests"))
    run_minutes = int(get_mandatory_arg(args, "--run-minutes"))
    consumer_count = int(get_mandatory_arg(args, "--consumers"))
    grace_period_sec = int(get_mandatory_arg(args, "--grace-period-sec"))
    queue = get_mandatory_arg(args, "--queue")
    queue_type = get_mandatory_arg(args, "--queue-type")
    sac = get_mandatory_arg(args, "--sac")

    publisher_count = int(get_optional_arg(args, "--publishers", "1"))
    print_mod = int(get_optional_arg(args, "--print-mod", "0"))
    new_cluster = get_optional_arg(args, "--new-cluster", "true")
    in_flight_max = int(get_optional_arg(args, "--in-flight-max", "10"))
    sequence_count = int(get_optional_arg(args, "--sequences", "1"))
    cluster_size = get_optional_arg(args, "--cluster", "3")
    # Kept under its own name so the flag string is not later confused with
    # the ChaosExecutor instance created per test.
    chaos_actions = get_optional_arg(args, "--chaos-actions", "true")
    chaos_mode = get_optional_arg(args, "--chaos-mode", "mixed")
    chaos_min_interval = int(get_optional_arg(args, "--chaos-min-interval", "60"))
    chaos_max_interval = int(get_optional_arg(args, "--chaos-max-interval", "120"))
    consumer_actions = get_optional_arg(args, "--consumer-actions", "true")
    con_action_min_interval = int(get_optional_arg(args, "--consumer-min-interval", "20"))
    con_action_max_interval = int(get_optional_arg(args, "--consumer-max-interval", "60"))

    if print_mod == 0:
        print_mod = in_flight_max * 5

    # Each of these features is on unless the flag is explicitly "false".
    include_chaos = chaos_actions.upper() != "FALSE"
    include_con_actions = consumer_actions.upper() != "FALSE"
    sac_enabled = sac.upper() != "FALSE"
    # A fresh cluster, by contrast, is only built when explicitly "true".
    rebuild_cluster = new_cluster.upper() == "TRUE"

    message_type = "sequence"

    for test_number in range(tests):

        print("")
        console_out(f"TEST RUN: {test_number} --------------------------", "TEST RUNNER")
        if rebuild_cluster:
            subprocess.call(["bash", "../automated/setup-test-run.sh", cluster_size, "3.8"])
            console_out("Waiting for cluster...", "TEST RUNNER")
            time.sleep(30)

        console_out("Cluster status:", "TEST RUNNER")
        subprocess.call(["bash", "../cluster/cluster-status.sh"])

        broker_manager = BrokerManager()
        broker_manager.load_initial_nodes()
        initial_nodes = broker_manager.get_initial_nodes()
        console_out(f"Initial nodes: {initial_nodes}", "TEST RUNNER")

        queue_name = queue + "_" + str(test_number)
        mgmt_node = broker_manager.get_random_init_node()

        # Retry queue creation every 5 seconds until the broker manager
        # reports success (assumes the create_* calls return a bool).
        if sac_enabled:
            create_queue = broker_manager.create_sac_queue
        else:
            create_queue = broker_manager.create_queue

        while not create_queue(mgmt_node, queue_name, cluster_size, queue_type):
            time.sleep(5)

        time.sleep(10)

        msg_monitor = MessageMonitor(print_mod)
        # NOTE(review): `stats` is never read in this function; the call is
        # kept in case the QueueStats constructor has side effects - confirm
        # and remove if it does not.
        stats = QueueStats('jack', 'jack', queue_name)
        chaos_executor = ChaosExecutor(initial_nodes)

        if chaos_mode == "partitions":
            chaos_executor.only_partitions()
        elif chaos_mode == "nodes":
            chaos_executor.only_kill_nodes()

        consumer_manager = ConsumerManager(broker_manager, msg_monitor, "TEST RUNNER")

        pub_node = broker_manager.get_random_init_node()
        publisher = RabbitPublisher(f"PUBLISHER(Test:{test_number} Id:P1)",
                                    initial_nodes, pub_node, in_flight_max,
                                    120, print_mod)
        consumer_manager.add_consumers(consumer_count, test_number, queue_name)

        monitor_thread = threading.Thread(target=msg_monitor.process_messages)
        monitor_thread.start()

        consumer_manager.start_consumers()

        # Only the single-publisher configuration actually publishes.
        if publisher_count == 1:
            pub_thread = threading.Thread(
                target=publisher.publish_direct,
                args=(queue_name, count, sequence_count, 0, message_type))
            pub_thread.start()
            console_out("publisher started", "TEST RUNNER")

        if include_con_actions or include_chaos:
            init_wait_sec = 20
            console_out(f"Will start chaos and consumer actions in {init_wait_sec} seconds", "TEST RUNNER")
            time.sleep(init_wait_sec)

        if include_chaos:
            chaos_thread = threading.Thread(
                target=chaos_executor.start_random_single_action_and_repair,
                args=(chaos_min_interval, chaos_max_interval))
            chaos_thread.start()
            console_out("Chaos executor started", "TEST RUNNER")

        if include_con_actions:
            consumer_action_thread = threading.Thread(
                target=consumer_manager.start_random_consumer_actions,
                args=(con_action_min_interval, con_action_max_interval))
            consumer_action_thread.start()
            console_out("Consumer actions started", "TEST RUNNER")

        # Main run phase: tick once per second until the configured duration
        # has elapsed, `stop_please` becomes truthy, or the user interrupts.
        ctr = 0
        run_seconds = run_minutes * 60
        while ctr < run_seconds and not stop_please:
            try:
                time.sleep(1)
                ctr += 1

                if ctr % 60 == 0:
                    console_out(f"Test at {ctr // 60} minute mark, {(run_seconds - ctr) // 60} minutes left", "TEST RUNNER")
            except KeyboardInterrupt:
                console_out(f"Test forced to stop at {ctr // 60} minute mark, {(run_seconds - ctr) // 60} minutes left)", "TEST RUNNER")
                break

        try:
            chaos_executor.stop_random_single_action_and_repair()
            consumer_manager.stop_random_consumer_actions()

            if include_chaos:
                chaos_thread.join()

            if include_con_actions:
                consumer_action_thread.join()
        except Exception as e:
            console_out("Failed to stop chaos cleanly: " + str(e), "TEST RUNNER")

        console_out("Resuming consumers", "TEST RUNNER")
        consumer_manager.resume_all_consumers()

        if publisher_count == 1:
            publisher.stop(True)

        console_out("starting grace period for consumer to catch up", "TEST RUNNER")

        # Poll once per second, for at most grace_period_sec seconds, until
        # every confirmed message has been seen by the monitor.
        for _ in range(grace_period_sec):
            caught_up = (
                msg_monitor.get_unique_count() >= publisher.get_pos_ack_count()
                and not publisher.get_msg_set().difference(msg_monitor.get_msg_set())
            )
            if caught_up:
                break
            time.sleep(1)

        confirmed_set = publisher.get_msg_set()
        not_consumed_msgs = confirmed_set.difference(msg_monitor.get_msg_set())

        console_out("RESULTS ----------------------------------------", "TEST RUNNER")
        console_out(f"Confirmed count: {publisher.get_pos_ack_count()} Received count: {msg_monitor.get_receive_count()} Unique received: {msg_monitor.get_unique_count()}", "TEST RUNNER")

        success = True
        if not_consumed_msgs:
            console_out(f"FAILED TEST: Potential failure to promote Waiting to Active. Not consumed count: {len(not_consumed_msgs)}", "TEST RUNNER")
            success = False

        # Assumes get_out_of_order() returns a bool.
        if msg_monitor.get_out_of_order():
            success = False
            console_out("FAILED TEST: Received out-of-order messages", "TEST RUNNER")

        if success:
            console_out("TEST OK", "TEST RUNNER")

        console_out("RESULTS END ------------------------------------", "TEST RUNNER")

        try:
            consumer_manager.stop_all_consumers()

            if publisher_count == 1:
                pub_thread.join()
            msg_monitor.stop_consuming()
            monitor_thread.join()
        except Exception as e:
            console_out("Failed to clean up test correctly: " + str(e), "TEST RUNNER")

        console_out(f"TEST {test_number} COMPLETE", "TEST RUNNER")
예제 #12
0
# Connection settings for the RabbitMQ broker and the Redis instance.
# HOST is referenced below but not assigned in this part of the file; it is
# expected to be defined earlier in the module.
PORT_MQ = 5672
PORT_REDIS = 6379
USER_MQ = 'admin'
PASSWD_MQ = '000000'
PASSWD_REDIS = ''
VHOST = 'test'

# NOTE(review): despite the name this is a set literal, not a dict; its use
# is not visible in this part of the file.
att_dict = {'func', 'from'}

redis = RedisDb(host=HOST, port=PORT_REDIS, pwd=PASSWD_REDIS)

# The publisher and the consumer are built with the same keyword arguments.
_MQ_CONNECTION = dict(
    host=HOST,
    port=PORT_MQ,
    user=USER_MQ,
    pwd=PASSWD_MQ,
    vhost=VHOST,
)

producer = RabbitPublisher(**_MQ_CONNECTION)
consumer = RabbitConsumer(**_MQ_CONNECTION)

def send_to_mq(msg):
    try:
        #producer.pu()
        producer.publish(msg=msg, 
                        routing_key='FSReplay')