Example 1
    def test_MB_34947(self):
        # Update already created docs with async writes
        load_gen = doc_generator(self.key, 0, self.num_items,
                                 key_size=self.key_size,
                                 doc_size=self.doc_size,
                                 doc_type=self.doc_type,
                                 vbuckets=self.cluster.vbuckets)
        task = self.task.async_load_gen_docs(
            self.cluster, self.def_bucket, load_gen, "update", 0,
            persist_to=self.persist_to, replicate_to=self.replicate_to,
            timeout_secs=self.sdk_timeout,
            batch_size=10, process_concurrency=8)
        self.task.jython_task_manager.get_task_result(task)

        # Update bucket replica to new value
        bucket_helper = BucketHelper(self.cluster.master)
        bucket_helper.change_bucket_props(
            self.def_bucket, replicaNumber=self.new_replica)
        self.bucket_util.print_bucket_stats(self.cluster)

        # Start rebalance task
        rebalance = self.task.async_rebalance(self.cluster.servers, [], [])
        self.sleep(10, "Wait for rebalance to start")

        # Wait for rebalance task to complete
        self.task.jython_task_manager.get_task_result(rebalance)

        # Assert if rebalance failed
        self.assertTrue(rebalance.result,
                        "Rebalance failed after replica update")
Example 2
    def update_bucket_replica(self):
        self.log.info("Updating all the bucket replicas to {0}".format(
            self.replicas_for_failover))
        for i in range(len(self.bucket_util.buckets)):
            bucket_helper = BucketHelper(self.cluster.master)
            bucket_helper.change_bucket_props(
                self.bucket_util.buckets[i],
                replicaNumber=self.replicas_for_failover)
        task = self.task.async_rebalance(
            self.cluster.servers[:self.nodes_init], [], [])
        self.task.jython_task_manager.get_task_result(task)
        self.log.info("Bucket stats before failover")
        self.bucket_util.print_bucket_stats()
Example 3
    def test_multiple_scenarios(self):
        """
        Test multiple rebalance scenarios in a single test with CRUDs in parallel

        1. Rebalance_out the orchestrator node
        2. Rebalance_in nodes as given in the nodes_in param
        3. Update the bucket replica count and rebalance
        4. Rebalance_out nodes as given in the nodes_out param
        5. Run plain CRUDs at the end to verify the cluster status
        """

        # Local helper to wait for all CRUD tasks to complete and verify no errors
        def wait_for_crud_task_and_verify_for_no_errors(tasks_info):
            if not self.atomicity:
                self.bucket_util.verify_doc_op_task_exceptions(
                    tasks_info, self.cluster)
                self.bucket_util.log_doc_ops_task_failures(tasks_info)
                for task, task_info in tasks_info.items():
                    self.assertFalse(
                        task_info["ops_failed"],
                        "Doc ops failed for task: {}".format(task.thread_name))

        self.assertTrue(self.replica_to_update is not None,
                        "replica_to_update param is not set")
        def_bucket = self.bucket_util.buckets[0]
        servers_in = [
            self.cluster.servers[self.nodes_init + i]
            for i in range(self.nodes_in)
        ]
        servers_out = [
            self.cluster.servers[self.nodes_init - i - 1]
            for i in range(self.nodes_out)
        ]

        # Start CRUD operations
        crud_tasks = self.__load_docs_in_all_buckets()

        # Rebalance_out the orchestrator node
        rebalance_result = self.task.rebalance(
            self.cluster.servers[:self.nodes_init], [],
            [self.cluster.servers[0]])
        self.assertTrue(rebalance_result,
                        "Rebalance out orchestrator node failed")
        # Wait for all CRUD tasks to complete and verify no failures are seen
        self.cluster.master = self.servers[1]
        wait_for_crud_task_and_verify_for_no_errors(crud_tasks)

        self.cluster.nodes_in_cluster = self.servers[1:self.nodes_init]
        # Start CRUD operations
        crud_tasks = self.__load_docs_in_all_buckets()
        # Rebalance_in multiple cluster nodes
        self.add_remove_servers_and_rebalance(servers_in, [])
        wait_for_crud_task_and_verify_for_no_errors(crud_tasks)

        # Start CRUD operations
        crud_tasks = self.__load_docs_in_all_buckets()
        # Update bucket replica value
        bucket_helper = BucketHelper(self.cluster.servers[1])
        bucket_helper.change_bucket_props(def_bucket,
                                          replicaNumber=self.replica_to_update)
        # Start and wait till rebalance is complete
        rebalance = self.task.async_rebalance(self.cluster.nodes_in_cluster,
                                              [], [])
        self.task.jython_task_manager.get_task_result(rebalance)
        wait_for_crud_task_and_verify_for_no_errors(crud_tasks)

        # Start CRUD operations
        crud_tasks = self.__load_docs_in_all_buckets()
        # Rebalance_out multiple cluster nodes
        self.add_remove_servers_and_rebalance([], servers_out)
        wait_for_crud_task_and_verify_for_no_errors(crud_tasks)

        # Start CRUD operations
        crud_tasks = self.__load_docs_in_all_buckets()
        wait_for_crud_task_and_verify_for_no_errors(crud_tasks)

        # Doc count verification
        if not self.atomicity:
            self.bucket_util._wait_for_stats_all_buckets()
            self.bucket_util.verify_stats_all_buckets(self.num_items)
Example 4
    def test_rebalance_inout_with_durability_check(self):
        """
        Perform a swap-rebalance with an unequal number of in/out nodes
        1. Swap-out 'self.nodes_out' nodes
        2. Add 'self.nodes_in' nodes into the cluster
        3. Perform swap-rebalance
        4. Make sure durability is not broken due to swap-rebalance

        Note: This is a positive case, i.e. durability should not be broken
        """
        master = self.cluster.master
        num_initial_servers = self.num_initial_servers
        creds = self.input.membase_settings
        def_bucket = self.bucket_util.buckets[0]

        # Update replica value before performing rebalance in/out
        if self.replica_to_update:
            bucket_helper = BucketHelper(self.cluster.master)

            # Recalculate replicate_to/persist_to as per new replica value
            if self.durability_level is None:
                self.replicate_to = floor(self.replica_to_update / 2) + 1
                self.persist_to = floor(self.replica_to_update / 2) + 2
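                # Illustrative numbers (hypothetical): with replica_to_update=2,
                # replicate_to = floor(2/2) + 1 = 2 and persist_to = floor(2/2) + 2 = 3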

            # Update bucket replica to new value as given in conf file
            self.log.info("Updating replica count of bucket to {0}".format(
                self.replica_to_update))
            bucket_helper.change_bucket_props(
                def_bucket.name, replicaNumber=self.replica_to_update)

        # Rest connection to add/rebalance/monitor nodes
        rest = RestConnection(master)

        # Start the swap rebalance
        current_nodes = RebalanceHelper.getOtpNodeIds(master)
        self.log.info("current nodes : {0}".format(current_nodes))
        toBeEjectedNodes = RebalanceHelper.pick_nodes(master,
                                                      howmany=self.nodes_out)
        optNodesIds = [node.id for node in toBeEjectedNodes]

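        # If the orchestrator itself is being swapped out, its otpNode id has
        # to end up in the eject list (appended or substituted below)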
        if self.swap_orchestrator:
            status, content = self.cluster_util.find_orchestrator(master)
            self.assertTrue(status,
                            msg="Unable to find orchestrator: {0}:{1}".format(
                                status, content))
            if self.nodes_out == len(current_nodes):
                optNodesIds.append(content)
            else:
                optNodesIds[0] = content

        for node in optNodesIds:
            self.log.info(
                "removing node {0} and rebalance afterwards".format(node))

        new_swap_servers = self.servers[
            num_initial_servers:num_initial_servers + self.nodes_in]
        for server in new_swap_servers:
            otpNode = rest.add_node(creds.rest_username, creds.rest_password,
                                    server.ip, server.port)
            msg = "unable to add node {0} to the cluster"
            self.assertTrue(otpNode, msg.format(server.ip))

        if self.swap_orchestrator:
            rest = RestConnection(new_swap_servers[0])
            master = new_swap_servers[0]

        if self.do_access:
            self.log.info("DATA ACCESS PHASE")
            self.loaders = self.start_access_phase()

        self.log.info("SWAP REBALANCE PHASE")
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                       ejectedNodes=optNodesIds)

        if self.do_stop_start:
            # Rebalance is stopped at 20%, 40% and 60% completion
            retry = 0
            for expected_progress in (20, 40, 60):
                self.log.info(
                    "STOP/START SWAP REBALANCE PHASE WITH PROGRESS {0}%".
                    format(expected_progress))
                while True:
                    progress = rest._rebalance_progress()
                    if progress < 0:
                        self.log.error(
                            "rebalance progress code : {0}".format(progress))
                        break
                    elif progress == 100:
                        self.log.warn("Rebalance has already reached 100%")
                        break
                    elif progress >= expected_progress:
                        self.log.info(
                            "Rebalance will be stopped with {0}%".format(
                                progress))
                        stopped = rest.stop_rebalance()
                        self.assertTrue(stopped,
                                        msg="unable to stop rebalance")
                        self.sleep(20)
                        rest.rebalance(otpNodes=[
                            node.id for node in rest.node_statuses()
                        ],
                                       ejectedNodes=optNodesIds)
                        break
                    elif retry > 100:
                        break
                    else:
                        retry += 1
                        self.sleep(1)
        self.assertTrue(
            rest.monitorRebalance(),
            msg="rebalance operation failed after adding node {0}".format(
                optNodesIds))
        self.verification_phase()
Example 5
    def test_volume_taf(self):
        self.loop = 0
        # self.cluster_utils.set_metadata_purge_interval()
        if self.number_of_indexes > 0:
            # start running select queries thread
            self.query_thread = threading.Thread(target=self.run_select_query)
            self.query_thread_flag = True
            self.query_thread.start()
            # Start running ui stats queries thread
            self.ui_stats_thread = threading.Thread(
                target=self.run_ui_stats_queries)
            self.ui_stats_thread_flag = True
            self.ui_stats_thread.start()
        self.log.info("Finished steps 1-4 successfully in setup")
        while self.loop < self.iterations:
            if self.loop > 0 or self.flush_buckets_before_indexes_creation:
                self.log.info("Reloading items to buckets")
                self.reload_data_into_buckets()
            #########################################################################################################################
            self.log.info("Step 5: Rebalance in with Loading of docs")
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=0)
            task = self.data_load_collection()
            self.wait_for_rebalance_to_complete(rebalance_task)
            self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            self.bucket_util.print_bucket_stats()
            #########################################################################################################################
            self.log.info("Step 6: Rebalance Out with Loading of docs")
            rebalance_task = self.rebalance(nodes_in=0, nodes_out=1)
            task = self.data_load_collection()
            self.wait_for_rebalance_to_complete(rebalance_task)
            self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            self.bucket_util.print_bucket_stats()
            #######################################################################################################################
            self.log.info("Step 7: Rebalance In_Out with Loading of docs")
            rebalance_task = self.rebalance(nodes_in=2, nodes_out=1)
            task = self.data_load_collection()
            self.wait_for_rebalance_to_complete(rebalance_task)
            self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            self.bucket_util.print_bucket_stats()
            ########################################################################################################################
            self.log.info("Step 8: Swap with Loading of docs")
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=1)
            task = self.data_load_collection()
            self.wait_for_rebalance_to_complete(rebalance_task)
            self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            ########################################################################################################################
            self.log.info("Step 9: Updating the bucket replica to 2")
            bucket_helper = BucketHelper(self.cluster.master)
            for i in range(len(self.bucket_util.buckets)):
                bucket_helper.change_bucket_props(self.bucket_util.buckets[i],
                                                  replicaNumber=2)
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=0)
            task = self.data_load_collection()
            self.wait_for_rebalance_to_complete(rebalance_task)
            self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            self.bucket_util.print_bucket_stats()
            ########################################################################################################################
            self.log.info(
                "Enabling autoreprovision before inducing failure to prevent "
                "data loss in case there are ephemeral buckets")
            status = self.rest.update_autoreprovision_settings(True,
                                                               maxNodes=1)
            if not status:
                self.fail("Failed to enable autoreprovison")
            step_count = 9
            for action in [
                    CouchbaseError.STOP_MEMCACHED,
                    CouchbaseError.STOP_PROMETHEUS
            ]:
                step_count = step_count + 1
                self.log.info("Step {0}: {1}".format(step_count, action))
                self.log.info("Forcing durability level: MAJORITY")
                self.durability_level = "MAJORITY"
                task = self.data_load_collection()
                self.induce_and_revert_failure(action)
                # Rebalance is required after error is reverted
                rebalance_task = self.task.async_rebalance(
                    self.cluster.servers, [], [], retry_get_process_num=200)
                self.wait_for_rebalance_to_complete(rebalance_task)
                self.wait_for_async_data_load_to_complete(task)
                self.data_validation_collection()
                self.bucket_util.print_bucket_stats()
            self.durability_level = ""
            ########################################################################################################################
            step_count = 11
            for failover in ["Graceful", "Hard"]:
                for action in [
                        "RebalanceOut", "FullRecovery", "DeltaRecovery"
                ]:
                    step_count = step_count + 1
                    self.log.info(
                        "Step {0}: {1} Failover a node and {2} that node with data load in parallel"
                        .format(step_count, failover, action))

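                    # std_vbucket_dist is the std threshold later passed to
                    # vb_distribution_analysis() for the vBucket spread check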
                    self.std_vbucket_dist = self.input.param(
                        "std_vbucket_dist", None)
                    std = self.std_vbucket_dist or 1.0

                    kv_nodes = self.cluster_util.get_kv_nodes()
                    self.log.info(
                        "Collecting pre_failover_stats. KV nodes are {0}".
                        format(kv_nodes))
                    prev_failover_stats = self.bucket_util.get_failovers_logs(
                        kv_nodes, self.bucket_util.buckets)
                    prev_vbucket_stats = self.bucket_util.get_vbucket_seqnos(
                        kv_nodes, self.bucket_util.buckets)
                    self.sleep(10)

                    disk_replica_dataset, disk_active_dataset = \
                        self.bucket_util.get_and_compare_active_replica_data_set_all(
                            kv_nodes, self.bucket_util.buckets, path=None)

                    self.rest = RestConnection(self.cluster.master)
                    self.nodes = self.cluster_util.get_nodes(
                        self.cluster.master)
                    self.chosen = self.cluster_util.pick_nodes(
                        self.cluster.master,
                        howmany=1,
                        exclude_nodes=self.exclude_nodes)

                    reset_flag = False
                    if (not self.durability_level) and failover == "Hard":
                        # Force a durability level to prevent data loss during hard failover
                        self.log.info("Forcing durability level: MAJORITY")
                        self.durability_level = "MAJORITY"
                        reset_flag = True
                    task = self.data_load_collection()
                    if reset_flag:
                        self.durability_level = ""

                    # Mark Node for failover
                    if failover == "Graceful":
                        self.success_failed_over = self.rest.fail_over(
                            self.chosen[0].id, graceful=True)
                    else:
                        self.success_failed_over = self.rest.fail_over(
                            self.chosen[0].id, graceful=False)

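                    # Allow the failover to run, then wait until one node
                    # reports as failed over (or fail the test)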
                    self.sleep(300)
                    self.wait_for_failover_or_assert(1)

                    # Perform the action
                    if action == "RebalanceOut":
                        self.nodes = self.rest.node_statuses()
                        self.rest.rebalance(
                            otpNodes=[node.id for node in self.nodes],
                            ejectedNodes=[self.chosen[0].id])
                        # self.sleep(600)
                        self.assertTrue(
                            self.rest.monitorRebalance(stop_if_loop=False),
                            msg="Rebalance failed")
                        servs_out = [
                            node for node in self.cluster.servers
                            if node.ip == self.chosen[0].ip
                        ]
                        self.cluster.nodes_in_cluster = list(
                            set(self.cluster.nodes_in_cluster) -
                            set(servs_out))
                        self.available_servers += servs_out
                        self.sleep(10)
                    else:
                        if action == "FullRecovery":
                            if self.success_failed_over:
                                self.rest.set_recovery_type(
                                    otpNode=self.chosen[0].id,
                                    recoveryType="full")
                        elif action == "DeltaRecovery":
                            if self.success_failed_over:
                                self.rest.set_recovery_type(
                                    otpNode=self.chosen[0].id,
                                    recoveryType="delta")

                        rebalance_task = self.task.async_rebalance(
                            self.cluster.servers[:self.nodes_init], [], [],
                            retry_get_process_num=200)
                        self.wait_for_rebalance_to_complete(rebalance_task)
                        self.sleep(10)

                    self.wait_for_async_data_load_to_complete(task)
                    self.data_validation_collection()

                    kv_nodes = self.cluster_util.get_kv_nodes()
                    self.log.info(
                        "Collecting post_failover_stats. KV nodes are {0}".
                        format(kv_nodes))
                    self.bucket_util.compare_failovers_logs(
                        prev_failover_stats, kv_nodes,
                        self.bucket_util.buckets)
                    self.sleep(10)

                    self.bucket_util.data_analysis_active_replica_all(
                        disk_active_dataset,
                        disk_replica_dataset,
                        kv_nodes,
                        self.bucket_util.buckets,
                        path=None)
                    self.bucket_util.vb_distribution_analysis(
                        servers=kv_nodes,
                        buckets=self.bucket_util.buckets,
                        num_replicas=2,
                        std=std,
                        total_vbuckets=self.cluster_util.vbuckets)
                    self.sleep(10)
                    self.tasks = []
                    # Bring back the rebalance out node back to cluster for further steps
                    if action == "RebalanceOut":
                        self.sleep(120)
                        rebalance_task = self.rebalance(nodes_in=1,
                                                        nodes_out=0)
                        # self.sleep(600)
                        self.wait_for_rebalance_to_complete(rebalance_task)
                    self.bucket_util.print_bucket_stats()
            ########################################################################################################################
            self.log.info("Step 18: Updating the bucket replica to 1")
            bucket_helper = BucketHelper(self.cluster.master)
            for i in range(len(self.bucket_util.buckets)):
                bucket_helper.change_bucket_props(self.bucket_util.buckets[i],
                                                  replicaNumber=1)
            rebalance_task = self.task.async_rebalance(
                self.cluster.servers, [], [], retry_get_process_num=200)
            task = self.data_load_collection()
            self.wait_for_rebalance_to_complete(rebalance_task)
            self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            ########################################################################################################################
            self.log.info(
                "Step 19: Flush bucket(s) and start the entire process again")
            self.loop += 1
            if self.loop < self.iterations:
                # Flush buckets(s)
                self.bucket_util.flush_all_buckets(
                    self.cluster.master, skip_resetting_num_items=True)
                self.sleep(10)
                if len(self.cluster.nodes_in_cluster) > self.nodes_init:
                    self.nodes_cluster = self.cluster.nodes_in_cluster[:]
                    self.nodes_cluster.remove(self.cluster.master)
                    servs_out = random.sample(
                        self.nodes_cluster,
                        int(
                            len(self.cluster.nodes_in_cluster) -
                            self.nodes_init))
                    rebalance_task = self.task.async_rebalance(
                        self.cluster.servers[:self.nodes_init], [],
                        servs_out,
                        retry_get_process_num=200)
                    self.wait_for_rebalance_to_complete(rebalance_task)
                    self.available_servers += servs_out
                    self.cluster.nodes_in_cluster = list(
                        set(self.cluster.nodes_in_cluster) - set(servs_out))
            else:
                if self.number_of_indexes > 0:
                    # Join query thread
                    self.query_thread_flag = False
                    self.query_thread.join()
                    self.query_thread = None
                    # Join ui_stats thread
                    self.ui_stats_thread_flag = False
                    self.ui_stats_thread.join()
                    self.ui_stats_thread = None
                self.log.info("Volume Test Run Complete")
Example 6
    def Volume(self):
        #######################################################################
        self.log.info("Step1: Create a n node cluster")
        if self.nodes_init > 1:
            nodes_init = self.cluster.servers[1:self.nodes_init]
            self.task.rebalance([self.cluster.master], nodes_init, [])
            self.cluster.nodes_in_cluster.extend([self.cluster.master] +
                                                 nodes_init)

        #######################################################################
        self.log.info("Step 2 & 3: Create required buckets.")
        self.bucket = self.create_required_buckets()
        self.loop = 0
        scope_name = "VolumeScope"
        collection_prefix = "VolumeCollection"
        self.bucket_util.create_scope(self.cluster.master, self.bucket,
                                      {"name": scope_name})
        for i in range(self.num_collections):
            collection_name = collection_prefix + str(i)
            self.log.info("Creating scope::collection '%s::%s'" %
                          (scope_name, collection_name))
            self.bucket_util.create_collection(self.cluster.master,
                                               self.bucket, scope_name,
                                               {"name": collection_name})
            self.sleep(2)
        #######################################################################
        while self.loop < self.iterations:
            self.log.info("Step 4: Pre-Requisites for Loading of docs")
            self.bucket_util.add_rbac_user()
            self.generate_docs(doc_ops="create")
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            for task in tasks_info:
                self.task.jython_task_manager.get_task_result(task)
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)
            self.create_perc = self.input.param("create_perc", 100)
            ###################################################################
            self.log.info("Step 5: Rebalance in with Loading of docs")
            self.generate_docs(doc_ops="create")
            self.set_num_writer_and_reader_threads(
                num_writer_threads="disk_io_optimized",
                num_reader_threads="disk_io_optimized")
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=0)
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.set_num_writer_and_reader_threads(
                num_writer_threads=self.new_num_writer_threads,
                num_reader_threads=self.new_num_reader_threads)
            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result, "Rebalance Failed")
            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)

            ###################################################################
            self.log.info("Step 6: Rebalance Out with Loading of docs")
            self.generate_docs()
            self.set_num_writer_and_reader_threads(
                num_writer_threads="disk_io_optimized",
                num_reader_threads="disk_io_optimized")
            rebalance_task = self.rebalance(nodes_in=0, nodes_out=1)
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.set_num_writer_and_reader_threads(
                num_writer_threads=self.new_num_writer_threads,
                num_reader_threads=self.new_num_reader_threads)
            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result, "Rebalance Failed")
            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)

            ###################################################################
            self.log.info("Step 7: Rebalance In_Out with Loading of docs")
            self.generate_docs()
            self.set_num_writer_and_reader_threads(
                num_writer_threads="disk_io_optimized",
                num_reader_threads="disk_io_optimized")
            rebalance_task = self.rebalance(nodes_in=2, nodes_out=1)
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.set_num_writer_and_reader_threads(
                num_writer_threads=self.new_num_writer_threads,
                num_reader_threads=self.new_num_reader_threads)
            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result, "Rebalance Failed")
            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)

            ###################################################################
            self.log.info("Step 8: Swap with Loading of docs")
            self.generate_docs()
            self.set_num_writer_and_reader_threads(
                num_writer_threads="disk_io_optimized",
                num_reader_threads="disk_io_optimized")
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=1)
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.set_num_writer_and_reader_threads(
                num_writer_threads=self.new_num_writer_threads,
                num_reader_threads=self.new_num_reader_threads)

            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result, "Rebalance Failed")
            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)

            ###################################################################
            self.log.info("Step 9: Updating the bucket replica to 2")
            bucket_helper = BucketHelper(self.cluster.master)
            for i in range(len(self.bucket_util.buckets)):
                bucket_helper.change_bucket_props(self.bucket_util.buckets[i],
                                                  replicaNumber=2)
            self.generate_docs()
            self.set_num_writer_and_reader_threads(
                num_writer_threads="disk_io_optimized",
                num_reader_threads="disk_io_optimized")
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=0)
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.set_num_writer_and_reader_threads(
                num_writer_threads=self.new_num_writer_threads,
                num_reader_threads=self.new_num_reader_threads)
            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result, "Rebalance Failed")
            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)

            ###################################################################
            self.log.info("Step 10: Stopping and restarting memcached process")
            self.generate_docs()
            self.set_num_writer_and_reader_threads(
                num_writer_threads=self.new_num_writer_threads,
                num_reader_threads=self.new_num_reader_threads)
            rebalance_task = self.task.async_rebalance(self.cluster.servers,
                                                       [], [])
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.set_num_writer_and_reader_threads(
                num_writer_threads="disk_io_optimized",
                num_reader_threads="disk_io_optimized")

            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result, "Rebalance Failed")
            self.stop_process()
            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)

            ###################################################################
            self.log.info(
                "Step 11: Failover a node and RebalanceOut that node "
                "with loading in parallel")
            self.std_vbucket_dist = self.input.param("std_vbucket_dist", None)
            std = self.std_vbucket_dist or 1.0

            prev_failover_stats = self.bucket_util.get_failovers_logs(
                self.cluster.nodes_in_cluster, self.bucket_util.buckets)

            disk_replica_dataset, disk_active_dataset = self.bucket_util.\
                get_and_compare_active_replica_data_set_all(
                    self.cluster.nodes_in_cluster, self.bucket_util.buckets,
                    path=None)

            self.rest = RestConnection(self.cluster.master)
            self.nodes = self.cluster_util.get_nodes(self.cluster.master)
            self.chosen = self.cluster_util.pick_nodes(self.cluster.master,
                                                       howmany=1)

            # Mark Node for failover
            self.generate_docs()
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.success_failed_over = self.rest.fail_over(self.chosen[0].id,
                                                           graceful=True)
            self.sleep(10)
            self.rest.monitorRebalance()
            self.nodes = self.rest.node_statuses()
            self.set_num_writer_and_reader_threads(
                num_writer_threads=self.new_num_writer_threads,
                num_reader_threads=self.new_num_reader_threads)
            self.rest.rebalance(otpNodes=[node.id for node in self.nodes],
                                ejectedNodes=[self.chosen[0].id])
            self.assertTrue(self.rest.monitorRebalance(stop_if_loop=True),
                            msg="Rebalance failed")

            servs_out = [
                node for node in self.cluster.servers
                if node.ip == self.chosen[0].ip
            ]
            self.cluster.nodes_in_cluster = list(
                set(self.cluster.nodes_in_cluster) - set(servs_out))
            self.available_servers += servs_out

            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())

            self.bucket_util.compare_failovers_logs(
                prev_failover_stats, self.cluster.nodes_in_cluster,
                self.bucket_util.buckets)

            self.bucket_util.data_analysis_active_replica_all(
                disk_active_dataset,
                disk_replica_dataset,
                self.cluster.servers[:self.nodes_in + self.nodes_init],
                self.bucket_util.buckets,
                path=None)
            nodes = self.cluster_util.get_nodes_in_cluster(self.cluster.master)
            self.bucket_util.vb_distribution_analysis(
                servers=nodes,
                buckets=self.bucket_util.buckets,
                num_replicas=2,
                std=std,
                total_vbuckets=self.cluster_util.vbuckets)
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=0)
            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)

            ###################################################################
            self.log.info("Step 12: Failover a node and FullRecovery\
             that node")

            self.std_vbucket_dist = self.input.param("std_vbucket_dist", None)
            std = self.std_vbucket_dist or 1.0

            prev_failover_stats = self.bucket_util.get_failovers_logs(
                self.cluster.nodes_in_cluster, self.bucket_util.buckets)

            disk_replica_dataset, disk_active_dataset = self.bucket_util.\
                get_and_compare_active_replica_data_set_all(
                    self.cluster.nodes_in_cluster,
                    self.bucket_util.buckets,
                    path=None)

            self.rest = RestConnection(self.cluster.master)
            self.nodes = self.cluster_util.get_nodes(self.cluster.master)
            self.chosen = self.cluster_util.pick_nodes(self.cluster.master,
                                                       howmany=1)

            self.generate_docs()
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            # Mark Node for failover
            self.success_failed_over = self.rest.fail_over(self.chosen[0].id,
                                                           graceful=True)
            self.sleep(10)
            self.rest.monitorRebalance()
            # Mark Node for full recovery
            if self.success_failed_over:
                self.rest.set_recovery_type(otpNode=self.chosen[0].id,
                                            recoveryType="full")

            self.set_num_writer_and_reader_threads(
                num_writer_threads=self.new_num_writer_threads,
                num_reader_threads=self.new_num_reader_threads)

            rebalance_task = self.task.async_rebalance(
                self.cluster.servers[:self.nodes_init], [], [])

            self.set_num_writer_and_reader_threads(
                num_writer_threads="disk_io_optimized",
                num_reader_threads="disk_io_optimized")

            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result, "Rebalance Failed")

            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())

            self.bucket_util.compare_failovers_logs(
                prev_failover_stats, self.cluster.nodes_in_cluster,
                self.bucket_util.buckets)

            self.bucket_util.data_analysis_active_replica_all(
                disk_active_dataset,
                disk_replica_dataset,
                self.cluster.servers[:self.nodes_in + self.nodes_init],
                self.bucket_util.buckets,
                path=None)
            nodes = self.cluster_util.get_nodes_in_cluster(self.cluster.master)
            self.bucket_util.vb_distribution_analysis(
                servers=nodes,
                buckets=self.bucket_util.buckets,
                num_replicas=2,
                std=std,
                total_vbuckets=self.cluster_util.vbuckets)
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)

            ###################################################################
            self.log.info("Step 13: Failover a node and DeltaRecovery that \
            node with loading in parallel")

            self.std_vbucket_dist = self.input.param("std_vbucket_dist", None)
            std = self.std_vbucket_dist or 1.0

            prev_failover_stats = self.bucket_util.get_failovers_logs(
                self.cluster.nodes_in_cluster, self.bucket_util.buckets)

            disk_replica_dataset, disk_active_dataset = self.bucket_util.\
                get_and_compare_active_replica_data_set_all(
                    self.cluster.nodes_in_cluster,
                    self.bucket_util.buckets,
                    path=None)

            self.rest = RestConnection(self.cluster.master)
            self.nodes = self.cluster_util.get_nodes(self.cluster.master)
            self.chosen = self.cluster_util.pick_nodes(self.cluster.master,
                                                       howmany=1)

            self.generate_docs()
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            # Mark Node for failover
            self.success_failed_over = self.rest.fail_over(self.chosen[0].id,
                                                           graceful=True)
            self.sleep(10)
            self.rest.monitorRebalance()
            if self.success_failed_over:
                self.rest.set_recovery_type(otpNode=self.chosen[0].id,
                                            recoveryType="delta")
            self.set_num_writer_and_reader_threads(
                num_writer_threads=self.new_num_writer_threads,
                num_reader_threads=self.new_num_reader_threads)

            rebalance_task = self.task.async_rebalance(
                self.cluster.servers[:self.nodes_init], [], [])
            self.set_num_writer_and_reader_threads(
                num_writer_threads="disk_io_optimized",
                num_reader_threads="disk_io_optimized")
            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result, "Rebalance Failed")

            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())

            self.bucket_util.compare_failovers_logs(
                prev_failover_stats, self.cluster.nodes_in_cluster,
                self.bucket_util.buckets)

            self.bucket_util.data_analysis_active_replica_all(
                disk_active_dataset,
                disk_replica_dataset,
                self.cluster.servers[:self.nodes_in + self.nodes_init],
                self.bucket_util.buckets,
                path=None)
            nodes = self.cluster_util.get_nodes_in_cluster(self.cluster.master)
            self.bucket_util.vb_distribution_analysis(
                servers=nodes,
                buckets=self.bucket_util.buckets,
                num_replicas=2,
                std=std,
                total_vbuckets=self.cluster_util.vbuckets)
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)

            #######################################################################
            self.log.info("Step 14: Updating the bucket replica to 1")
            bucket_helper = BucketHelper(self.cluster.master)
            for i in range(len(self.bucket_util.buckets)):
                bucket_helper.change_bucket_props(self.bucket_util.buckets[i],
                                                  replicaNumber=1)
            self.generate_docs()
            self.set_num_writer_and_reader_threads(
                num_writer_threads=self.new_num_writer_threads,
                num_reader_threads=self.new_num_reader_threads)
            rebalance_task = self.task.async_rebalance(self.cluster.servers,
                                                       [], [])
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.set_num_writer_and_reader_threads(
                num_writer_threads="disk_io_optimized",
                num_reader_threads="disk_io_optimized")

            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result, "Rebalance Failed")
            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)

            #######################################################################
            self.log.info("Step 15: Flush the bucket and \
            start the entire process again")
            self.loop += 1
            if self.loop < self.iterations:
                # Flush the bucket
                self.bucket_util.flush_all_buckets(self.cluster.master)
                self.sleep(10)
                if len(self.cluster.nodes_in_cluster) > self.nodes_init:
                    nodes_cluster = self.cluster.nodes_in_cluster[:]
                    nodes_cluster.remove(self.cluster.master)
                    servs_out = random.sample(
                        nodes_cluster,
                        int(
                            len(self.cluster.nodes_in_cluster) -
                            self.nodes_init))
                    rebalance_task = self.task.async_rebalance(
                        self.cluster.servers[:self.nodes_init], [], servs_out)

                    self.task.jython_task_manager.get_task_result(
                        rebalance_task)
                    self.available_servers += servs_out
                    self.cluster.nodes_in_cluster = list(
                        set(self.cluster.nodes_in_cluster) - set(servs_out))
                    self.get_bucket_dgm(self.bucket)
            else:
                self.log.info("Volume Test Run Complete")
                self.get_bucket_dgm(self.bucket)
Example 7
    def test_volume_taf(self):
        self.loop = 0
        # self.set_metadata_purge_interval()
        while self.loop < self.iterations:
            self.log.info("Finished steps 1-4 successfully in setup")
            self.log.info("Step 5: Rebalance in with Loading of docs")
            if self.data_load_stage == "before":
                task = self.data_load_collection(async_load=False)
                if task.result is False:
                    self.fail("Doc loading failed")
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=0)
            if self.data_load_stage == "during":
                task = self.data_load_collection()
            self.wait_for_rebalance_to_complete(rebalance_task)
            if self.data_load_stage == "during":
                self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            self.bucket_util.print_bucket_stats()
            #########################################################################################################################
            self.log.info("Step 6: Rebalance Out with Loading of docs")
            if self.data_load_stage == "before":
                task = self.data_load_collection(async_load=False)
                if task.result is False:
                    self.fail("Doc loading failed")
            rebalance_task = self.rebalance(nodes_in=0, nodes_out=1)
            if self.data_load_stage == "during":
                task = self.data_load_collection()
            self.wait_for_rebalance_to_complete(rebalance_task)
            if self.data_load_stage == "during":
                self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            self.bucket_util.print_bucket_stats()
            #######################################################################################################################
            self.log.info("Step 7: Rebalance In_Out with Loading of docs")
            if self.data_load_stage == "before":
                task = self.data_load_collection(async_load=False)
                if task.result is False:
                    self.fail("Doc loading failed")
            rebalance_task = self.rebalance(nodes_in=2, nodes_out=1)
            if self.data_load_stage == "during":
                task = self.data_load_collection()
            self.wait_for_rebalance_to_complete(rebalance_task)
            if self.data_load_stage == "during":
                self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            self.bucket_util.print_bucket_stats()
            ########################################################################################################################
            self.log.info("Step 8: Swap with Loading of docs")
            if self.data_load_stage == "before":
                task = self.data_load_collection(async_load=False)
                if task.result is False:
                    self.fail("Doc loading failed")
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=1)
            if self.data_load_stage == "during":
                task = self.data_load_collection()
            self.wait_for_rebalance_to_complete(rebalance_task)
            if self.data_load_stage == "during":
                self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            ########################################################################################################################
            self.log.info("Step 9: Updating the bucket replica to 2")
            if self.data_load_stage == "before":
                task = self.data_load_collection(async_load=False)
                if task.result is False:
                    self.fail("Doc loading failed")
            bucket_helper = BucketHelper(self.cluster.master)
            for i in range(len(self.bucket_util.buckets)):
                bucket_helper.change_bucket_props(self.bucket_util.buckets[i],
                                                  replicaNumber=2)
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=0)
            if self.data_load_stage == "during":
                task = self.data_load_collection()
            self.wait_for_rebalance_to_complete(rebalance_task)
            if self.data_load_stage == "during":
                self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            self.bucket_util.print_bucket_stats()
            ########################################################################################################################
            if self.contains_ephemeral:
                self.log.info("No Memcached kill for ephemeral bucket")
            else:
                self.log.info(
                    "Step 10: Stopping and restarting memcached process")
                if self.data_load_stage == "before":
                    task = self.data_load_collection(async_load=False)
                    if task.result is False:
                        self.fail("Doc loading failed")
                rebalance_task = self.task.async_rebalance(
                    self.cluster.servers, [], [], retry_get_process_num=100)
                if self.data_load_stage == "during":
                    task = self.data_load_collection()
                self.wait_for_rebalance_to_complete(rebalance_task)
                self.stop_process()
                if self.data_load_stage == "during":
                    self.wait_for_async_data_load_to_complete(task)
                self.data_validation_collection()
                self.bucket_util.print_bucket_stats()
            ########################################################################################################################
            step_count = 10
            for failover in ["Graceful", "Hard"]:
                for action in [
                        "RebalanceOut", "FullRecovery", "DeltaRecovery"
                ]:
                    step_count = step_count + 1
                    self.log.info(
                        "Step {0}: {1} Failover a node and {2} that node with data load in parallel"
                        .format(step_count, failover, action))
                    if self.data_load_stage == "before":
                        task = self.data_load_collection(async_load=False)
                        if task.result is False:
                            self.fail("Doc loading failed")

                    self.std_vbucket_dist = self.input.param(
                        "std_vbucket_dist", None)
                    std = self.std_vbucket_dist or 1.0

                    prev_failover_stats = self.bucket_util.get_failovers_logs(
                        self.cluster.nodes_in_cluster,
                        self.bucket_util.buckets)
                    prev_vbucket_stats = self.bucket_util.get_vbucket_seqnos(
                        self.cluster.nodes_in_cluster,
                        self.bucket_util.buckets)
                    self.sleep(10)

                    disk_replica_dataset, disk_active_dataset = \
                        self.bucket_util.get_and_compare_active_replica_data_set_all(
                            self.cluster.nodes_in_cluster,
                            self.bucket_util.buckets,
                            path=None)

                    self.rest = RestConnection(self.cluster.master)
                    self.nodes = self.cluster_util.get_nodes(
                        self.cluster.master)
                    self.chosen = self.cluster_util.pick_nodes(
                        self.cluster.master, howmany=1)

                    if self.data_load_stage == "during":
                        task = self.data_load_collection()
                    # Mark Node for failover
                    if failover == "Graceful":
                        self.success_failed_over = self.rest.fail_over(
                            self.chosen[0].id, graceful=True)
                    else:
                        self.success_failed_over = self.rest.fail_over(
                            self.chosen[0].id, graceful=False)

                    self.sleep(300)
                    self.wait_for_failover_or_assert(1)

                    # Perform the action
                    if action == "RebalanceOut":
                        self.nodes = self.rest.node_statuses()
                        self.rest.rebalance(
                            otpNodes=[node.id for node in self.nodes],
                            ejectedNodes=[self.chosen[0].id])
                        # self.sleep(600)
                        self.assertTrue(
                            self.rest.monitorRebalance(stop_if_loop=False),
                            msg="Rebalance failed")
                        servs_out = [
                            node for node in self.cluster.servers
                            if node.ip == self.chosen[0].ip
                        ]
                        self.cluster.nodes_in_cluster = list(
                            set(self.cluster.nodes_in_cluster) -
                            set(servs_out))
                        self.available_servers += servs_out
                        self.sleep(10)
                    else:
                        if action == "FullRecovery":
                            if self.success_failed_over:
                                self.rest.set_recovery_type(
                                    otpNode=self.chosen[0].id,
                                    recoveryType="full")
                        elif action == "DeltaRecovery":
                            if self.success_failed_over:
                                self.rest.set_recovery_type(
                                    otpNode=self.chosen[0].id,
                                    recoveryType="delta")

                        rebalance_task = self.task.async_rebalance(
                            self.cluster.servers[:self.nodes_init], [], [],
                            retry_get_process_num=100)
                        self.wait_for_rebalance_to_complete(rebalance_task)
                        self.sleep(10)

                    if self.data_load_stage == "during":
                        self.wait_for_async_data_load_to_complete(task)
                    self.data_validation_collection()

                    self.bucket_util.compare_failovers_logs(
                        prev_failover_stats, self.cluster.nodes_in_cluster,
                        self.bucket_util.buckets)
                    self.sleep(10)

                    self.bucket_util.data_analysis_active_replica_all(
                        disk_active_dataset,
                        disk_replica_dataset,
                        self.cluster.servers[:self.nodes_in + self.nodes_init],
                        self.bucket_util.buckets,
                        path=None)
                    nodes = self.cluster_util.get_nodes_in_cluster(
                        self.cluster.master)
                    self.bucket_util.vb_distribution_analysis(
                        servers=nodes,
                        buckets=self.bucket_util.buckets,
                        num_replicas=2,
                        std=std,
                        total_vbuckets=self.cluster_util.vbuckets)
                    self.sleep(10)
                    self.tasks = []
                    # Bring the rebalanced-out node back into the cluster for further steps
                    if action == "RebalanceOut":
                        self.sleep(120)
                        rebalance_task = self.rebalance(nodes_in=1,
                                                        nodes_out=0)
                        # self.sleep(600)
                        self.wait_for_rebalance_to_complete(rebalance_task)
                    self.bucket_util.print_bucket_stats()
            ########################################################################################################################
            self.log.info("Step 17: Updating the bucket replica to 1")
            if self.data_load_stage == "before":
                task = self.data_load_collection(async_load=False)
                if task.result is False:
                    self.fail("Doc loading failed")
            bucket_helper = BucketHelper(self.cluster.master)
            for i in range(len(self.bucket_util.buckets)):
                bucket_helper.change_bucket_props(self.bucket_util.buckets[i],
                                                  replicaNumber=1)
            rebalance_task = self.task.async_rebalance(
                self.cluster.servers, [], [], retry_get_process_num=100)
            if self.data_load_stage == "during":
                task = self.data_load_collection()
            self.wait_for_rebalance_to_complete(rebalance_task)
            if self.data_load_stage == "during":
                self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            ########################################################################################################################
            self.log.info(
                "Step 18: Flush the bucket and start the entire process again")
            self.loop += 1
            if self.loop < self.iterations:
                # Flush the bucket
                self.bucket_util.flush_all_buckets(self.cluster.master)
                self.sleep(10)
                if len(self.cluster.nodes_in_cluster) > self.nodes_init:
                    self.nodes_cluster = self.cluster.nodes_in_cluster[:]
                    self.nodes_cluster.remove(self.cluster.master)
                    servs_out = random.sample(
                        self.nodes_cluster,
                        int(
                            len(self.cluster.nodes_in_cluster) -
                            self.nodes_init))
                    rebalance_task = self.task.async_rebalance(
                        self.cluster.servers[:self.nodes_init], [],
                        servs_out,
                        retry_get_process_num=100)
                    self.wait_for_rebalance_to_complete(rebalance_task)
                    self.available_servers += servs_out
                    self.cluster.nodes_in_cluster = list(
                        set(self.cluster.nodes_in_cluster) - set(servs_out))
            else:
                self.log.info("Volume Test Run Complete")
Example no. 8
    def test_volume_taf(self):
        self.loop = 0
        while self.loop < self.iterations:
            self.log.info("Finished steps 1-4 successfully in setup")
            self.log.info("Step 5: Rebalance in with Loading of docs")
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=0)
            task = self.data_load_collection()
            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result,
                            "rebalance failed, stuck or did not complete")
            self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            self.bucket_util.print_bucket_stats()
            #########################################################################################################################
            self.log.info("Step 6: Rebalance Out with Loading of docs")
            rebalance_task = self.rebalance(nodes_in=0, nodes_out=1)
            task = self.data_load_collection()
            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result,
                            "rebalance failed, stuck or did not complete")
            self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            self.bucket_util.print_bucket_stats()
            #######################################################################################################################
            self.log.info("Step 7: Rebalance In_Out with Loading of docs")
            rebalance_task = self.rebalance(nodes_in=2, nodes_out=1)
            task = self.data_load_collection()
            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result,
                            "rebalance failed, stuck or did not complete")
            self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            self.bucket_util.print_bucket_stats()
            ########################################################################################################################
            self.log.info("Step 8: Swap with Loading of docs")
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=1)
            task = self.data_load_collection()
            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result,
                            "rebalance failed, stuck or did not complete")
            self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            ########################################################################################################################
            self.log.info("Step 9: Updating the bucket replica to 2")
            bucket_helper = BucketHelper(self.cluster.master)
            for i in range(len(self.bucket_util.buckets)):
                bucket_helper.change_bucket_props(self.bucket_util.buckets[i],
                                                  replicaNumber=2)
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=0)
            task = self.data_load_collection()
            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result,
                            "rebalance failed, stuck or did not complete")
            self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            self.bucket_util.print_bucket_stats()
            ########################################################################################################################
            if "ephemeral" in self.bucket_type:
                self.log.info("No Memcached kill for epehemral bucket")
            else:
                self.log.info(
                    "Step 10: Stopping and restarting memcached process")
                rebalance_task = self.task.async_rebalance(
                    self.cluster.servers, [], [])
                task = self.data_load_collection()
                self.task.jython_task_manager.get_task_result(rebalance_task)
                self.assertTrue(rebalance_task.result,
                                "rebalance failed, stuck or did not complete")
                self.stop_process()
                self.wait_for_async_data_load_to_complete(task)
                self.data_validation_collection()
                self.bucket_util.print_bucket_stats()
            ########################################################################################################################
            self.log.info(
                "Step 11: Failover a node and RebalanceOut that node with loading in parallel"
            )
            self.std_vbucket_dist = self.input.param("std_vbucket_dist", None)
            std = self.std_vbucket_dist or 1.0

            prev_failover_stats = self.bucket_util.get_failovers_logs(
                self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            prev_vbucket_stats = self.bucket_util.get_vbucket_seqnos(
                self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            self.sleep(10)

            disk_replica_dataset, disk_active_dataset = self.bucket_util.get_and_compare_active_replica_data_set_all(
                self.cluster.nodes_in_cluster,
                self.bucket_util.buckets,
                path=None)

            self.rest = RestConnection(self.cluster.master)
            self.nodes = self.cluster_util.get_nodes(self.cluster.master)
            self.chosen = self.cluster_util.pick_nodes(self.cluster.master,
                                                       howmany=1)

            # Mark Node for failover
            task = self.data_load_collection()
            self.success_failed_over = self.rest.fail_over(self.chosen[0].id,
                                                           graceful=False)

            self.sleep(300)
            self.nodes = self.rest.node_statuses()
            self.rest.rebalance(otpNodes=[node.id for node in self.nodes],
                                ejectedNodes=[self.chosen[0].id])
            # self.sleep(600)
            self.assertTrue(self.rest.monitorRebalance(stop_if_loop=True),
                            msg="Rebalance failed")

            servs_out = [
                node for node in self.cluster.servers
                if node.ip == self.chosen[0].ip
            ]
            self.cluster.nodes_in_cluster = list(
                set(self.cluster.nodes_in_cluster) - set(servs_out))
            self.available_servers += servs_out
            self.sleep(10)

            self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()

            self.bucket_util.compare_failovers_logs(
                prev_failover_stats, self.cluster.nodes_in_cluster,
                self.bucket_util.buckets)
            self.sleep(10)

            self.bucket_util.data_analysis_active_replica_all(
                disk_active_dataset,
                disk_replica_dataset,
                self.cluster.servers[:self.nodes_in + self.nodes_init],
                self.bucket_util.buckets,
                path=None)
            nodes = self.cluster_util.get_nodes_in_cluster(self.cluster.master)
            self.bucket_util.vb_distribution_analysis(
                servers=nodes,
                buckets=self.bucket_util.buckets,
                num_replicas=2,
                std=std,
                total_vbuckets=self.cluster_util.vbuckets)
            self.sleep(10)
            self.tasks = []
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=0)
            # self.sleep(600)
            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result,
                            "rebalance failed, stuck or did not complete")
            self.bucket_util.print_bucket_stats()
            ########################################################################################################################
            self.log.info(
                "Step 12: Failover a node and FullRecovery that node")

            self.std_vbucket_dist = self.input.param("std_vbucket_dist", None)
            std = self.std_vbucket_dist or 1.0

            prev_failover_stats = self.bucket_util.get_failovers_logs(
                self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            prev_vbucket_stats = self.bucket_util.get_vbucket_seqnos(
                self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            self.sleep(10)

            disk_replica_dataset, disk_active_dataset = self.bucket_util.get_and_compare_active_replica_data_set_all(
                self.cluster.nodes_in_cluster,
                self.bucket_util.buckets,
                path=None)

            self.rest = RestConnection(self.cluster.master)
            self.nodes = self.cluster_util.get_nodes(self.cluster.master)
            self.chosen = self.cluster_util.pick_nodes(self.cluster.master,
                                                       howmany=1)

            task = self.data_load_collection()
            # Mark Node for failover
            self.success_failed_over = self.rest.fail_over(self.chosen[0].id,
                                                           graceful=False)

            self.sleep(300)

            # Mark Node for full recovery
            if self.success_failed_over:
                self.rest.set_recovery_type(otpNode=self.chosen[0].id,
                                            recoveryType="full")

            rebalance_task = self.task.async_rebalance(
                self.cluster.servers[:self.nodes_init], [], [])
            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result,
                            "rebalance failed, stuck or did not complete")
            self.sleep(10)

            self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()

            self.bucket_util.compare_failovers_logs(
                prev_failover_stats, self.cluster.nodes_in_cluster,
                self.bucket_util.buckets)
            self.sleep(10)

            self.bucket_util.data_analysis_active_replica_all(
                disk_active_dataset,
                disk_replica_dataset,
                self.cluster.servers[:self.nodes_in + self.nodes_init],
                self.bucket_util.buckets,
                path=None)
            nodes = self.cluster_util.get_nodes_in_cluster(self.cluster.master)
            self.bucket_util.vb_distribution_analysis(
                servers=nodes,
                buckets=self.bucket_util.buckets,
                num_replicas=2,
                std=std,
                total_vbuckets=self.cluster_util.vbuckets)
            self.sleep(10)
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            ########################################################################################################################
            self.log.info(
                "Step 13: Failover a node and DeltaRecovery that node with loading in parallel"
            )

            self.std_vbucket_dist = self.input.param("std_vbucket_dist", None)
            std = self.std_vbucket_dist or 1.0

            prev_failover_stats = self.bucket_util.get_failovers_logs(
                self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            prev_vbucket_stats = self.bucket_util.get_vbucket_seqnos(
                self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            self.sleep(10)

            disk_replica_dataset, disk_active_dataset = self.bucket_util.get_and_compare_active_replica_data_set_all(
                self.cluster.nodes_in_cluster,
                self.bucket_util.buckets,
                path=None)

            self.rest = RestConnection(self.cluster.master)
            self.nodes = self.cluster_util.get_nodes(self.cluster.master)
            self.chosen = self.cluster_util.pick_nodes(self.cluster.master,
                                                       howmany=1)

            task = self.data_load_collection()
            # Mark Node for failover
            self.success_failed_over = self.rest.fail_over(self.chosen[0].id,
                                                           graceful=False)

            self.sleep(300)
            if self.success_failed_over:
                self.rest.set_recovery_type(otpNode=self.chosen[0].id,
                                            recoveryType="delta")

            rebalance_task = self.task.async_rebalance(
                self.cluster.servers[:self.nodes_init], [], [])

            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result,
                            "rebalance failed, stuck or did not complete")
            self.sleep(10)

            self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()

            self.bucket_util.compare_failovers_logs(
                prev_failover_stats, self.cluster.nodes_in_cluster,
                self.bucket_util.buckets)
            self.sleep(10)

            self.bucket_util.data_analysis_active_replica_all(
                disk_active_dataset,
                disk_replica_dataset,
                self.cluster.servers[:self.nodes_in + self.nodes_init],
                self.bucket_util.buckets,
                path=None)
            nodes = self.cluster_util.get_nodes_in_cluster(self.cluster.master)
            self.bucket_util.vb_distribution_analysis(
                servers=nodes,
                buckets=self.bucket_util.buckets,
                num_replicas=2,
                std=std,
                total_vbuckets=self.cluster_util.vbuckets)
            self.bucket_util.print_bucket_stats()
            ########################################################################################################################
            self.log.info("Step 14: Updating the bucket replica to 1")
            bucket_helper = BucketHelper(self.cluster.master)
            for i in range(len(self.bucket_util.buckets)):
                bucket_helper.change_bucket_props(self.bucket_util.buckets[i],
                                                  replicaNumber=1)
            rebalance_task = self.task.async_rebalance(self.cluster.servers,
                                                       [], [])
            task = self.data_load_collection()
            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result,
                            "rebalance failed, stuck or did not complete")
            self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            ########################################################################################################################
            self.log.info(
                "Step 15: Flush the bucket and start the entire process again")
            self.loop += 1
            if self.loop < self.iterations:
                # Flush the bucket
                self.bucket_util.flush_all_buckets(self.cluster.master)
                self.sleep(10)
                if len(self.cluster.nodes_in_cluster) > self.nodes_init:
                    self.nodes_cluster = self.cluster.nodes_in_cluster[:]
                    self.nodes_cluster.remove(self.cluster.master)
                    servs_out = random.sample(
                        self.nodes_cluster,
                        int(
                            len(self.cluster.nodes_in_cluster) -
                            self.nodes_init))
                    rebalance_task = self.task.async_rebalance(
                        self.cluster.servers[:self.nodes_init], [], servs_out)
                    self.task.jython_task_manager.get_task_result(
                        rebalance_task)
                    self.available_servers += servs_out
                    self.cluster.nodes_in_cluster = list(
                        set(self.cluster.nodes_in_cluster) - set(servs_out))
                    self.assertTrue(
                        rebalance_task.result,
                        "rebalance failed, stuck or did not complete")
            else:
                self.log.info("Volume Test Run Complete")
Example no. 9
    def common_test_body(self, failover_reason, rebalance_type=None):
        """
            Main Test body which contains the flow of the failover basic steps
            1. Starts Operations if programmed into the test case(before/after)
            2. Start View and Index Building operations
            3. Failover K out of N nodes (failover can be HARD/GRACEFUL)
            4.1 Rebalance the cluster is failover of K nodeStatuses
            4.2 Run Add-Back operation with recoveryType = (full/delta)
                with rebalance
            5. Verify all expected operations completed by checking
               stats, replicaiton, views, data correctness
        """
        # Pick the reference node for communication
        # We pick a node in the cluster which will NOT be failed over
        self.filter_list = []
        if self.failoverMaster:
            self.master = self.cluster.servers[1]
        else:
            self.master = self.cluster.master
        self.log.info(
            " Picking node {0} as reference node for test case".format(
                self.master.ip))
        self.print_test_params(failover_reason)
        self.rest = RestConnection(self.master)
        self.nodes = self.rest.node_statuses()
        # Set the data path for the cluster
        self.data_path = self.rest.get_data_path()

        # Variable to decide the durability outcome
        durability_will_fail = False
        # Variable to track the number of nodes failed
        num_nodes_failed = 1

        # Check whether all nodes run a version >= 3.0 (graceful failover support)
        versions = self.rest.get_nodes_versions()
        self.version_greater_than_2_5 = True
        for version in versions:
            if "3" > version:
                self.version_greater_than_2_5 = False

        # Do not run this test if the graceful category is being used
        if not self.version_greater_than_2_5 \
                and (self.graceful or self.recoveryType is not None):
            self.log.error(
                "Can't apply graceful failover to nodes with version < 3.*")
            self.log.error("Please check configuration params: SKIPPING TEST")
            return

        # Find nodes that will under go failover
        if self.failoverMaster:
            self.chosen = RebalanceHelper.pick_nodes(
                self.master, howmany=1, target_node=self.servers[0])
        else:
            self.chosen = RebalanceHelper.pick_nodes(
                self.master, howmany=self.num_failed_nodes)

        # Perform operations - Create/Update/Delete
        # self.withMutationOps = True => Run Operations in parallel to failover
        # self.withMutationOps = False => Run Operations Before failover
        self.load_initial_data()
        if not self.withMutationOps:
            self.run_mutation_operations()
        # Perform View Creation Tasks and
        # check for completion if required before failover
        if self.withViewsOps:
            self.run_view_creation_operations(self.servers)
            if not self.createIndexesDuringFailover:
                self.query_and_monitor_view_tasks(self.servers)

        # Take a snapshot of the data set used for validation
        record_static_data_set = {}
        prev_vbucket_stats = {}
        prev_failover_stats = {}
        if not self.withMutationOps:
            record_static_data_set = self.bucket_util.get_data_set_all(
                self.cluster.servers, self.buckets, path=None)

        # Capture vbucket and failover stats if test version >= 2.5.*
        if self.version_greater_than_2_5 and self.upr_check:
            prev_vbucket_stats = self.bucket_util.get_vbucket_seqnos(
                self.servers, self.buckets)
            prev_failover_stats = self.bucket_util.get_failovers_logs(
                self.servers, self.buckets)

        # Perform Operations related to failover
        if self.withMutationOps or self.withViewsOps or self.compact:
            self.run_failover_operations_with_ops(self.chosen, failover_reason)
        else:
            self.run_failover_operations(self.chosen, failover_reason)

        target_bucket = self.bucket_util.buckets[0]

        # Update new_replica value, if provided in the conf
        if self.new_replica:
            self.num_replicas = self.new_replica
            bucket_helper = BucketHelper(self.master)
            bucket_helper.change_bucket_props(target_bucket.name,
                                              replicaNumber=self.num_replicas)

        # Decide whether the durability is going to fail or not
        if self.num_failed_nodes >= 1 and self.num_replicas > 1:
            durability_will_fail = True

        # Construct target vbucket list from the nodes
        # which are going to be failed over
        vbucket_list = list()
        for target_node in self.chosen:
            shell_conn = RemoteMachineShellConnection(target_node)
            cb_stats = Cbstats(shell_conn)
            vbuckets = cb_stats.vbucket_list(target_bucket.name,
                                             self.target_vbucket_type)
            shell_conn.disconnect()
            vbucket_list += vbuckets

        # Generate doc loaders that target only the vbuckets owned by the
        # nodes chosen for failover, so CRUDs hit exactly the data that the
        # failover affects (see the standalone sketch after this example)
        self.gen_create = doc_generator(self.key,
                                        self.num_items,
                                        self.num_items * 1.5,
                                        target_vbucket=vbucket_list)
        self.gen_update = doc_generator(self.key,
                                        self.num_items / 2,
                                        self.num_items,
                                        target_vbucket=vbucket_list)
        self.gen_delete = doc_generator(self.key,
                                        self.num_items / 4,
                                        self.num_items / 2 - 1,
                                        target_vbucket=vbucket_list)
        self.afterfailover_gen_create = doc_generator(
            self.key,
            self.num_items * 1.6,
            self.num_items * 2,
            target_vbucket=vbucket_list)
        self.afterfailover_gen_update = doc_generator(
            self.key, 1, self.num_items / 4, target_vbucket=vbucket_list)
        self.afterfailover_gen_delete = doc_generator(
            self.key,
            self.num_items * 0.5,
            self.num_items * 0.75,
            target_vbucket=vbucket_list)

        # Perform Add Back Operation with Rebalance
        # or only Rebalance with verifications
        if not self.gracefulFailoverFail and self.runRebalanceAfterFailover:
            if self.failover_onebyone:
                # Reset it back to False
                durability_will_fail = False
                for node_chosen in self.chosen:
                    if num_nodes_failed > 1:
                        durability_will_fail = True

                    if self.add_back_flag:
                        # In add-back case, durability should never fail, since
                        # the num_nodes in the cluster will remain the same
                        self.run_add_back_operation_and_verify(
                            [node_chosen],
                            prev_vbucket_stats,
                            record_static_data_set,
                            prev_failover_stats,
                            rebalance_type=rebalance_type)
                    else:
                        self.run_rebalance_after_failover_and_verify(
                            [node_chosen],
                            prev_vbucket_stats,
                            record_static_data_set,
                            prev_failover_stats,
                            durability_will_fail=durability_will_fail)
                    num_nodes_failed += 1
            else:
                if self.add_back_flag:
                    self.run_add_back_operation_and_verify(
                        self.chosen,
                        prev_vbucket_stats,
                        record_static_data_set,
                        prev_failover_stats,
                        durability_will_fail=durability_will_fail,
                        rebalance_type=rebalance_type)
                else:
                    self.run_rebalance_after_failover_and_verify(
                        self.chosen,
                        prev_vbucket_stats,
                        record_static_data_set,
                        prev_failover_stats,
                        durability_will_fail=durability_will_fail)
        else:
            return

        # Verify unacked bytes only if durability is not expected to fail
        if self.during_ops is None and not durability_will_fail:
            self.bucket_util.verify_unacked_bytes_all_buckets(
                filter_list=self.filter_list)
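
The vbucket-targeted generators built in common_test_body are the part worth isolating: they restrict the CRUD load to vbuckets owned by the nodes that will be failed over. A standalone sketch, reusing only calls the example itself makes (RemoteMachineShellConnection, Cbstats.vbucket_list, doc_generator); the function name and signature are illustrative assumptions:

def docs_targeting_failover_vbuckets(key_prefix, num_items, failover_nodes,
                                     bucket_name, vbucket_type="active"):
    vbucket_list = []
    for node in failover_nodes:
        shell = RemoteMachineShellConnection(node)
        # vbuckets (active or replica) currently owned by this node
        vbucket_list += Cbstats(shell).vbucket_list(bucket_name, vbucket_type)
        shell.disconnect()
    # Keys produced here hash only to the collected vbuckets, so the load
    # exercises exactly the data affected by the upcoming failover
    return doc_generator(key_prefix, num_items, int(num_items * 1.5),
                         target_vbucket=vbucket_list)
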
Example no. 10
    def test_volume_taf(self):
        ########################################################################################################################
        self.log.info("Step1: Create a n node cluster")
        nodes_init = self.cluster.servers[1:self.nodes_init] if self.nodes_init != 1 else []
        self.task.rebalance([self.cluster.master], nodes_init, [])
        self.cluster.nodes_in_cluster.extend([self.cluster.master] + nodes_init)
        self.query_node = self.cluster.master
        ########################################################################################################################
        self.log.info("Step 2 & 3: Create required buckets.")
        bucket = self.create_required_buckets()
        self.loop = 0
        #######################################################################################################################
        while self.loop < self.iterations:
            self.log.info("Step 4: Pre-Requisites for Loading of docs")
            self.start = 0
            self.bucket_util.add_rbac_user()
            self.end = self.initial_load_count = self.input.param("initial_load", 1000)
            initial_load = doc_generator("Users", self.start, self.start + self.initial_load_count, doc_size=self.doc_size)
            self.initial_data_load(initial_load)
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            self.get_bucket_dgm(bucket)
            ########################################################################################################################
            self.log.info("Step 5: Rebalance in with Loading of docs")
            self.generate_docs()
            self.gen_delete_users = None
            self._iter_count = 0
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads="disk_io_optimized",
                                                       num_reader_threads="disk_io_optimized")
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=0)
            tasks_info = self.data_load()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads=self.new_num_writer_threads,
                                                       num_reader_threads=self.new_num_reader_threads)
            # self.sleep(600, "Wait for Rebalance to start")
            self.task.jython_task_manager.get_task_result(rebalance_task)
            reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
            self.assertTrue(reached, "rebalance failed, stuck or did not complete")
            self.data_validation_mode(tasks_info)
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(bucket)
            #########################################################################################################################
            self.log.info("Step 6: Rebalance Out with Loading of docs")
            self.generate_docs()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads="disk_io_optimized",
                                                       num_reader_threads="disk_io_optimized")
            rebalance_task = self.rebalance(nodes_in=0, nodes_out=1)
            tasks_info = self.data_load()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads=self.new_num_writer_threads,
                                                       num_reader_threads=self.new_num_reader_threads)
            # self.sleep(600, "Wait for Rebalance to start")
            self.task.jython_task_manager.get_task_result(rebalance_task)
            reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
            self.assertTrue(reached, "rebalance failed, stuck or did not complete")
            self.data_validation_mode(tasks_info)
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(bucket)
            #######################################################################################################################
            self.log.info("Step 7: Rebalance In_Out with Loading of docs")
            self.generate_docs()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads="disk_io_optimized",
                                                       num_reader_threads="disk_io_optimized")
            rebalance_task = self.rebalance(nodes_in=2, nodes_out=1)
            tasks_info = self.data_load()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads=self.new_num_writer_threads,
                                                       num_reader_threads=self.new_num_reader_threads)
            # self.sleep(600, "Wait for Rebalance to start")
            self.task.jython_task_manager.get_task_result(rebalance_task)
            reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
            self.assertTrue(reached, "rebalance failed, stuck or did not complete")
            self.data_validation_mode(tasks_info)
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(bucket)
            ########################################################################################################################
            self.log.info("Step 8: Swap with Loading of docs")
            self.generate_docs()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads="disk_io_optimized",
                                                       num_reader_threads="disk_io_optimized")
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=1)
            tasks_info = self.data_load()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads=self.new_num_writer_threads,
                                                       num_reader_threads=self.new_num_reader_threads)
            # self.sleep(600, "Wait for Rebalance to start")
            self.task.jython_task_manager.get_task_result(rebalance_task)
            reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
            self.assertTrue(reached, "rebalance failed, stuck or did not complete")
            self.data_validation_mode(tasks_info)
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(bucket)
            ########################################################################################################################
            self.log.info("Step 9: Updating the bucket replica to 2")
            bucket_helper = BucketHelper(self.cluster.master)
            for i in range(len(self.bucket_util.buckets)):
                bucket_helper.change_bucket_props(
                    self.bucket_util.buckets[i], replicaNumber=2)
            self.generate_docs()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads="disk_io_optimized",
                                                       num_reader_threads="disk_io_optimized")
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=0)
            tasks_info = self.data_load()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads=self.new_num_writer_threads,
                                                       num_reader_threads=self.new_num_reader_threads)
            # self.sleep(600, "Wait for Rebalance to start")
            self.task.jython_task_manager.get_task_result(rebalance_task)
            reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
            self.assertTrue(reached, "rebalance failed, stuck or did not complete")
            self.data_validation_mode(tasks_info)
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(bucket)
            ########################################################################################################################
            if "ephemeral" in self.bucket_type:
                self.log.info("No Memcached kill for epehemral bucket")
            else:
                self.log.info("Step 10: Stopping and restarting memcached process")
                self.generate_docs()
                if not self.atomicity:
                    self.set_num_writer_and_reader_threads(num_writer_threads=self.new_num_writer_threads,
                                                           num_reader_threads=self.new_num_reader_threads)
                rebalance_task = self.task.async_rebalance(self.cluster.servers, [], [])
                tasks_info = self.data_load()
                if not self.atomicity:
                    self.set_num_writer_and_reader_threads(num_writer_threads="disk_io_optimized",
                                                           num_reader_threads="disk_io_optimized")
                # self.sleep(600, "Wait for Rebalance to start")
                self.task.jython_task_manager.get_task_result(rebalance_task)
                reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
                self.assertTrue(reached, "rebalance failed, stuck or did not complete")
                self.stop_process()
                self.data_validation_mode(tasks_info)
                self.tasks = []
                self.bucket_util.print_bucket_stats()
                self.print_crud_stats()
                self.get_bucket_dgm(bucket)
            ########################################################################################################################
            self.log.info("Step 11: Failover a node and RebalanceOut that node with loading in parallel")
            self.std_vbucket_dist = self.input.param("std_vbucket_dist", None)
            std = self.std_vbucket_dist or 1.0

            prev_failover_stats = self.bucket_util.get_failovers_logs(self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            prev_vbucket_stats = self.bucket_util.get_vbucket_seqnos(self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            self.sleep(10)

            disk_replica_dataset, disk_active_dataset = self.bucket_util.get_and_compare_active_replica_data_set_all(
                self.cluster.nodes_in_cluster, self.bucket_util.buckets, path=None)

            self.rest = RestConnection(self.cluster.master)
            self.nodes = self.cluster_util.get_nodes(self.cluster.master)
            self.chosen = self.cluster_util.pick_nodes(self.cluster.master, howmany=1)

            # Mark Node for failover
            self.generate_docs()
            tasks_info = self.data_load()
            self.success_failed_over = self.rest.fail_over(self.chosen[0].id, graceful=False)

            self.sleep(300)
            self.nodes = self.rest.node_statuses()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads=self.new_num_writer_threads,
                                                       num_reader_threads=self.new_num_reader_threads)
            self.rest.rebalance(otpNodes=[node.id for node in self.nodes], ejectedNodes=[self.chosen[0].id])
            # self.sleep(600)
            self.assertTrue(self.rest.monitorRebalance(stop_if_loop=True), msg="Rebalance failed")

            servs_out = [node for node in self.cluster.servers if node.ip == self.chosen[0].ip]
            self.cluster.nodes_in_cluster = list(set(self.cluster.nodes_in_cluster) - set(servs_out))
            self.available_servers += servs_out
            self.sleep(10)

            self.data_validation_mode(tasks_info)

            self.bucket_util.compare_failovers_logs(prev_failover_stats, self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            self.sleep(10)

            self.bucket_util.data_analysis_active_replica_all(
                disk_active_dataset, disk_replica_dataset,
                self.cluster.servers[:self.nodes_in + self.nodes_init],
                self.bucket_util.buckets, path=None)
            nodes = self.cluster_util.get_nodes_in_cluster(self.cluster.master)
            self.bucket_util.vb_distribution_analysis(
                servers=nodes, buckets=self.bucket_util.buckets,
                num_replicas=2,
                std=std, total_vbuckets=self.cluster_util.vbuckets)
            self.sleep(10)
            self.tasks = []
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=0)
            # self.sleep(600)
            self.task.jython_task_manager.get_task_result(rebalance_task)
            reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
            self.assertTrue(reached, "rebalance failed, stuck or did not complete")
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(bucket)
            ########################################################################################################################
            self.log.info("Step 12: Failover a node and FullRecovery that node")

            self.std_vbucket_dist = self.input.param("std_vbucket_dist", None)
            std = self.std_vbucket_dist or 1.0

            prev_failover_stats = self.bucket_util.get_failovers_logs(self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            prev_vbucket_stats = self.bucket_util.get_vbucket_seqnos(self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            self.sleep(10)

            disk_replica_dataset, disk_active_dataset = self.bucket_util.get_and_compare_active_replica_data_set_all(
                self.cluster.nodes_in_cluster, self.bucket_util.buckets, path=None)

            self.rest = RestConnection(self.cluster.master)
            self.nodes = self.cluster_util.get_nodes(self.cluster.master)
            self.chosen = self.cluster_util.pick_nodes(self.cluster.master, howmany=1)

            self.generate_docs()
            tasks_info = self.data_load()
            # Mark Node for failover
            self.success_failed_over = self.rest.fail_over(self.chosen[0].id, graceful=False)

            self.sleep(300)

            # Mark Node for full recovery
            if self.success_failed_over:
                self.rest.set_recovery_type(otpNode=self.chosen[0].id, recoveryType="full")

            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads=self.new_num_writer_threads,
                                                       num_reader_threads=self.new_num_reader_threads)

            rebalance_task = self.task.async_rebalance(
                self.cluster.servers[:self.nodes_init], [], [])
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads="disk_io_optimized",
                                                       num_reader_threads="disk_io_optimized")
            # self.sleep(600)
            self.task.jython_task_manager.get_task_result(rebalance_task)
            reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
            self.assertTrue(reached, "rebalance failed, stuck or did not complete")
            self.sleep(10)

            self.data_validation_mode(tasks_info)

            self.bucket_util.compare_failovers_logs(prev_failover_stats, self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            self.sleep(10)

            self.bucket_util.data_analysis_active_replica_all(
                disk_active_dataset, disk_replica_dataset,
                self.cluster.servers[:self.nodes_in + self.nodes_init],
                self.bucket_util.buckets, path=None)
            nodes = self.cluster_util.get_nodes_in_cluster(self.cluster.master)
            self.bucket_util.vb_distribution_analysis(
                servers=nodes, buckets=self.bucket_util.buckets,
                num_replicas=2,
                std=std, total_vbuckets=self.cluster_util.vbuckets)
            self.sleep(10)
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(bucket)
            ########################################################################################################################
            self.log.info("Step 13: Failover a node and DeltaRecovery that node with loading in parallel")

            self.std_vbucket_dist = self.input.param("std_vbucket_dist", None)
            std = self.std_vbucket_dist or 1.0

            prev_failover_stats = self.bucket_util.get_failovers_logs(self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            prev_vbucket_stats = self.bucket_util.get_vbucket_seqnos(self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            self.sleep(10)

            disk_replica_dataset, disk_active_dataset = self.bucket_util.get_and_compare_active_replica_data_set_all(
                self.cluster.nodes_in_cluster, self.bucket_util.buckets, path=None)

            self.rest = RestConnection(self.cluster.master)
            self.nodes = self.cluster_util.get_nodes(self.cluster.master)
            self.chosen = self.cluster_util.pick_nodes(self.cluster.master, howmany=1)

            self.generate_docs()
            tasks_info = self.data_load()
            # Mark Node for failover
            self.success_failed_over = self.rest.fail_over(self.chosen[0].id, graceful=False)

            self.sleep(300)
            if self.success_failed_over:
                self.rest.set_recovery_type(otpNode=self.chosen[0].id, recoveryType="delta")
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads=self.new_num_writer_threads,
                                                       num_reader_threads=self.new_num_reader_threads)

            rebalance_task = self.task.async_rebalance(
                self.cluster.servers[:self.nodes_init], [], [])
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads="disk_io_optimized",
                                                       num_reader_threads="disk_io_optimized")
            # self.sleep(600)
            self.task.jython_task_manager.get_task_result(rebalance_task)
            reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
            self.assertTrue(reached, "rebalance failed, stuck or did not complete")
            self.sleep(10)

            self.data_validation_mode(tasks_info)

            self.bucket_util.compare_failovers_logs(prev_failover_stats, self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            self.sleep(10)

            self.bucket_util.data_analysis_active_replica_all(
                disk_active_dataset, disk_replica_dataset,
                self.cluster.servers[:self.nodes_in + self.nodes_init],
                self.bucket_util.buckets, path=None)
            nodes = self.cluster_util.get_nodes_in_cluster(self.cluster.master)
            self.bucket_util.vb_distribution_analysis(
                servers=nodes, buckets=self.bucket_util.buckets,
                num_replicas=2,
                std=std, total_vbuckets=self.cluster_util.vbuckets)
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(bucket)
            ########################################################################################################################
            self.log.info("Step 14: Updating the bucket replica to 1")
            bucket_helper = BucketHelper(self.cluster.master)
            for i in range(len(self.bucket_util.buckets)):
                bucket_helper.change_bucket_props(
                    self.bucket_util.buckets[i], replicaNumber=1)
            self.generate_docs()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads=self.new_num_writer_threads,
                                                       num_reader_threads=self.new_num_reader_threads)
            rebalance_task = self.task.async_rebalance(self.cluster.servers, [], [])
            tasks_info = self.data_load()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads="disk_io_optimized",
                                                       num_reader_threads="disk_io_optimized")
            # self.sleep(600, "Wait for Rebalance to start")
            self.task.jython_task_manager.get_task_result(rebalance_task)
            reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
            self.assertTrue(reached, "rebalance failed, stuck or did not complete")
            self.data_validation_mode(tasks_info)
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(bucket)
            ########################################################################################################################
            self.log.info("Step 15: Flush the bucket and start the entire process again")
            self.loop += 1
            if self.loop < self.iterations:
                # Flush the bucket
                self.bucket_util.flush_all_buckets(self.cluster.master)
                self.sleep(10)
                if len(self.cluster.nodes_in_cluster) > self.nodes_init:
                    self.nodes_cluster = self.cluster.nodes_in_cluster[:]
                    self.nodes_cluster.remove(self.cluster.master)
                    servs_out = random.sample(self.nodes_cluster, int(len(self.cluster.nodes_in_cluster) - self.nodes_init))
                    rebalance_task = self.task.async_rebalance(
                        self.cluster.servers[:self.nodes_init], [], servs_out)
                    # self.sleep(600)
                    self.task.jython_task_manager.get_task_result(rebalance_task)
                    self.available_servers += servs_out
                    self.cluster.nodes_in_cluster = list(set(self.cluster.nodes_in_cluster) - set(servs_out))
                    reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
                    self.assertTrue(reached, "rebalance failed, stuck or did not complete")
                    self.get_bucket_dgm(bucket)
                self._iter_count = 0
            else:
                self.log.info("Volume Test Run Complete")
                self.get_bucket_dgm(bucket)
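
Step 15 above resets the cluster between iterations: flush the buckets and rebalance any surplus nodes back out so the next loop starts again from nodes_init. A hedged, condensed sketch of that cleanup, assuming the same cluster/task/bucket_util objects as the example (`test` is the test instance; the helper name is illustrative):

import random

def reset_cluster_after_iteration(test):
    # Flush all buckets so the next iteration starts from an empty data set
    test.bucket_util.flush_all_buckets(test.cluster.master)
    extra_nodes = len(test.cluster.nodes_in_cluster) - test.nodes_init
    if extra_nodes > 0:
        # Rebalance the surplus nodes out, never touching the master
        candidates = [node for node in test.cluster.nodes_in_cluster
                      if node != test.cluster.master]
        servs_out = random.sample(candidates, extra_nodes)
        task = test.task.async_rebalance(
            test.cluster.servers[:test.nodes_init], [], servs_out)
        test.task.jython_task_manager.get_task_result(task)
        test.cluster.nodes_in_cluster = list(
            set(test.cluster.nodes_in_cluster) - set(servs_out))
        test.available_servers += servs_out
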
Example no. 11
    def test_volume_taf(self):
        self.loop = 0
        # self.cluster_utils.set_metadata_purge_interval()
        if self.number_of_indexes > 0:
            # start running select queries thread
            self.query_thread = threading.Thread(target=self.run_select_query)
            self.query_thread_flag = True
            self.query_thread.start()
            # Start running ui stats queries thread
            self.ui_stats_thread = threading.Thread(target=self.run_ui_stats_queries)
            self.ui_stats_thread_flag = True
            self.ui_stats_thread.start()
        self.log.info("Finished steps 1-4 successfully in setup")
        while self.loop < self.iterations:
            if self.loop > 0 or self.flush_buckets_before_indexes_creation:
                self.log.info("Reloading items to buckets")
                self.reload_data_into_buckets()
            #####################################################################################################
            self.log.info("Step 5: Rebalance in with Loading of docs")
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=0)
            task = self.data_load_collection()
            self.wait_for_rebalance_to_complete(rebalance_task)
            self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            if self.fts_indexes_to_recreate > 0:
                self.create_and_drop_fts_indexes(count=self.fts_indexes_to_recreate)
            self.bucket_util.print_bucket_stats(self.cluster)
            self.check_logs()
            ######################################################################################################
            self.log.info("Step 6: Rebalance Out with Loading of docs")
            rebalance_task = self.rebalance(nodes_in=0, nodes_out=1)
            task = self.data_load_collection()
            self.wait_for_rebalance_to_complete(rebalance_task)
            self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            if self.fts_indexes_to_recreate > 0:
                self.create_and_drop_fts_indexes(count=self.fts_indexes_to_recreate)
            self.bucket_util.print_bucket_stats(self.cluster)
            self.check_logs()
            ######################################################################################################
            self.log.info("Step 7: Rebalance In_Out with Loading of docs")
            rebalance_task = self.rebalance(nodes_in=2, nodes_out=1)
            task = self.data_load_collection()
            self.wait_for_rebalance_to_complete(rebalance_task)
            self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            if self.fts_indexes_to_recreate > 0:
                self.create_and_drop_fts_indexes(count=self.fts_indexes_to_recreate)
            self.bucket_util.print_bucket_stats(self.cluster)
            self.check_logs()
            #####################################################################################################
            self.log.info("Step 8: Swap with Loading of docs")
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=1)
            task = self.data_load_collection()
            self.wait_for_rebalance_to_complete(rebalance_task)
            self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            if self.fts_indexes_to_recreate > 0:
                self.create_and_drop_fts_indexes(count=self.fts_indexes_to_recreate)
            self.bucket_util.print_bucket_stats(self.cluster)
            self.check_logs()
            #######################################################################################################
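            # Bump the replica count to 2 on every bucket via the REST helper,
            # then rebalance-in a node so the additional replica vbuckets get placed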
            self.log.info("Step 9: Updating the bucket replica to 2 and rebalance-in")
            bucket_helper = BucketHelper(self.cluster.master)
            for i in range(len(self.cluster.buckets)):
                bucket_helper.change_bucket_props(
                    self.cluster.buckets[i], replicaNumber=2)
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=0)
            task = self.data_load_collection()
            self.wait_for_rebalance_to_complete(rebalance_task)
            self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            if self.fts_indexes_to_recreate > 0:
                self.create_and_drop_fts_indexes(count=self.fts_indexes_to_recreate)
            self.bucket_util.print_bucket_stats(self.cluster)
            self.check_logs()
            ######################################################################################################
            self.log.info("Enabling autoreprovison before inducing failure to prevent data loss "
                          "for if there are ephemeral buckets")
            status = self.rest.update_autoreprovision_settings(True, maxNodes=1)
            if not status:
                self.fail("Failed to enable autoreprovison")
            step_count = 9
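            # For each process-level failure (stop memcached / stop prometheus):
            # start the data load, induce and revert the failure, then run a
            # rebalance with no topology change to let the cluster settle
            # before validation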
            for action in [CouchbaseError.STOP_MEMCACHED, CouchbaseError.STOP_PROMETHEUS]:
                step_count = step_count + 1
                self.log.info("Step {0}: {1}".format(step_count, action))
                # TODO Uncomment this after debugging CBQE-6721
                # self.log.info("Forcing durability level: MAJORITY")
                # self.durability_level = "MAJORITY"
                task = self.data_load_collection()
                self.induce_and_revert_failure(action)
                # Rebalance is required after error is reverted
                rebalance_task = self.task.async_rebalance(self.cluster.servers, [], [],
                                                           retry_get_process_num=self.retry_get_process_num)
                self.wait_for_rebalance_to_complete(rebalance_task)
                self.wait_for_async_data_load_to_complete(task)
                self.data_validation_collection()
                if self.fts_indexes_to_recreate > 0:
                    self.create_and_drop_fts_indexes(count=self.fts_indexes_to_recreate)
                self.bucket_util.print_bucket_stats(self.cluster)
                self.check_logs()
            self.durability_level = ""
            #######################################################################################################
            step_count = 11
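            # Steps 12-17: run every combination of failover type (graceful/hard)
            # and follow-up action (rebalance-out / full recovery / delta recovery),
            # comparing failover logs, vbucket seqnos and active/replica data sets
            # collected before and after each failover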
            for failover in ["Graceful", "Hard"]:
                for action in ["RebalanceOut", "FullRecovery", "DeltaRecovery"]:
                    step_count = step_count + 1
                    self.log.info(
                        "Step {0}: {1} Failover a node and {2} that node "
                        "with data load in parallel".format(
                            step_count, failover, action))

                    self.std_vbucket_dist = self.input.param("std_vbucket_dist", None)
                    std = self.std_vbucket_dist or 1.0

                    kv_nodes = self.cluster_util.get_kv_nodes(self.cluster)
                    self.log.info("Collecting pre_failover_stats. KV nodes are {0}".format(kv_nodes))
                    prev_failover_stats = self.bucket_util.get_failovers_logs(kv_nodes,
                                                                              self.cluster.buckets)
                    prev_vbucket_stats = self.bucket_util.get_vbucket_seqnos(kv_nodes,
                                                                             self.cluster.buckets)
                    self.sleep(10)

                    disk_replica_dataset, disk_active_dataset = \
                        self.bucket_util.get_and_compare_active_replica_data_set_all(
                            kv_nodes, self.cluster.buckets, path=None)

                    # Pick node(s) for failover
                    failover_nodes = list()
                    kv_nodes = self.cluster_util.get_kv_nodes(self.cluster)
                    for node in kv_nodes:
                        if node.ip != self.cluster.master.ip:
                            failover_nodes.append(node)
                            break

                    reset_flag = False
                    if (not self.durability_level) and failover == "Hard":
                        # Force a durability level to prevent data loss during hard failover
                        # TODO Uncomment this after debugging CBQE-6721
                        # self.log.info("Forcing durability level: MAJORITY")
                        # self.durability_level = "MAJORITY"
                        reset_flag = True
                    task = self.data_load_collection()
                    if reset_flag:
                        self.durability_level = ""

                    # Failover the node(s)
                    if failover == "Graceful":
                        failover_result = self.task.failover(self.cluster.nodes_in_cluster,
                                                             failover_nodes=failover_nodes,
                                                             graceful=True, wait_for_pending=120,
                                                             all_at_once=True)
                    else:
                        failover_result = self.task.failover(self.cluster.nodes_in_cluster,
                                                             failover_nodes=failover_nodes,
                                                             graceful=False, wait_for_pending=120,
                                                             all_at_once=True)

                    self.assertTrue(failover_result, "Failover Failed")

                    # Perform the action
                    if action == "RebalanceOut":
                        rebalance_task = self.task.async_rebalance(
                            self.cluster.nodes_in_cluster, [], failover_nodes,
                            retry_get_process_num=self.retry_get_process_num)
                        self.wait_for_rebalance_to_complete(rebalance_task)
                        self.cluster.nodes_in_cluster = list(set(self.cluster.nodes_in_cluster) -
                                                             set(failover_nodes))
                        for node in failover_nodes:
                            self.available_servers.append(node)
                        self.sleep(10)
                    else:
                        if action == "FullRecovery":
                            for failover_node in failover_nodes:
                                self.rest.set_recovery_type(otpNode='ns_1@' + failover_node.ip,
                                                            recoveryType="full")
                        elif action == "DeltaRecovery":
                            for failover_node in failover_nodes:
                                self.rest.set_recovery_type(otpNode='ns_1@' + failover_node.ip,
                                                            recoveryType="delta")

                        rebalance_task = self.task.async_rebalance(
                            self.cluster.nodes_in_cluster, [], [],
                            retry_get_process_num=self.retry_get_process_num)
                        self.wait_for_rebalance_to_complete(rebalance_task)
                        self.sleep(10)

                    self.wait_for_async_data_load_to_complete(task)
                    self.data_validation_collection()

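                    # Verify the failover logs collected earlier, then cross-check
                    # the on-disk active/replica data sets and the vbucket
                    # distribution across the remaining KV nodes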
                    kv_nodes = self.cluster_util.get_kv_nodes(self.cluster)
                    self.log.info("Collecting post_failover_stats. KV nodes are {0}".format(kv_nodes))
                    self.bucket_util.compare_failovers_logs(prev_failover_stats, kv_nodes,
                                                            self.cluster.buckets)
                    self.sleep(10)

                    self.bucket_util.data_analysis_active_replica_all(
                        disk_active_dataset, disk_replica_dataset,
                        kv_nodes,
                        self.cluster.buckets, path=None)
                    self.bucket_util.vb_distribution_analysis(
                        self.cluster,
                        servers=kv_nodes, buckets=self.cluster.buckets,
                        num_replicas=2,
                        std=std, total_vbuckets=self.cluster.vbuckets)
                    self.sleep(10)
                    # Bring the rebalanced-out node back into the cluster for further steps
                    if action == "RebalanceOut":
                        self.sleep(120)
                        self.log.info("Rebalancing-in a node")
                        rebalance_task = self.rebalance(nodes_in=1,
                                                        nodes_out=0)
                        # self.sleep(600)
                        self.wait_for_rebalance_to_complete(rebalance_task)
                    if self.fts_indexes_to_recreate > 0:
                        self.create_and_drop_fts_indexes(count=self.fts_indexes_to_recreate)
                    self.bucket_util.print_bucket_stats(self.cluster)
                    self.check_logs()
            #####################################################################################################
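            # Drop the replica count back to 1 and rebalance so the extra
            # replica vbuckets created in step 9 are removed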
            self.log.info("Step 18: Updating the bucket replica to 1")
            bucket_helper = BucketHelper(self.cluster.master)
            for i in range(len(self.cluster.buckets)):
                bucket_helper.change_bucket_props(
                    self.cluster.buckets[i], replicaNumber=1)
            rebalance_task = self.task.async_rebalance(self.cluster.servers, [], [],
                                                       retry_get_process_num=self.retry_get_process_num)
            task = self.data_load_collection()
            self.wait_for_rebalance_to_complete(rebalance_task)
            self.wait_for_async_data_load_to_complete(task)
            self.data_validation_collection()
            if self.fts_indexes_to_recreate > 0:
                self.create_and_drop_fts_indexes(count=self.fts_indexes_to_recreate)
            self.bucket_util.print_bucket_stats(self.cluster)
            self.check_logs()
            #######################################################################################################
            self.cluster.nodes_in_cluster = self.cluster.servers
            step_count = 19
            removed_nodes = list()  # total list of all nodes that will be removed
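            # Optional quorum failover: repeatedly stop the server process on a
            # majority of the remaining nodes, fail them over unsafely
            # (allow_unsafe=True), wipe their config, and finally add them all
            # back with a single rebalance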
            if self.perform_quorum_failover:
                self.log.info("Step {0}: Quorum failover nodes".format(step_count))
                # Keep performing quorum failover until only one node is left
                while len(self.cluster.nodes_in_cluster) != 1:
                    majority_number = int(math.ceil(len(self.cluster.nodes_in_cluster) / 2.0))
                    self.nodes_cluster = self.cluster.nodes_in_cluster[:]
                    self.nodes_cluster.remove(self.cluster.master)
                    remove_nodes = random.sample(self.nodes_cluster, majority_number)
                    self.custom_induce_failure(remove_nodes, failover_action="stop_server")
                    self.sleep(10, "Wait after inducing failure")
                    self.log.info("Failing over nodes explicitly {0}".format(remove_nodes))
                    result = self.task.failover(self.cluster.nodes_in_cluster, failover_nodes=remove_nodes,
                                                graceful=False, wait_for_pending=120,
                                                allow_unsafe=True,
                                                all_at_once=True)
                    self.assertTrue(result, "Failover Failed")
                    self.custom_remove_failure(nodes=remove_nodes, revert_failure="stop_server")
                    self.sleep(15)
                    self.wipe_config_on_removed_nodes(remove_nodes)
                    for node in remove_nodes:
                        removed_nodes.append(node)
                    self.cluster.nodes_in_cluster = [node for node in self.cluster.nodes_in_cluster
                                                     if node not in remove_nodes]
                # Add back all the removed nodes with the KV service
                rebalance_task = self.task.async_rebalance(self.cluster.nodes_in_cluster, removed_nodes, [],
                                                           retry_get_process_num=self.retry_get_process_num)
                self.wait_for_rebalance_to_complete(rebalance_task)
                self.cluster.nodes_in_cluster = self.cluster.servers[:]
                step_count = step_count + 1
                self.bucket_util.print_bucket_stats(self.cluster)
                self.check_logs()
            #####################################################################################################
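            # End of iteration: flush all buckets and, if nodes were added during
            # this pass, rebalance the cluster back down to nodes_init before
            # starting the next loop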
            self.log.info("Step {0}: Flush bucket(s) and start the entire process again".format(step_count))
            self.loop += 1
            if self.loop < self.iterations:
                # Flush bucket(s)
                self.bucket_util.flush_all_buckets(
                    self.cluster, skip_resetting_num_items=True)
                self.sleep(10)
                if len(self.cluster.nodes_in_cluster) > self.nodes_init:
                    self.nodes_cluster = self.cluster.nodes_in_cluster[:]
                    self.nodes_cluster.remove(self.cluster.master)
                    servs_out = random.sample(self.nodes_cluster,
                                              int(len(self.cluster.nodes_in_cluster) - self.nodes_init))
                    rebalance_task = self.task.async_rebalance(
                        self.cluster.servers[:self.nodes_init], [], servs_out,
                        retry_get_process_num=self.retry_get_process_num)
                    self.wait_for_rebalance_to_complete(rebalance_task)
                    self.available_servers += servs_out
                    self.cluster.nodes_in_cluster = list(set(self.cluster.nodes_in_cluster) - set(servs_out))
            else:
                if self.number_of_indexes > 0:
                    self.close_all_threads()
                self.log.info("Volume Test Run Complete")