Example #1
 def stop_process(self):
     target_node = self.servers[2]
     remote = RemoteMachineShellConnection(target_node)
     error_sim = CouchbaseError(self.log, remote)
     error_to_simulate = "stop_memcached"
     # Induce the error condition
     error_sim.create(error_to_simulate)
     self.sleep(20, "Wait before reverting the error condition")
     # Revert the simulated error condition and close the ssh session
     error_sim.revert(error_to_simulate)
     remote.disconnect()
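A note on cleanup: the create/sleep/revert sequence above leaves the error
active if anything raises between create() and revert(). A minimal sketch of
a context-manager wrapper, assuming the same CouchbaseError and
RemoteMachineShellConnection API shown in Example #1 (the helper itself is
not part of the suite):

from contextlib import contextmanager

@contextmanager
def simulated_error(log, node, error_to_simulate):
    # Open the ssh session and induce the error condition
    remote = RemoteMachineShellConnection(node)
    error_sim = CouchbaseError(log, remote)
    error_sim.create(error_to_simulate)
    try:
        yield error_sim
    finally:
        # Always revert the simulated error and close the ssh session
        error_sim.revert(error_to_simulate)
        remote.disconnect()

Usage would mirror Example #1:

with simulated_error(self.log, self.servers[2], "stop_memcached"):
    self.sleep(20, "Wait before reverting the error condition")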
Example #2
 def induce_and_revert_failure(self, action):
     target_node = self.servers[-1]  # select last node
     remote = RemoteMachineShellConnection(target_node)
     error_sim = CouchbaseError(self.log, remote)
     error_sim.create(action)
     self.sleep(20, "Wait before reverting the error condition")
     if action in [CouchbaseError.STOP_MEMCACHED, CouchbaseError.STOP_PROMETHEUS]:
         # Revert explicitly only for the 'stop' actions. For the kill
         # variants, the babysitter brings the process back automatically
         error_sim.revert(action)
     remote.disconnect()
Example #3
    def test_with_sync_write(self):
        cluster_node = choice(self.kv_nodes)
        target_vb_type, simulate_error = \
            DurabilityHelper.get_vb_and_error_type(self.durability_level)
        doc_gen = doc_generator(
            self.key,
            0,
            2,
            target_vbucket=self.node_data[cluster_node]["%s_vbs" %
                                                        target_vb_type])
        client = self.sdk_client_pool.get_client_for_bucket(
            self.bucket, self.scope_name, self.collection_name)

        key_1, value_1 = doc_gen.next()
        key_2, value_2 = doc_gen.next()

        if self.doc_ops[0] != DocLoading.Bucket.DocOps.CREATE:
            client.crud(DocLoading.Bucket.DocOps.CREATE, key_1, value_1)
        if self.doc_ops[1] != DocLoading.Bucket.DocOps.CREATE:
            client.crud(DocLoading.Bucket.DocOps.CREATE, key_2, value_2)

        sync_op = Thread(target=self.crud,
                         args=[client, self.doc_ops[0], key_1],
                         kwargs={
                             "value": value_1,
                             "durability": self.durability_level,
                             "expected_thread_val": 1
                         })
        async_op = Thread(target=self.crud,
                          args=[client, self.doc_ops[1], key_2],
                          kwargs={
                              "value": value_2,
                              "expected_thread_val": 0
                          })

        cb_err = CouchbaseError(self.log,
                                self.node_data[cluster_node]["shell"])
        cb_err.create(simulate_error, self.bucket.name)

        # Start doc_ops
        sync_op.start()
        self.sleep(1, "Wait before async operation")
        async_op.start()

        # Wait for ops to complete
        async_op.join()
        cb_err.revert(simulate_error, self.bucket.name)
        sync_op.join()

        self.validate_test_failure()
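The thread target self.crud used above is not shown in this example. A
hypothetical sketch of such a wrapper (the helper name, the thread_results
store and the expected_thread_val semantics are assumptions, not the suite's
actual implementation):

    def crud(self, client, op_type, key, value=None,
             durability=None, expected_thread_val=None):
        # Hypothetical helper: issue a single CRUD through the shared SDK
        # client and stash the result so the main thread can validate it
        # after join(). expected_thread_val is assumed to index the slot
        # the result is stored in.
        result = client.crud(op_type, key, value,
                             durability=durability,
                             timeout=self.sdk_timeout)
        self.thread_results[expected_thread_val] = result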
Example #4
    def MB36948(self):
        node_to_stop = self.servers[0]
        self.log.info("Adding index/query node")
        self.task.rebalance([self.cluster.master], [self.servers[2]], [],
                            services=["n1ql,index"])
        self.log.info("Creating SDK client connection")
        client = SDKClient([self.cluster.master],
                           self.bucket_util.buckets[0],
                           compression_settings=self.sdk_compression)

        self.log.info("Stopping memcached on: %s" % node_to_stop)
        ssh_conn = RemoteMachineShellConnection(node_to_stop)
        err_sim = CouchbaseError(self.log, ssh_conn)
        err_sim.create(CouchbaseError.STOP_MEMCACHED)

        result = client.crud("create", "abort1", "abort1_val")
        if not result["status"]:
            self.log_failure("Async SET failed")

        result = client.crud("update",
                             "abort1",
                             "abort1_val",
                             durability=self.durability_level,
                             timeout=3,
                             time_unit="seconds")
        if result["status"]:
            self.log_failure("Sync write succeeded")
        if SDKException.DurabilityAmbiguousException not in result["error"]:
            self.log_failure("Invalid exception for sync_write: %s" % result)

        self.log.info("Resuming memcached on: %s" % node_to_stop)
        err_sim.revert(CouchbaseError.STOP_MEMCACHED)

        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util.verify_stats_all_buckets(1)

        self.log.info("Closing ssh & SDK connections")
        ssh_conn.disconnect()
        client.close()

        self.validate_test_failure()
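The status/error checks in Example #4 are repeated inline for every sync
write. A small sketch of a reusable assertion helper, assuming the
result-dict keys "status" and "error" used above (the helper itself is
hypothetical):

    def assert_sync_write_failed(self, result, expected_exception):
        # Hypothetical helper mirroring the checks in MB36948: the sync
        # write must fail while memcached is stopped, and the SDK error
        # must contain the expected exception name.
        if result["status"]:
            self.log_failure("Sync write succeeded unexpectedly")
        elif expected_exception not in str(result["error"]):
            self.log_failure("Invalid exception for sync_write: %s" % result)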
Example #5
 def test_prometheus_and_ns_server_stats_after_failure_scenarios(self):
     """
     Run all metrics before and after failure scenarios and validate
     both ns_server and prometheus stats
     """
     self.bucket_util.load_sample_bucket(self.cluster, TravelSample())
     target_node = self.servers[0]
     remote = RemoteMachineShellConnection(target_node)
     error_sim = CouchbaseError(self.log, remote)
     self.log.info("Before failure")
     self.get_all_metrics(self.components, self.parse, self.metric_name)
     try:
         # Induce the error condition
         error_sim.create(self.simulate_error)
         self.sleep(20, "Wait before reverting the error condition")
     finally:
         # Revert the simulated error condition and close the ssh session
         error_sim.revert(self.simulate_error)
         remote.disconnect()
     self.log.info("After failure")
     self.get_all_metrics(self.components, self.parse, self.metric_name)
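get_all_metrics is only invoked for logging here. A hypothetical sketch of
turning the before/after passes into an explicit comparison; the assumption
that the collected metrics are dicts with a "name" field is mine, not the
suite's:

    def compare_metric_names(self, before_metrics, after_metrics):
        # Hypothetical: flag metric names that disappeared after the
        # failure scenario was reverted.
        before = set(m["name"] for m in before_metrics)
        after = set(m["name"] for m in after_metrics)
        missing = before - after
        if missing:
            self.fail("Metrics missing after failure scenario: %s" % missing)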
Example #6
    def test_stop_process(self):
        """
        1. Start loading docs into the default bucket
        2. Stop the requested process, which will impact the
           memcached operations
        3. Wait for load bucket task to complete
        4. Validate the docs for durability
        """
        error_to_simulate = self.input.param("simulate_error", None)
        def_bucket = self.bucket_util.buckets[0]
        target_node = self.getTargetNode()
        remote = RemoteMachineShellConnection(target_node)
        error_sim = CouchbaseError(self.log, remote)
        target_vbuckets = self.getVbucketNumbers(remote, def_bucket.name,
                                                 self.target_node)
        if len(target_vbuckets) == 0:
            self.log.error("No target vbucket list generated to load data")
            remote.disconnect()
            return

        # Create doc_generator targeting only the active/replica vbuckets
        # present in the target_node
        gen_load = doc_generator(self.key,
                                 self.num_items,
                                 self.new_docs_to_add,
                                 key_size=self.key_size,
                                 doc_size=self.doc_size,
                                 doc_type=self.doc_type,
                                 target_vbucket=target_vbuckets,
                                 vbuckets=self.cluster_util.vbuckets)

        if self.atomicity:
            task = self.task.async_load_gen_docs_atomicity(
                self.cluster,
                self.bucket_util.buckets,
                gen_load,
                "create",
                exp=0,
                batch_size=10,
                process_concurrency=self.process_concurrency,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                update_count=self.update_count,
                transaction_timeout=self.transaction_timeout,
                commit=True,
                sync=self.sync)
        else:
            task = self.task.async_load_gen_docs(
                self.cluster,
                def_bucket,
                gen_load,
                "create",
                exp=0,
                batch_size=1,
                process_concurrency=8,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                skip_read_on_error=True)

        # Induce the error condition
        error_sim.create(error_to_simulate)

        self.sleep(20, "Wait before reverting the error condition")
        # Revert the simulated error condition and close the ssh session
        error_sim.revert(error_to_simulate)
        remote.disconnect()

        # Wait for doc loading task to complete
        self.task.jython_task_manager.get_task_result(task)
        if not self.atomicity:
            if len(task.fail.keys()) != 0:
                if self.target_node == "active" or self.num_replicas in [2, 3]:
                    self.log_failure("Unwanted failures for keys: %s" %
                                     task.fail.keys())

            validate_passed = \
                self.durability_helper.validate_durability_exception(
                    task.fail,
                    SDKException.DurabilityAmbiguousException)
            if not validate_passed:
                self.log_failure("Unwanted exception seen during validation")

            # Create SDK connection for CRUD retries
            sdk_client = SDKClient([self.cluster.master], def_bucket)
            for doc_key, crud_result in task.fail.items():
                result = sdk_client.crud("create",
                                         doc_key,
                                         crud_result["value"],
                                         replicate_to=self.replicate_to,
                                         persist_to=self.persist_to,
                                         durability=self.durability_level,
                                         timeout=self.sdk_timeout)
                if result["status"] is False:
                    self.log_failure("Retry of doc_key %s failed: %s" %
                                     (doc_key, result["error"]))
            # Close the SDK connection
            sdk_client.close()

        # Update self.num_items
        self.num_items += self.new_docs_to_add

        if not self.atomicity:
            # Validate doc count
            self.bucket_util._wait_for_stats_all_buckets()
            self.bucket_util.verify_stats_all_buckets(self.num_items)

        self.validate_test_failure()
    def test_durability_abort(self):
        """
        Test to validate that durability aborts are triggered correctly,
        with proper rollback on the active vbucket
        :return:
        """
        load_task = dict()

        # Override d_level, error_simulation type based on d_level
        self.__get_d_level_and_error_to_simulate()

        kv_nodes = self.cluster_util.get_kv_nodes(self.cluster)
        for server in kv_nodes:
            ssh_shell = RemoteMachineShellConnection(server)
            cbstats = Cbstats(server)
            cb_err = CouchbaseError(self.log, ssh_shell)
            target_vb_type = "replica"
            if self.durability_level \
                    == Bucket.DurabilityLevel.MAJORITY_AND_PERSIST_TO_ACTIVE:
                target_vb_type = "active"
            target_vbs = cbstats.vbucket_list(self.bucket.name, target_vb_type)
            doc_load_spec = dict()
            doc_load_spec["doc_crud"] = dict()
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.CREATE_PERCENTAGE_PER_COLLECTION] = 2
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.UPDATE_PERCENTAGE_PER_COLLECTION] = 2
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.DELETE_PERCENTAGE_PER_COLLECTION] = 2

            doc_load_spec["doc_crud"][MetaCrudParams.DocCrud.COMMON_DOC_KEY] \
                = "test_collections"
            doc_load_spec[MetaCrudParams.TARGET_VBUCKETS] = target_vbs

            doc_load_spec[MetaCrudParams.DURABILITY_LEVEL] \
                = self.durability_level
            doc_load_spec[MetaCrudParams.RETRY_EXCEPTIONS] = [
                SDKException.DurabilityAmbiguousException
            ]
            doc_load_spec[MetaCrudParams.SDK_TIMEOUT] = 2
            doc_load_spec[MetaCrudParams.SKIP_READ_ON_ERROR] = True
            doc_load_spec[MetaCrudParams.SUPPRESS_ERROR_TABLE] = True

            cb_err.create(self.simulate_error, self.cluster.buckets[0].name)
            load_task[server] = \
                self.bucket_util.run_scenario_from_spec(
                    self.task,
                    self.cluster,
                    self.cluster.buckets,
                    doc_load_spec,
                    batch_size=1,
                    validate_task=False)
            cb_err.revert(self.simulate_error, self.cluster.buckets[0].name)
            ssh_shell.disconnect()
        self.validate_test_failure()

        failed = self.durability_helper.verify_vbucket_details_stats(
            self.bucket,
            kv_nodes,
            vbuckets=self.cluster.vbuckets,
            expected_val=self.verification_dict)
        if failed:
            self.log_failure("Cbstat vbucket-details verification failed "
                             "after aborts")
        self.validate_test_failure()

        # Retry aborted keys with healthy cluster
        self.log.info("Performing CRUDs on healthy cluster")
        for server in kv_nodes:
            self.bucket_util.validate_doc_loading_results(load_task[server])
            if load_task[server].result is False:
                self.log_failure("Doc retry task failed on %s" % server.ip)

            # Update cbstat vb-details verification counters
            for bucket, s_dict in load_task[server].loader_spec.items():
                for s_name, c_dict in s_dict["scopes"].items():
                    for c_name, _ in c_dict["collections"].items():
                        c_crud_data = load_task[server].loader_spec[bucket][
                            "scopes"][s_name]["collections"][c_name]
                        for op_type in c_crud_data.keys():
                            total_mutation = \
                                c_crud_data[op_type]["doc_gen"].end \
                                - c_crud_data[op_type]["doc_gen"].start
                            if op_type in DocLoading.Bucket.DOC_OPS:
                                self.verification_dict["ops_%s" % op_type] \
                                    += total_mutation
                                self.verification_dict[
                                    "sync_write_committed_count"] \
                                    += total_mutation
            failed = self.durability_helper.verify_vbucket_details_stats(
                self.bucket,
                self.cluster_util.get_kv_nodes(self.cluster),
                vbuckets=self.cluster.vbuckets,
                expected_val=self.verification_dict)
            if failed:
                self.log_failure("Cbstat vbucket-details verification "
                                 "failed after ops on server: %s" % server.ip)
        self.validate_test_failure()
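The counter updates after the healthy retry follow a simple rule: every
mutation in an op's generator range bumps both ops_<op_type> and
sync_write_committed_count. A self-contained sketch of that arithmetic
(the ranges are illustrative values, not from the test):

verification_dict = {"ops_create": 0, "ops_update": 0, "ops_delete": 0,
                     "sync_write_committed_count": 0}
crud_ranges = {"create": (0, 20), "update": (0, 20), "delete": (0, 20)}

for op_type, (start, end) in crud_ranges.items():
    total_mutation = end - start
    verification_dict["ops_%s" % op_type] += total_mutation
    verification_dict["sync_write_committed_count"] += total_mutation

assert verification_dict["sync_write_committed_count"] == 60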
Example #8
    def validate_durability_with_crud(
            self, bucket, bucket_durability,
            verification_dict,
            doc_start_index=0,
            num_items_to_load=1, op_type="create",
            doc_durability=Bucket.DurabilityLevel.NONE):
        """
        Common API to validate whether the bucket's durability settings are
        applied correctly.

        :param bucket: Bucket object to validate
        :param bucket_durability: Durability set for the bucket
                                  Note: Need this because the string within the
                                        bucket object is different than this.
        :param verification_dict: To hold the values for req cbstats to verify
        :param doc_start_index: Starting index to be considered for doc_load
        :param num_items_to_load: Number of items to be loaded to test.
                                  Default is '1'
        :param op_type: Type of CRUD to perform. Default is 'create'
        :param doc_durability: Document durability level to use during CRUD.
                               Default level is 'None'
        :return:
        """
        def get_d_level_used():
            if self.d_level_order.index(bucket_durability) \
                    < self.d_level_order.index(doc_durability):
                return doc_durability
            return bucket_durability

        d_level_to_test = get_d_level_used()
        # Nothing to test for durability_level=None (async_write case)
        if d_level_to_test == Bucket.DurabilityLevel.NONE:
            return

        self.log.info("Performing %s operation to validate d_level %s"
                      % (op_type, d_level_to_test))

        # Error conditions can't be simulated on a single-node cluster,
        # so in that case only perform CRUD without error_sim
        if len(self.vbs_in_node.keys()) > 1:
            # Pick a random node to perform error sim and load
            random_node = choice(self.vbs_in_node.keys())

            target_vb_type, simulate_error = \
                self.durability_helper.get_vb_and_error_type(d_level_to_test)

            doc_gen = doc_generator(
                self.key, doc_start_index, num_items_to_load,
                target_vbucket=self.vbs_in_node[random_node][target_vb_type])
            error_sim = CouchbaseError(self.log,
                                       self.vbs_in_node[random_node]["shell"])

            doc_load_task = self.task.async_load_gen_docs(
                self.cluster, bucket, doc_gen, op_type,
                exp=self.maxttl,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=doc_durability,
                timeout_secs=32,
                batch_size=1,
                skip_read_on_error=True,
                suppress_error_table=True,
                start_task=False,
                sdk_client_pool=self.sdk_client_pool)

            self.sleep(5, "Wait for sdk_client to get warmed_up")
            # Simulate target error condition
            error_sim.create(simulate_error)
            self.sleep(5, "Wait for error_sim to take effect")

            # Start doc_loading task and wait for it to complete
            self.task_manager.add_new_task(doc_load_task)
            self.task_manager.get_task_result(doc_load_task)

            # Revert the induced error condition
            self.sleep(5, "Wait before reverting error_simulation")
            error_sim.revert(simulate_error)

            # Validate failed doc count and exception type from SDK
            if not doc_load_task.fail.keys():
                self.log_failure("Docs inserted without honoring the "
                                 "bucket durability level")
            for key, result in doc_load_task.fail.items():
                if SDKException.DurabilityAmbiguousException \
                        not in str(result["error"]):
                    self.log_failure("Invalid exception for key %s "
                                     "during %s operation: %s"
                                     % (key, op_type, result["error"]))

            verification_dict["sync_write_aborted_count"] += num_items_to_load
        else:
            doc_gen = doc_generator(self.key, doc_start_index,
                                    doc_start_index+num_items_to_load)

        # Retry the same CRUDs without any error simulation in place
        doc_load_task = self.task.async_load_gen_docs(
            self.cluster, bucket, doc_gen, op_type,
            exp=self.maxttl,
            durability=doc_durability,
            timeout_secs=2,
            batch_size=1,
            sdk_client_pool=self.sdk_client_pool)
        self.task_manager.get_task_result(doc_load_task)
        if doc_load_task.fail:
            self.log_failure("Failures seen during CRUD without "
                             "error simulation. Keys failed: %s"
                             % doc_load_task.fail.keys())
        else:
            verification_dict["ops_%s" % op_type] += \
                num_items_to_load
            verification_dict["sync_write_committed_count"] += \
                num_items_to_load
        def test_scenario(bucket, doc_ops,
                          with_sync_write_val=None):
            # Set crud_batch_size
            crud_batch_size = 4
            simulate_error = CouchbaseError.STOP_MEMCACHED

            # Fetch target_vbs for CRUDs
            node_vb_info = self.vbs_in_node
            target_vbuckets = node_vb_info[target_nodes[0]]["replica"]
            if len(target_nodes) > 1:
                index = 1
                while index < len(target_nodes):
                    target_vbuckets = list(
                        set(target_vbuckets).intersection(
                            set(node_vb_info[target_nodes[index]]["replica"]))
                    )
                    index += 1

            # Variable to hold one of the doc_generator objects
            gen_loader_1 = None
            gen_loader_2 = None

            # Initialize doc_generators to use for testing
            self.log.info("Creating doc_generators")
            gen_create = doc_generator(
                self.key, self.num_items, crud_batch_size,
                vbuckets=self.cluster.vbuckets,
                target_vbucket=target_vbuckets)
            gen_update = doc_generator(
                self.key, 0, crud_batch_size,
                vbuckets=self.cluster.vbuckets,
                target_vbucket=target_vbuckets, mutate=1)
            gen_delete = doc_generator(
                self.key, 0, crud_batch_size,
                vbuckets=self.cluster.vbuckets,
                target_vbucket=target_vbuckets)
            self.log.info("Done creating doc_generators")

            # Start CRUD operation based on the given 'doc_op' type
            if doc_ops[0] == "create":
                self.num_items += crud_batch_size
                gen_loader_1 = gen_create
            elif doc_ops[0] in ["update", "replace", "touch"]:
                gen_loader_1 = gen_update
            elif doc_ops[0] == "delete":
                gen_loader_1 = gen_delete
                self.num_items -= crud_batch_size

            if doc_ops[1] == "create":
                gen_loader_2 = gen_create
            elif doc_ops[1] in ["update", "replace", "touch"]:
                gen_loader_2 = gen_update
            elif doc_ops[1] == "delete":
                gen_loader_2 = gen_delete

            # Load required docs for doc_op_1 in case of type != create
            if doc_ops[2] == "load_initial_docs":
                doc_loading_task = self.task.async_load_gen_docs(
                    self.cluster, bucket, gen_loader_1, "create", 0,
                    batch_size=crud_batch_size, process_concurrency=1,
                    timeout_secs=10,
                    print_ops_rate=False,
                    sdk_client_pool=self.sdk_client_pool)
                self.task_manager.get_task_result(doc_loading_task)
                if doc_loading_task.fail:
                    self.log_failure("Failure while loading initial docs")
                self.summary.add_step("Create docs for %s" % doc_ops[0])
                verification_dict["ops_create"] += crud_batch_size
                verification_dict["sync_write_committed_count"] \
                    += crud_batch_size

            # Initialize tasks and store the task objects
            doc_loader_task = self.task.async_load_gen_docs(
                self.cluster, bucket, gen_loader_1, doc_ops[0], 0,
                batch_size=crud_batch_size, process_concurrency=8,
                timeout_secs=60,
                print_ops_rate=False,
                start_task=False,
                sdk_client_pool=self.sdk_client_pool)

            # SDK client for performing individual ops
            client = SDKClient([self.cluster.master], bucket)

            # Perform specified action
            for node in target_nodes:
                error_sim = CouchbaseError(self.log,
                                           self.vbs_in_node[node]["shell"])
                error_sim.create(simulate_error,
                                 bucket_name=bucket.name)
            self.sleep(5, "Wait for error simulation to take effect")

            self.task_manager.add_new_task(doc_loader_task)
            self.sleep(5, "Wait for task_1 CRUDs to reach server")

            # Perform specified CRUD operation on sync_write docs
            tem_gen = deepcopy(gen_loader_2)
            while tem_gen.has_next():
                key, value = tem_gen.next()
                for retry_strategy in [
                        SDKConstants.RetryStrategy.FAIL_FAST,
                        SDKConstants.RetryStrategy.BEST_EFFORT]:
                    if with_sync_write_val:
                        fail = client.crud(doc_ops[1], key, value=value,
                                           exp=0,
                                           durability=with_sync_write_val,
                                           timeout=3, time_unit="seconds",
                                           sdk_retry_strategy=retry_strategy)
                    else:
                        fail = client.crud(doc_ops[1], key, value=value,
                                           exp=0,
                                           timeout=3, time_unit="seconds",
                                           sdk_retry_strategy=retry_strategy)

                    expected_exception = SDKException.AmbiguousTimeoutException
                    retry_reason = \
                        SDKException.RetryReason.KV_SYNC_WRITE_IN_PROGRESS
                    if retry_strategy == SDKConstants.RetryStrategy.FAIL_FAST:
                        expected_exception = \
                            SDKException.RequestCanceledException
                        retry_reason = \
                            SDKException.RetryReason \
                            .KV_SYNC_WRITE_IN_PROGRESS_NO_MORE_RETRIES

                    # Validate the returned error from the SDK
                    if expected_exception not in str(fail["error"]):
                        self.log_failure("Invalid exception for {0}: {1}"
                                         .format(key, fail["error"]))
                    if retry_reason not in str(fail["error"]):
                        self.log_failure("Invalid retry reason for {0}: {1}"
                                         .format(key, fail["error"]))

                    # Try reading the value in SyncWrite in-progress state
                    fail = client.crud("read", key)
                    if doc_ops[0] == "create":
                        # Expected KeyNotFound in case of CREATE operation
                        if fail["status"] is True:
                            self.log_failure(
                                "%s returned value during SyncWrite state: %s"
                                % (key, fail))
                    else:
                        # Expects prev value in case of other operations
                        if fail["status"] is False:
                            self.log_failure(
                                "Key %s read failed for previous value: %s"
                                % (key, fail))

            # Revert the introduced error condition
            for node in target_nodes:
                error_sim = CouchbaseError(self.log,
                                           self.vbs_in_node[node]["shell"])
                error_sim.revert(simulate_error,
                                 bucket_name=bucket.name)

            # Wait for doc_loader_task to complete
            self.task.jython_task_manager.get_task_result(doc_loader_task)

            verification_dict["ops_%s" % doc_ops[0]] += crud_batch_size
            verification_dict["sync_write_committed_count"] \
                += crud_batch_size

            # Disconnect the client
            client.close()
    def test_update_durability_between_doc_op(self):
        """
        1. Create Bucket with durability level set.
        2. Bring down a node such that durability CRUD will wait
        3. Perform doc_op and update bucket_level_durability
        4. Revert scenario induced in step#2, such that doc_op will complete
        5. Make sure doc_ops in step#3 went through using prev. d-level
        """
        # Start from the max durability level so that the loop iterates
        # through all lower levels while updating the level between doc_ops
        supported_d_levels = deepcopy(self.d_level_order)
        if self.bucket_type == Bucket.Type.EPHEMERAL:
            supported_d_levels = supported_d_levels[0:2]

        supported_d_levels.reverse()
        supported_d_levels += [supported_d_levels[0]]

        create_desc = "Creating %s bucket with level '%s'" \
                      % (self.bucket_type, supported_d_levels[0])

        self.log.info(create_desc)
        bucket_dict = self.get_bucket_dict(self.bucket_type,
                                           supported_d_levels[0])
        # Object to support performing CRUDs and create Bucket
        bucket_obj = Bucket(bucket_dict)
        self.bucket_util.create_bucket(self.cluster, bucket_obj,
                                       wait_for_warmup=True)
        self.get_vbucket_type_mapping(bucket_obj.name)
        self.summary.add_step(create_desc)

        self.bucket_util.print_bucket_stats(self.cluster)

        # Loop to update all other durability levels
        prev_d_level = supported_d_levels[0]
        for bucket_durability in supported_d_levels[1:]:
            target_vb_type, simulate_error = \
                self.durability_helper.get_vb_and_error_type(bucket_durability)

            # Pick a random node to perform error sim and load
            random_node = choice(self.vbs_in_node.keys())
            error_sim = CouchbaseError(
                self.log,
                self.vbs_in_node[random_node]["shell"])

            target_vbs = self.vbs_in_node[random_node][target_vb_type]
            doc_gen = doc_generator(self.key, 0, 1,
                                    target_vbucket=target_vbs)

            doc_load_task = self.task.async_load_gen_docs(
                self.cluster, bucket_obj, doc_gen, "update",
                durability=Bucket.DurabilityLevel.NONE,
                timeout_secs=60,
                start_task=False,
                sdk_client_pool=self.sdk_client_pool)

            # Simulate target error condition
            error_sim.create(simulate_error)
            self.sleep(5, "Wait before starting doc_op")
            self.task_manager.add_new_task(doc_load_task)

            new_d_level = BucketDurability[bucket_durability]
            self.sleep(5, "Wait before updating bucket level "
                          "durability=%s" % new_d_level)

            self.bucket_util.update_bucket_property(
                self.cluster.master,
                bucket_obj,
                bucket_durability=new_d_level)
            self.bucket_util.print_bucket_stats(self.cluster)

            buckets = self.bucket_util.get_all_buckets(self.cluster)
            if buckets[0].durability_level != new_d_level:
                self.log_failure("Failed to update bucket_d_level to %s"
                                 % new_d_level)
            self.summary.add_step("Set bucket-durability=%s" % new_d_level)

            if prev_d_level == Bucket.DurabilityLevel.NONE:
                if not doc_load_task.completed:
                    self.log_failure("Doc-op still pending for d_level 'NONE'")
            elif doc_load_task.completed:
                self.log_failure("Doc-op completed before reverting the "
                                 "error condition: %s" % simulate_error)

            # Revert the induced error condition
            error_sim.revert(simulate_error)

            self.task_manager.get_task_result(doc_load_task)
            if doc_load_task.fail:
                self.log_failure("Doc_op failed")
            self.summary.add_step("Doc_op with previous d_level %s"
                                  % prev_d_level)
            prev_d_level = bucket_durability

        # Delete the bucket on server
        self.bucket_util.delete_bucket(self.cluster, bucket_obj)
        self.summary.add_step("Delete %s bucket" % self.bucket_type)
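The bucket- vs document-level precedence in get_d_level_used (Example #8)
depends on the ordering in d_level_order. A self-contained sketch of the
same resolution rule with an assumed ordering (the actual list used by the
suite may differ):

D_LEVEL_ORDER = ["none", "majority",
                 "majorityAndPersistActive", "persistToMajority"]

def effective_durability(bucket_level, doc_level):
    # The stricter of the bucket-level and per-doc durability wins.
    if D_LEVEL_ORDER.index(bucket_level) < D_LEVEL_ORDER.index(doc_level):
        return doc_level
    return bucket_level

assert effective_durability("majority", "none") == "majority"
assert effective_durability("majority", "persistToMajority") \
    == "persistToMajority"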
Example #11
    def test_maxttl_with_timeout(self):
        """
        1. Stop Memcached on target_nodes based on replicas configured.
        2. Initiate doc_ops with higher sdk_timeout
        3. Sleep for time within the configured sdk_timeout
        4. Resume Memcached on target_nodes to make sure doc_ops go through
        5. Make sure maxTTL is calculated as soon as the active vbucket
           receives the mutation
        :return:
        """
        shell_conn = dict()
        target_vbuckets = list()
        target_nodes = self.getTargetNodes()
        def_bucket = self.cluster.buckets[0]
        self.maxttl = self.input.param("doc_ttl", self.maxttl)

        # Open required SDK connections before error_simulation
        gen_create = doc_generator(self.key,
                                   0,
                                   self.num_items,
                                   doc_size=self.doc_size,
                                   doc_type=self.doc_type,
                                   target_vbucket=target_vbuckets,
                                   vbuckets=self.cluster.vbuckets)
        doc_op_task = self.task.async_load_gen_docs(
            self.cluster,
            def_bucket,
            gen_create,
            "create",
            self.maxttl,
            batch_size=10,
            process_concurrency=8,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to,
            durability=self.durability_level,
            timeout_secs=self.sdk_timeout,
            compression=self.sdk_compression,
            start_task=False,
            sdk_client_pool=self.sdk_client_pool)

        # Open shell_conn and create Memcached error for testing MaxTTL
        self.log.info("1. Stopping Memcached on target_nodes")
        for node in target_nodes:
            shell_conn[node.ip] = RemoteMachineShellConnection(node)
            cbstats = Cbstats(shell_conn[node.ip])
            target_vbuckets += cbstats.vbucket_list(def_bucket.name, "replica")
            cb_error = CouchbaseError(self.log, shell_conn[node.ip])
            cb_error.create(CouchbaseError.STOP_MEMCACHED, def_bucket.name)

        self.log.info("2. Initiating the doc_ops with doc TTL")
        self.task_manager.add_new_task(doc_op_task)

        self.sleep(self.maxttl, "3. Sleep for max_ttl time")

        # Revert Memcached error and close the shell_conn
        self.log.info("4. Resuming Memcached on target_nodes")
        for node in target_nodes:
            cb_error = CouchbaseError(self.log, shell_conn[node.ip])
            cb_error.revert(CouchbaseError.STOP_MEMCACHED, def_bucket.name)
            shell_conn[node.ip].disconnect()

        self.log.info("5. Waiting for doc_ops to complete")
        self.task.jython_task_manager.get_task_result(doc_op_task)

        self.bucket_util._expiry_pager(self.cluster, val=1)
        self.sleep(10, "6. Waiting for items to be purged")

        # Read all expired docs to validate all keys present
        doc_op_task = self.task.async_load_gen_docs(
            self.cluster,
            def_bucket,
            gen_create,
            "read",
            batch_size=10,
            process_concurrency=8,
            timeout_secs=self.sdk_timeout,
            sdk_client_pool=self.sdk_client_pool)
        self.task.jython_task_manager.get_task_result(doc_op_task)

        self.log.info("7. Validating docs expired after TTL, "
                      "even before sync_write succeeds")
        if len(doc_op_task.success.keys()) == self.num_items:
            self.fail("No docs deleted after MaxTTL time: %s" %
                      doc_op_task.success.keys())

        self.sleep(10, "8. Waiting for all docs to be purged")
        # Read all expired docs to validate all keys present
        doc_op_task = self.task.async_load_gen_docs(
            self.cluster,
            def_bucket,
            gen_create,
            "read",
            batch_size=10,
            process_concurrency=8,
            timeout_secs=self.sdk_timeout,
            sdk_client_pool=self.sdk_client_pool)
        self.task.jython_task_manager.get_task_result(doc_op_task)

        self.log.info("9. Validating docs expired after TTL")
        if len(doc_op_task.fail.keys()) != self.num_items:
            self.fail("Items not deleted after MaxTTL time: %s" %
                      doc_op_task.success.keys())

        # Validate cas for purged items
        keys_with_cas = list()
        for key, result in doc_op_task.fail.items():
            if result['cas'] != 0:
                keys_with_cas.append(key)
        if len(keys_with_cas) != 0:
            self.fail("Following failed keys has CAS: %s" % keys_with_cas)

        # Recreate all docs without any node issues
        doc_op_task = self.task.async_load_gen_docs(
            self.cluster,
            def_bucket,
            gen_create,
            "create",
            0,
            batch_size=10,
            process_concurrency=8,
            durability=self.durability_level,
            timeout_secs=self.sdk_timeout,
            compression=self.sdk_compression,
            sdk_client_pool=self.sdk_client_pool)
        self.task.jython_task_manager.get_task_result(doc_op_task)

        self.log.info("10. Validating docs exists after creation")
        if len(doc_op_task.fail.keys()) != 0:
            self.fail("Doc recreate failed for keys: %s" %
                      doc_op_task.fail.keys())

        # Final doc_count validation
        self.bucket_util._wait_for_stats_all_buckets(self.cluster,
                                                     self.cluster.buckets)
        self.bucket_util.verify_stats_all_buckets(self.cluster, self.num_items)
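The core expectation in Example #11 is that the TTL clock starts when the
active vbucket receives the mutation, not when the delayed SyncWrite
finally commits. A self-contained sketch of that timing argument (the
numbers are illustrative):

import time

mutation_received_at = time.time()
doc_ttl = 5                                    # mirrors self.maxttl
commit_delay = 20                              # error-sim window before revert
sync_write_committed_at = mutation_received_at + commit_delay

expiry_deadline = mutation_received_at + doc_ttl
assert expiry_deadline < sync_write_committed_at, \
    "Doc should already be expirable before the delayed commit completes"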
Example #12
class BucketWarmup(CollectionBase):
    def setUp(self):
        super(BucketWarmup, self).setUp()
        self.load_spec = self.input.param("load_spec",
                                          "def_load_random_collection")
        self.bucket = self.bucket_util.buckets[0]

    def create_scope(self):
        self.bucket_util.create_scope(self.cluster.master, self.bucket,
                                      self.scope_name)

    def drop_scope(self):
        self.bucket_util.drop_scope(self.cluster.master, self.bucket,
                                    self.scope_name)
        del self.bucket.scopes[self.scope_name]

    def create_collection(self):
        self.bucket_util.create_collection(self.cluster.master, self.bucket,
                                           CbServer.default_scope,
                                           self.collection_name)

    def drop_collection(self):
        self.bucket_util.drop_collection(self.cluster.master, self.bucket,
                                         self.scope_name, self.collection_name)
        del self.bucket.scopes[self.scope_name] \
                       .collections[self.collection_name]

    def random_load(self):
        doc_loading_spec = \
            self.bucket_util.get_crud_template_from_package(self.load_spec)
        self.bucket_util.run_scenario_from_spec(self.task,
                                                self.cluster,
                                                self.bucket_util.buckets,
                                                doc_loading_spec,
                                                mutation_num=0)

    def perform_operation_during_bucket_warmup(self, during_warmup="default"):
        # Stop memcached on the master node
        shell_conn = RemoteMachineShellConnection(self.cluster.master)
        self.error_sim = CouchbaseError(self.log, shell_conn)
        self.error_sim.create(CouchbaseError.STOP_MEMCACHED)
        self.log.info("memcached stopped on master node")

        if during_warmup == "create_scope":
            try:
                self.scope_name = self.bucket_util.get_random_name()
                self.create_scope()
                self.log_failure("drop scope succeeded")
            except Exception as e:
                self.log.info(e)
                self.error_sim.revert(CouchbaseError.STOP_MEMCACHED)
                self.create_scope()

        elif during_warmup == "drop_scope":
            retry = 5
            while retry > 0:
                scope_dict = self.bucket_util.get_random_scopes(
                    self.bucket_util.buckets, 1, 1)
                self.scope_name = scope_dict[
                    self.bucket.name]["scopes"].keys()[0]
                if self.scope_name != "_default":
                    break
                retry -= 1
            try:
                self.drop_scope()
                self.log_failure("drop scope succeeded")
            except Exception as e:
                self.log.info(e)
                self.error_sim.revert(CouchbaseError.STOP_MEMCACHED)
                self.drop_scope()

        elif during_warmup == "create_collection":
            self.collection_name = self.bucket_util.get_random_name()
            try:
                self.create_collection()
                self.log_failure("create collection succeeded")
            except Exception as e:
                self.log.info(e)
                self.error_sim.revert(CouchbaseError.STOP_MEMCACHED)
                self.create_collection()

        elif during_warmup == "drop_collection":
            collections = self.bucket_util.get_random_collections(
                self.bucket_util.buckets, 1, 1, 1)
            scope_dict = collections[self.bucket.name]["scopes"]
            self.scope_name = scope_dict.keys()[0]
            self.collection_name = \
                scope_dict[self.scope_name]["collections"].keys()[0]
            try:
                self.drop_collection()
                self.log_failure("drop collection succeeded")
            except Exception as e:
                self.log.info(e)
                self.error_sim.revert(CouchbaseError.STOP_MEMCACHED)
                self.drop_collection()

        else:
            try:
                self.random_load()
                self.log_failure("random operation succeeded")
            except Exception as e:
                self.log.info(e)
                self.error_sim.revert(CouchbaseError.STOP_MEMCACHED)
                self.random_load()

        self.bucket_util.validate_docs_per_collections_all_buckets()
        self.validate_test_failure()

    def test_create_scope_during_warmup(self):
        self.perform_operation_during_bucket_warmup("create_scope")

    def test_drop_scope_during_warmup(self):
        self.perform_operation_during_bucket_warmup("drop_scope")

    def test_create_collection_during_warmup(self):
        self.perform_operation_during_bucket_warmup("create_collection")

    def test_delete_collection_during_warmup(self):
        self.perform_operation_during_bucket_warmup("drop_collection")

    def test_perform_random_operation_during_warmup(self):
        self.perform_operation_during_bucket_warmup()

    def tearDown(self):
        self.error_sim.revert(CouchbaseError.STOP_MEMCACHED)
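Each branch of perform_operation_during_bucket_warmup repeats the same
try/except/revert/retry shape. A hypothetical refactor sketch of the shared
pattern (the helper name and structure are assumptions, not the suite's
code):

    def _expect_failure_then_retry(self, op, op_name):
        # The operation is expected to fail while memcached is stopped on
        # the master node; once it fails, revert the error and retry the
        # same operation against the warmed-up bucket.
        try:
            op()
            self.log_failure("%s succeeded during warmup" % op_name)
        except Exception as e:
            self.log.info(e)
            self.error_sim.revert(CouchbaseError.STOP_MEMCACHED)
            op()

Usage for the drop_scope branch, for instance, would be
self._expect_failure_then_retry(self.drop_scope, "drop scope").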
Example #13
    def test_create_remove_scope_with_node_crash(self):
        """
        1. Select an error scenario to simulate at random
        2. Create error scenario either before or after scope create/delete
        3. Initiate scope creation/deletion under the bucket
        4. Validate the outcome of scope creation/deletion
        """
        def create_scope(client_type, bucket_obj, scope):
            if client_type == "sdk":
                client.create_scope(scope)
            elif client_type == "rest":
                self.bucket_util.create_scope(self.cluster.master, bucket_obj,
                                              {"name": scope})
            else:
                self.log_failure("Invalid client_type provided")

        def remove_scope(client_type, bucket_obj, scope):
            if client_type == "sdk":
                client.drop_scope(scope)
            elif client_type == "rest":
                self.bucket_util.drop_scope(self.cluster.master,
                                            bucket_obj,
                                            scope)
            else:
                self.log_failure("Invalid client_type provided")

        kv_nodes = self.cluster_util.get_kv_nodes()
        if len(kv_nodes) == 1:
            self.fail("Need atleast two KV nodes to run this test")

        client = None
        action = self.input.param("action", "create")
        crash_during = self.input.param("crash_during", "pre_action")
        data_load_option = self.input.param("data_load_option", None)
        crash_type = self.input.param("simulate_error",
                                      CouchbaseError.KILL_MEMCACHED)

        # Always use a random scope name to create/remove
        # since CREATE/DROP not supported for default scope
        self.scope_name = BucketUtils.get_random_name()

        # Select a KV node other than master node from the cluster
        node_to_crash = kv_nodes[sample(range(1, len(kv_nodes)), 1)[0]]

        # Create a required client object
        if self.client_type == "sdk":
            client = SDKClient([self.cluster.master], self.bucket)

        if action == "remove":
            # Create a scope to be removed
            use_client = sample(["sdk", "rest"], 1)[0]
            create_scope(use_client, self.bucket, self.scope_name)

        # Create an error scenario
        shell = RemoteMachineShellConnection(node_to_crash)
        cb_error = CouchbaseError(self.log, shell)
        cbstat_obj = Cbstats(shell)
        active_vbs = cbstat_obj.vbucket_list(self.bucket.name,
                                             vbucket_type="active")
        target_vbuckets = list(
            set(range(0, 1024)).difference(set(active_vbs)))
        doc_gen = doc_generator(self.key, 0, 1000,
                                target_vbucket=target_vbuckets)

        if crash_during == "pre_action":
            cb_error.create(crash_type)

        if action == "create":
            create_scope(self.client_type, self.bucket, self.scope_name)
        elif action == "remove":
            remove_scope(self.client_type, self.bucket, self.scope_name)

        if crash_during == "post_action":
            cb_error.create(crash_type)

        if data_load_option == "mutate_default_collection":
            task = self.task.async_load_gen_docs(
                self.cluster, self.bucket, doc_gen, "update",
                exp=self.maxttl,
                batch_size=200, process_concurrency=8,
                compression=self.sdk_compression,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout)
            self.task_manager.get_task_result(task)

        self.sleep(60, "Wait before reverting the error scenario")
        cb_error.revert(crash_type)

        # Close SSH and SDK connections
        shell.disconnect()
        if self.client_type == "sdk":
            client.close()

        self.bucket_util.validate_docs_per_collections_all_buckets()
        self.validate_test_failure()
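The vbucket targeting in Examples #13 and #15 loads only vbuckets whose
active copy is NOT on the node being crashed, so the doc load should keep
succeeding while the error is simulated. A self-contained sketch of that
set arithmetic (the active-vbucket list is a placeholder):

total_vbuckets = 1024
active_vbs_on_crashed_node = set(range(0, 342))   # placeholder cbstats output
target_vbuckets = list(
    set(range(total_vbuckets)).difference(active_vbs_on_crashed_node))

assert len(target_vbuckets) == total_vbuckets - len(active_vbs_on_crashed_node)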
Example #14
    def test_doc_size_exceptions(self):
        """
        Basic tests for document CRUD operations using JSON docs
        """
        # self.sleep(10, "Wait for bucket to finish warm-up")
        def_bucket = self.bucket_util.buckets[0]

        self.cluster_util.add_node(self.servers[1])
        if self.target_vbucket and type(self.target_vbucket) is not list:
            self.target_vbucket = [self.target_vbucket]

        self.log.info("Creating doc_generator..")
        # Load basic docs into bucket
        doc_create = doc_generator(self.key,
                                   0,
                                   100,
                                   doc_size=self.doc_size,
                                   doc_type=self.doc_type,
                                   target_vbucket=self.target_vbucket,
                                   vbuckets=self.vbuckets)
        self.log.info("doc_generator created")
        unwanted, retried = self.bucket_util.load_bucket_exceptions(
            self.cluster,
            def_bucket,
            doc_create,
            "create",
            0,
            batch_size=10,
            process_concurrency=1,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to,
            timeout_secs=self.sdk_timeout,
            retries=self.sdk_retries,
            durability="majority")

        err = CouchbaseError(self.log,
                             RemoteMachineShellConnection(self.servers[1]))
        err.create(CouchbaseError.STOP_MEMCACHED)

        doc_create = doc_generator(self.key,
                                   100,
                                   200,
                                   doc_size=self.doc_size,
                                   doc_type=self.doc_type,
                                   target_vbucket=self.target_vbucket,
                                   vbuckets=self.vbuckets)

        unwanted, retried = self.bucket_util.load_bucket_exceptions(
            self.cluster,
            def_bucket,
            doc_create,
            "create",
            0,
            batch_size=100,
            process_concurrency=1,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to,
            timeout_secs=self.sdk_timeout,
            retries=self.sdk_retries,
            durability="majority",
            ignore_exceptions=["RequestTimeoutException"])

        err.revert(CouchbaseError.STOP_MEMCACHED)

        unwanted, retried = self.bucket_util.load_bucket_exceptions(
            self.cluster,
            def_bucket,
            doc_create,
            "create",
            0,
            batch_size=100,
            process_concurrency=1,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to,
            timeout_secs=self.sdk_timeout,
            retries=self.sdk_retries,
            durability="majority")
Example #15
    def test_create_remove_collection_with_node_crash(self):
        """
        1. Select an error scenario to simulate at random
        2. Create error scenario either before or after collection action
        3. Initiate collection creation/deletion under the bucket
        4. Validate the outcome of collection creation/deletion
        """
        def create_collection(client_type, bucket_obj, scope, collection):
            if client_type == "sdk":
                client.create_collection(collection, scope)
                self.bucket_util.create_collection_object(bucket_obj, scope,
                                                          {"name": collection})
            elif client_type == "rest":
                self.bucket_util.create_collection(self.cluster.master,
                                                   bucket_obj,
                                                   scope,
                                                   {"name": collection})
            else:
                self.log_failure("Invalid client_type provided")

        def remove_collection(client_type, bucket_obj, scope, collection):
            if client_type == "sdk":
                client.drop_collection(scope, collection)
                self.bucket_util.mark_collection_as_dropped(bucket_obj, scope,
                                                            collection)
            elif client_type == "rest":
                self.bucket_util.drop_collection(self.cluster.master,
                                                 bucket_obj, scope, collection)
            else:
                self.log_failure("Invalid client_type provided")

        kv_nodes = self.cluster_util.get_kv_nodes()
        if len(kv_nodes) == 1:
            self.fail("Need atleast two KV nodes to run this test")

        client = None
        task = None
        action = self.input.param("action", "create")
        crash_during = self.input.param("crash_during", "pre_action")
        data_load_option = self.input.param("data_load_option", None)
        crash_type = self.input.param("simulate_error",
                                      CouchbaseError.KILL_MEMCACHED)

        if self.scope_name != CbServer.default_scope:
            self.scope_name = \
                BucketUtils.get_random_name(
                    max_length=CbServer.max_scope_name_len)
            self.bucket_util.create_scope(self.cluster.master, self.bucket,
                                          {"name": self.scope_name})
        if self.collection_name != CbServer.default_collection:
            self.collection_name = \
                BucketUtils.get_random_name(
                    max_length=CbServer.max_collection_name_len)

        # Select a KV node other than master node from the cluster
        node_to_crash = kv_nodes[sample(range(1, len(kv_nodes)), 1)[0]]

        client = self.sdk_client_pool.get_client_for_bucket(self.bucket)
        use_client = sample(["sdk", "rest"], 1)[0]

        if action == "remove" \
                and self.collection_name != CbServer.default_collection:
            # Create a collection to be removed
            create_collection(use_client, self.bucket,
                              self.scope_name, self.collection_name)

        # Create an error scenario
        self.log.info("Selected scenario for test '%s'" % crash_type)
        shell = RemoteMachineShellConnection(node_to_crash)
        cb_error = CouchbaseError(self.log, shell)
        cbstat_obj = Cbstats(shell)
        active_vbs = cbstat_obj.vbucket_list(self.bucket.name,
                                             vbucket_type="active")
        target_vbuckets = list(
            set(range(0, 1024)).difference(set(active_vbs)))
        doc_gen = doc_generator(self.key, 0, 1000,
                                target_vbucket=target_vbuckets)

        if crash_during == "pre_action":
            cb_error.create(crash_type)

        if data_load_option == "mutate_default_collection":
            task = self.task.async_load_gen_docs(
                self.cluster, self.bucket, doc_gen,
                DocLoading.Bucket.DocOps.UPDATE,
                exp=self.maxttl,
                batch_size=200, process_concurrency=8,
                compression=self.sdk_compression,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout)

        if action == "create":
            create_collection(self.client_type, self.bucket,
                              self.scope_name, self.collection_name)
        elif action == "remove":
            remove_collection(self.client_type, self.bucket,
                              self.scope_name, self.collection_name)

        if crash_during == "post_action":
            cb_error.create(crash_type)

        if data_load_option == "mutate_default_collection":
            self.task_manager.get_task_result(task)

        self.sleep(60, "Wait before reverting the error scenario")
        cb_error.revert(crash_type)

        # Close SSH and SDK connections
        shell.disconnect()
        if self.atomicity is False:
            self.bucket_util.validate_docs_per_collections_all_buckets(
                self.cluster)
        self.validate_test_failure()
Example #16
    def test_stop_process(self):
        """
        1. Start loading docs into the default bucket
        2. Stop the requested process, which will impact the
           memcached operations
        3. Wait for load bucket task to complete
        4. Validate the docs for durability
        """
        error_to_simulate = self.input.param("simulate_error", None)
        target_node = self.getTargetNode()
        remote = RemoteMachineShellConnection(target_node)
        error_sim = CouchbaseError(self.log, remote)
        target_vbuckets = CrashTest.getVbucketNumbers(
            remote, self.bucket.name, self.target_node)

        bucket_dict = BucketUtils.get_random_collections(
            self.cluster.buckets,
            req_num=1,
            consider_scopes="all",
            consider_buckets="all")

        bucket = BucketUtils.get_bucket_obj(self.cluster.buckets,
                                            bucket_dict.keys()[0])
        scope_name = bucket_dict[bucket.name]["scopes"].keys()[0]
        collection_name = bucket_dict[bucket.name][
            "scopes"][scope_name]["collections"].keys()[0]
        scope = BucketUtils.get_scope_obj(bucket, scope_name)
        collection = BucketUtils.get_collection_obj(scope, collection_name)

        if len(target_vbuckets) == 0:
            self.log.error("No target vbucket list generated to load data")
            remote.disconnect()
            return

        self.start_doc_loading_tasks(target_vbuckets, scope_name, collection)

        # Induce the error condition
        error_sim.create(error_to_simulate)

        self.sleep(20, "Wait before reverting the error condition")
        # Revert the simulated error condition and close the ssh session
        error_sim.revert(error_to_simulate)
        remote.disconnect()

        # Wait for doc loading task to complete
        self.task.jython_task_manager.get_task_result(self.doc_loading_task)
        if self.atomicity:
            self.task.jython_task_manager.get_task_result(
                self.transaction_load_task)
        elif self.N1qltxn:
            self.task.jython_task_manager.get_task_result(
                self.N1ql_load_task)

        if len(self.doc_loading_task.fail.keys()) != 0:
            if self.target_node == "active" or self.num_replicas in [2, 3]:
                self.log_failure("Unwanted failures for keys: %s"
                                 % self.doc_loading_task.fail.keys())

        validate_passed = \
            self.durability_helper.validate_durability_exception(
                self.doc_loading_task.fail,
                SDKException.DurabilityAmbiguousException)
        if not validate_passed:
            self.log_failure("Unwanted exception seen during validation")

        # Get SDK client for CRUD retries
        sdk_client = self.sdk_client_pool.get_client_for_bucket(self.bucket)
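        # Re-attempt each failed doc as a fresh create now that the
        # error condition has been reverted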
        for doc_key, crud_result in self.doc_loading_task.fail.items():
            result = sdk_client.crud(DocLoading.Bucket.DocOps.CREATE,
                                     doc_key,
                                     crud_result["value"],
                                     replicate_to=self.replicate_to,
                                     persist_to=self.persist_to,
                                     durability=self.durability_level,
                                     timeout=self.sdk_timeout)
            if result["status"] is False:
                self.log_failure("Retry of doc_key %s failed: %s"
                                 % (doc_key, result["error"]))
        # Close the SDK connection
        self.sdk_client_pool.release_client(sdk_client)

        self.validate_test_failure()

        self.bucket_util._wait_for_stats_all_buckets(self.cluster.buckets)
        # Validate doc count per collection across all buckets
        if not self.N1qltxn and self.atomicity is False:
            self.bucket_util.validate_docs_per_collections_all_buckets(
                self.cluster)
Example #17
0
    def test_concurrent_failover_timer_reset(self):
        """
        1. Trigger failures on the targeted nodes
        2. Wait for slightly less than the failover timeout
        3. Bring a few nodes back online for a few seconds
        4. Make sure no auto-failover triggers until the next timeout
        5. Validate auto-failover after the new timeout
        """

        services_to_fo = self.failover_order[0].split(":")
        self.nodes_to_fail = self.get_nodes_to_fail(services_to_fo,
                                                    dynamic_fo_method=True)
        expected_fo_nodes = self.num_nodes_to_be_failover
        self.__update_server_obj()
        rand_node = choice(self.nodes_to_fail.keys())
        self.__update_unaffected_node()
        self.__display_failure_node_status("Nodes to be failed")
        try:
            self.log.info("Starting auto-failover procedure")
            failover_task = ConcurrentFailoverTask(
                task_manager=self.task_manager,
                master=self.orchestrator,
                servers_to_fail=self.nodes_to_fail,
                expected_fo_nodes=expected_fo_nodes,
                task_type="induce_failure")
            self.task_manager.add_new_task(failover_task)
            self.sleep(int(self.timeout * 0.7),
                       "Wait before bringing back the failed nodes")

            self.log.info("Bringing back '%s' for some time" % rand_node.ip)
            new_timer = None
            shell = RemoteMachineShellConnection(rand_node)
            cb_err = CouchbaseError(self.log, shell)
            if self.nodes_to_fail[rand_node] == CouchbaseError.STOP_MEMCACHED:
                cb_err.revert(CouchbaseError.STOP_MEMCACHED)
                self.sleep(10, "Wait before creating failure again")
                cb_err.create(CouchbaseError.STOP_MEMCACHED)
                new_timer = time()
            elif self.nodes_to_fail[rand_node] == "stop_couchbase":
                cb_err.revert(CouchbaseError.STOP_SERVER)
                self.sleep(10, "Wait before creating failure again")
                cb_err.create(CouchbaseError.STOP_SERVER)
                new_timer = time()
            shell.disconnect()

            # Validate the previous auto-failover task failed
            # due to the random_node coming back online
            self.task_manager.get_task_result(failover_task)
            self.assertFalse(failover_task.result,
                             "Nodes failed over though nodes became active")

            # Validate auto_failover_settings
            self.validate_failover_settings(True, self.timeout, 0,
                                            self.max_count)

            # Make sure the new auto-failover timing is honoured
            new_timer = new_timer + self.timeout
            while int(time()) < new_timer:
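                # 'count' reflects nodes already auto-failed over; it
                # must remain 0 until the reset timer expires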
                settings = self.rest.get_autofailover_settings()
                if settings.count != 0:
                    self.fail("Nodes failed over before new failover time")

            self.sleep(10, "Wait for failover rebalance to trigger")
            self.rest.monitorRebalance()

            # Validate auto_failover_settings after actual auto failover
            self.validate_failover_settings(True, self.timeout,
                                            expected_fo_nodes, self.max_count)
        finally:
            # Recover all nodes from induced failures
            failover_task = ConcurrentFailoverTask(
                task_manager=self.task_manager,
                master=self.orchestrator,
                servers_to_fail=self.nodes_to_fail,
                expected_fo_nodes=expected_fo_nodes,
                task_type="revert_failure")
            self.task_manager.add_new_task(failover_task)
            self.task_manager.get_task_result(failover_task)

        self.log.info("Rebalance out the failed nodes")
        result = self.cluster_util.rebalance(self.cluster)
        self.assertTrue(result, "Final rebalance failed")

        # Perform collection crud + doc_ops after rebalance operation
        self.__perform_doc_ops()