def test_bulk_sync_write_in_progress(self):
        """
        Validate sync_write_in_progress errors seen by a bulk doc loader

        1. Select the target nodes and collect their active / replica vbs
        2. Induce the configured error (self.simulate_error) on those nodes
        3. Start doc_ops[0] through a collection CRUD spec (async) using
           self.durability_level on the selected vbuckets
        4. For every doc generator of that load, run doc_ops[1] through a
           second bulk loader and expect exactly one failed doc carrying
           AmbiguousTimeoutException with retry reason
           KV_SYNC_WRITE_IN_PROGRESS
        5. Revert the error, wait for the first load and validate results

        Test param 'doc_ops' is read as '<first_op>;<second_op>'
        """
        doc_ops = self.input.param("doc_ops").split(';')
        shell_conn = dict()
        cbstat_obj = dict()
        error_sim = dict()
        vb_info = dict()
        active_vbs = dict()
        replica_vbs = dict()
        # Created once, outside the node loop, so that the seqno info of
        # every target node is retained (not only the last one)
        vb_info["init"] = dict()
        sync_write_in_progress = \
            SDKException.RetryReason.KV_SYNC_WRITE_IN_PROGRESS

        # Override d_level, error_simulation type based on d_level
        self.__get_d_level_and_error_to_simulate()

        target_nodes = DurabilityHelper.getTargetNodes(self.cluster,
                                                       self.nodes_init,
                                                       self.num_nodes_affected)
        for node in target_nodes:
            shell_conn[node.ip] = RemoteMachineShellConnection(node)
            cbstat_obj[node.ip] = Cbstats(node)
            vb_info["init"][node.ip] = cbstat_obj[node.ip].vbucket_seqno(
                self.bucket.name)
            error_sim[node.ip] = CouchbaseError(self.log, shell_conn[node.ip])
            # Fetch active and replica vb_nums of the affected node
            active_vbs[node.ip] = cbstat_obj[node.ip].vbucket_list(
                self.bucket.name, vbucket_type="active")
            replica_vbs[node.ip] = cbstat_obj[node.ip].vbucket_list(
                self.bucket.name, vbucket_type="replica")

        if self.durability_level \
                == Bucket.DurabilityLevel.MAJORITY_AND_PERSIST_TO_ACTIVE:
            # Target all active vbuckets of every affected node
            target_vbuckets = list()
            for target_node in target_nodes:
                target_vbuckets += active_vbs[target_node.ip]
        else:
            # Target only replica vbuckets common to all affected nodes
            target_vbuckets = replica_vbs[target_nodes[0].ip]
            for target_node in target_nodes[1:]:
                target_vbuckets = list(
                    set(target_vbuckets).intersection(
                        set(replica_vbs[target_node.ip])))

        doc_load_spec = dict()
        doc_load_spec["doc_crud"] = dict()
        doc_load_spec[MetaCrudParams.TARGET_VBUCKETS] = target_vbuckets
        doc_load_spec[MetaCrudParams.DURABILITY_LEVEL] = self.durability_level
        doc_load_spec[MetaCrudParams.COLLECTIONS_CONSIDERED_FOR_CRUD] = 5
        doc_load_spec[MetaCrudParams.SCOPES_CONSIDERED_FOR_CRUD] = "all"
        doc_load_spec[MetaCrudParams.SDK_TIMEOUT] = 60
        doc_load_spec["doc_crud"][MetaCrudParams.DocCrud.COMMON_DOC_KEY] \
            = "test_collections"

        if doc_ops[0] == "create":
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.CREATE_PERCENTAGE_PER_COLLECTION] = 1
        elif doc_ops[0] == "update":
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.UPDATE_PERCENTAGE_PER_COLLECTION] = 1
        elif doc_ops[0] == "replace":
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.REPLACE_PERCENTAGE_PER_COLLECTION] = 1
        elif doc_ops[0] == "delete":
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.DELETE_PERCENTAGE_PER_COLLECTION] = 1

        # Second loader supports both sync-write and non-sync-writes
        tem_durability = self.durability_level
        if self.with_non_sync_writes:
            tem_durability = "NONE"

        # Induce error condition for testing
        for node in target_nodes:
            error_sim[node.ip].create(self.simulate_error,
                                      bucket_name=self.bucket.name)

        try:
            doc_loading_task = \
                self.bucket_util.run_scenario_from_spec(
                    self.task,
                    self.cluster,
                    self.cluster.buckets,
                    doc_load_spec,
                    async_load=True)

            self.sleep(5, "Wait for doc ops to reach server")

            for _, s_dict in doc_loading_task.loader_spec.items():
                for s_name, c_dict in s_dict["scopes"].items():
                    for c_name, c_meta in c_dict["collections"].items():
                        for op_type in c_meta:
                            parallel_task = self.task.async_load_gen_docs(
                                self.cluster,
                                self.bucket,
                                c_meta[op_type]["doc_gen"],
                                doc_ops[1],
                                0,
                                scope=s_name,
                                collection=c_name,
                                sdk_client_pool=self.sdk_client_pool,
                                batch_size=self.crud_batch_size,
                                process_concurrency=1,
                                replicate_to=self.replicate_to,
                                persist_to=self.persist_to,
                                durability=tem_durability,
                                timeout_secs=3,
                                print_ops_rate=False,
                                skip_read_on_error=True,
                                task_identifier="parallel_task2")
                            self.task.jython_task_manager.get_task_result(
                                parallel_task)

                            # Validation to verify the sync_in_write_errors
                            # in the second loader (one failed doc expected)
                            failed_docs = parallel_task.fail
                            if len(failed_docs) != 1:
                                self.log_failure(
                                    "Exception not seen for docs: %s" %
                                    failed_docs)

                            valid_exception = self.durability_helper \
                                .validate_durability_exception(
                                    failed_docs,
                                    SDKException.AmbiguousTimeoutException,
                                    retry_reason=sync_write_in_progress)

                            if not valid_exception:
                                self.log_failure("Got invalid exception")
        finally:
            # Revert the introduced error condition. Done in 'finally' so
            # the nodes are restored even when the validation above raised
            for node in target_nodes:
                error_sim[node.ip].revert(self.simulate_error,
                                          bucket_name=self.bucket.name)
            # Shell connections are not required beyond this point
            for node in target_nodes:
                shell_conn[node.ip].disconnect()

        # Wait for doc_loading to complete
        self.task_manager.get_task_result(doc_loading_task)
        self.bucket_util.validate_doc_loading_results(doc_loading_task)
        if doc_loading_task.result is False:
            self.log_failure("Doc CRUDs failed")

        # Validate docs for update success or not
        if doc_ops[0] == "update":
            for _, s_dict in doc_loading_task.loader_spec.items():
                for _, c_dict in s_dict["scopes"].items():
                    for _, c_meta in c_dict["collections"].items():
                        for op_type in c_meta:
                            read_task = self.task.async_load_gen_docs(
                                self.cluster,
                                self.bucket,
                                c_meta[op_type]["doc_gen"],
                                "read",
                                batch_size=self.crud_batch_size,
                                process_concurrency=1,
                                timeout_secs=self.sdk_timeout)
                            self.task_manager.get_task_result(read_task)
                            for key, doc_info in read_task.success.items():
                                # Docs read with cas=0 are not inspected
                                if doc_info["cas"] == 0:
                                    continue
                                doc_val = json.loads(str(doc_info["value"]))
                                if doc_val["mutated"] != 1:
                                    self.log_failure(
                                        "Update failed for key %s: %s" %
                                        (key, doc_info))

        # Validate doc_count per collection
        self.validate_test_failure()
        self.bucket_util.validate_docs_per_collections_all_buckets(
            self.cluster)
    def test_sub_doc_sync_write_in_progress(self):
        """
        Test to simulate sync_write_in_progress error and validate the behavior
        This will validate failure in majority of nodes, where durability will
        surely fail for all CRUDs

        1. Select nodes to simulate the error which will affect the durability
        2. Enable the specified error_scenario on the selected nodes
        3. Perform individual CRUDs and verify sync_write_in_progress errors
        4. Validate the end results

        Test param 'doc_ops' (default 'insert') selects the doc / sub-doc
        operation driven through the collection CRUD spec.
        """

        doc_ops = self.input.param("doc_ops", "insert")

        shell_conn = dict()
        cbstat_obj = dict()
        error_sim = dict()
        vb_info = dict()
        active_vbs = dict()
        replica_vbs = dict()
        vb_info["init"] = dict()
        doc_load_spec = dict()

        # Override d_level, error_simulation type based on d_level
        self.__get_d_level_and_error_to_simulate()

        target_nodes = DurabilityHelper.getTargetNodes(self.cluster,
                                                       self.nodes_init,
                                                       self.num_nodes_affected)
        for node in target_nodes:
            shell_conn[node.ip] = RemoteMachineShellConnection(node)
            cbstat_obj[node.ip] = Cbstats(node)
            # vb_info["init"] is intentionally not re-created here, so the
            # seqno info of every target node is retained
            vb_info["init"][node.ip] = cbstat_obj[node.ip].vbucket_seqno(
                self.bucket.name)
            error_sim[node.ip] = CouchbaseError(self.log, shell_conn[node.ip])
            # Fetch active and replica vb_nums of the affected node
            active_vbs[node.ip] = cbstat_obj[node.ip].vbucket_list(
                self.bucket.name, vbucket_type="active")
            replica_vbs[node.ip] = cbstat_obj[node.ip].vbucket_list(
                self.bucket.name, vbucket_type="replica")

        if self.durability_level \
                == Bucket.DurabilityLevel.MAJORITY_AND_PERSIST_TO_ACTIVE:
            # Target all active vbuckets of every affected node
            target_vbuckets = list()
            for target_node in target_nodes:
                target_vbuckets += active_vbs[target_node.ip]
        else:
            # Target only replica vbuckets common to all affected nodes
            target_vbuckets = replica_vbs[target_nodes[0].ip]
            for target_node in target_nodes[1:]:
                target_vbuckets = list(
                    set(target_vbuckets).intersection(
                        set(replica_vbs[target_node.ip])))

        self.load_data_for_sub_doc_ops()

        doc_load_spec["doc_crud"] = dict()
        doc_load_spec["subdoc_crud"] = dict()
        doc_load_spec["doc_crud"][MetaCrudParams.DocCrud.COMMON_DOC_KEY] \
            = "test_collections"
        doc_load_spec[MetaCrudParams.TARGET_VBUCKETS] = target_vbuckets
        doc_load_spec[MetaCrudParams.DURABILITY_LEVEL] = self.durability_level
        doc_load_spec[MetaCrudParams.COLLECTIONS_CONSIDERED_FOR_CRUD] = 5
        doc_load_spec[MetaCrudParams.SCOPES_CONSIDERED_FOR_CRUD] = "all"
        doc_load_spec[MetaCrudParams.SDK_TIMEOUT] = 60

        # Update mutation spec based on the required doc_operation.
        # Equality (not substring 'in') is used for every comparison
        if doc_ops == DocLoading.Bucket.DocOps.CREATE:
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.CREATE_PERCENTAGE_PER_COLLECTION] = 1
        elif doc_ops == DocLoading.Bucket.DocOps.UPDATE:
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.UPDATE_PERCENTAGE_PER_COLLECTION] = 1
        elif doc_ops == DocLoading.Bucket.DocOps.DELETE:
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.DELETE_PERCENTAGE_PER_COLLECTION] = 1
        elif doc_ops == DocLoading.Bucket.SubDocOps.INSERT:
            doc_load_spec["subdoc_crud"][
                MetaCrudParams.SubDocCrud.INSERT_PER_COLLECTION] = 1
        elif doc_ops == DocLoading.Bucket.SubDocOps.UPSERT:
            doc_load_spec["subdoc_crud"][
                MetaCrudParams.SubDocCrud.UPSERT_PER_COLLECTION] = 1
        elif doc_ops == DocLoading.Bucket.SubDocOps.REMOVE:
            doc_load_spec["subdoc_crud"][
                MetaCrudParams.SubDocCrud.REMOVE_PER_COLLECTION] = 1

        # This is to support both sync-write and non-sync-writes
        tem_durability = self.durability_level
        if self.with_non_sync_writes:
            tem_durability = Bucket.DurabilityLevel.NONE

        # Acquire SDK client from the pool for performing doc_ops locally
        client = self.sdk_client_pool.get_client_for_bucket(self.bucket)
        # Override the crud_batch_size
        self.crud_batch_size = 5
        try:
            # Perform specified action
            for node in target_nodes:
                error_sim[node.ip].create(self.simulate_error,
                                          bucket_name=self.bucket.name)
            try:
                self.sleep(5, "Wait for error simulation to take effect")

                # Initialize tasks and store the task objects
                doc_loading_task = \
                    self.bucket_util.run_scenario_from_spec(
                        self.task,
                        self.cluster,
                        self.cluster.buckets,
                        doc_load_spec,
                        mutation_num=2,
                        batch_size=1,
                        async_load=True)

                self.sleep(10, "Wait for task_1 CRUDs to reach server")

                self._validate_sub_doc_sync_write_errors(
                    client, doc_loading_task, doc_ops, tem_durability)
            finally:
                # Revert the introduced error condition, even when the
                # validation above raised
                for node in target_nodes:
                    error_sim[node.ip].revert(self.simulate_error,
                                              bucket_name=self.bucket.name)

            # Wait for doc_loader_task_1 to complete
            self.task.jython_task_manager.get_task_result(doc_loading_task)
            self.bucket_util.validate_doc_loading_results(doc_loading_task)
            if doc_loading_task.result is False:
                self.log_failure("Doc CRUDs failed")

            # Validate docs for update success or not
            if doc_ops == DocLoading.Bucket.DocOps.UPDATE:
                for _, s_dict in doc_loading_task.loader_spec.items():
                    for _, c_dict in s_dict["scopes"].items():
                        for _, c_meta in c_dict["collections"].items():
                            for op_type in c_meta:
                                # Generator was advanced during validation
                                c_meta[op_type]["doc_gen"].reset()
                                read_task = self.task.async_load_gen_docs(
                                    self.cluster,
                                    self.bucket,
                                    c_meta[op_type]["doc_gen"],
                                    DocLoading.Bucket.DocOps.READ,
                                    batch_size=self.crud_batch_size,
                                    process_concurrency=1,
                                    timeout_secs=self.sdk_timeout)
                                self.task_manager.get_task_result(read_task)
                                for key, doc_info \
                                        in read_task.success.items():
                                    # Docs read with cas=0 are skipped
                                    if doc_info["cas"] == 0:
                                        continue
                                    doc_val = json.loads(
                                        str(doc_info["value"]))
                                    if doc_val["mutated"] != 2:
                                        self.log_failure(
                                            "Update failed for key %s: %s" %
                                            (key, doc_info))
        finally:
            # Release the acquired SDK client (also on failure paths)
            self.sdk_client_pool.release_client(client)
            # Close the shell connections opened for error simulation
            for shell in shell_conn.values():
                shell.disconnect()

        # Verify initial doc load count
        self.bucket_util._wait_for_stats_all_buckets(self.cluster,
                                                     self.cluster.buckets)
        self.bucket_util.validate_docs_per_collections_all_buckets(
            self.cluster)
        self.validate_test_failure()

    def _validate_sub_doc_sync_write_errors(self, client, doc_loading_task,
                                            doc_ops, durability):
        """
        Run sub-doc insert / upsert / remove on one key of every doc
        generator in doc_loading_task.loader_spec and log a test failure
        when the op succeeds or returns an unexpected error.

        :param client: SDK client used to perform the sub-doc CRUDs
        :param doc_loading_task: Task returned by run_scenario_from_spec
        :param doc_ops: Operation performed by doc_loading_task
        :param durability: Durability level passed to the sub-doc CRUDs
        """
        expected_exception = SDKException.AmbiguousTimeoutException
        retry_reason = SDKException.RetryReason.KV_SYNC_WRITE_IN_PROGRESS
        if doc_ops == "create":
            expected_exception = SDKException.DocumentNotFoundException
            retry_reason = None

        sub_doc_ops = [DocLoading.Bucket.SubDocOps.INSERT,
                       DocLoading.Bucket.SubDocOps.UPSERT,
                       DocLoading.Bucket.SubDocOps.REMOVE]

        # NOTE(review): the client never selects the scope/collection of
        # the key before the CRUD - confirm this is intended
        for _, s_dict in doc_loading_task.loader_spec.items():
            for _, c_dict in s_dict["scopes"].items():
                for _, c_meta in c_dict["collections"].items():
                    for op_type in c_meta:
                        key, _ = c_meta[op_type]["doc_gen"].next()
                        for sub_doc_op in sub_doc_ops:
                            val = ["my_mutation", "val"]
                            if sub_doc_op \
                                    == DocLoading.Bucket.SubDocOps.REMOVE:
                                val = "mutated"
                            result = client.crud(sub_doc_op,
                                                 key,
                                                 val,
                                                 durability=durability,
                                                 timeout=2)

                            if result[0]:
                                self.log_failure("Doc crud succeeded for %s" %
                                                 op_type)
                            elif expected_exception \
                                    not in str(result[1][key]["error"]):
                                self.log_failure(
                                    "Invalid exception for key %s: %s" %
                                    (key, result[1][key]["error"]))
                            elif retry_reason is not None and \
                                    retry_reason \
                                    not in str(result[1][key]["error"]):
                                self.log_failure(
                                    "Retry reason missing for key %s: %s" %
                                    (key, result[1][key]["error"]))
    def test_sub_doc_with_process_crash(self):
        """
        Test to make sure durability will succeed even if a node goes down
        due to crash and has enough nodes to satisfy the durability

        1. Select a node from the cluster to simulate the specified error
        2. Perform CRUD on the target bucket with given timeout
        3. Using cbstats to verify the operation succeeds
        4. Validate all mutations are succeeded

        Note: self.sdk_timeout value is considered as 'seconds'
        """
        if self.num_replicas < 2:
            self.fail("Required: num_replicas > 1")

        # Override num_of_nodes affected to 1
        self.num_nodes_affected = 1

        error_sim = dict()
        shell_conn = dict()
        cbstat_obj = dict()
        failover_info = dict()
        seqno_info = dict()
        failover_info["init"] = dict()
        failover_info["afterCrud"] = dict()
        seqno_info["init"] = dict()
        seqno_info["afterCrud"] = dict()
        def_bucket = self.bucket_util.buckets[0]

        # Sub-doc ops: 50% insert, 25% upsert, 25% remove + 10% doc reads
        load_spec = dict()
        load_spec["doc_crud"] = dict()
        load_spec["subdoc_crud"] = dict()
        load_spec["doc_crud"][
            MetaCrudParams.DocCrud.READ_PERCENTAGE_PER_COLLECTION] = 10
        load_spec["subdoc_crud"][
            MetaCrudParams.SubDocCrud.INSERT_PER_COLLECTION] = 50
        load_spec["subdoc_crud"][
            MetaCrudParams.SubDocCrud.UPSERT_PER_COLLECTION] = 25
        load_spec["subdoc_crud"][
            MetaCrudParams.SubDocCrud.REMOVE_PER_COLLECTION] = 25

        self.load_data_for_sub_doc_ops()

        self.log.info("Selecting nodes to simulate error condition")
        target_nodes = DurabilityHelper.getTargetNodes(self.cluster,
                                                       self.nodes_init,
                                                       self.num_nodes_affected)

        self.log.info("Will simulate error condition on %s" % target_nodes)
        try:
            for node in target_nodes:
                # Create shell_connections and record the initial stats
                shell = RemoteMachineShellConnection(node)
                shell_conn[node.ip] = shell
                cbstat_obj[node.ip] = Cbstats(shell)
                seqno_info["init"][node.ip] = \
                    cbstat_obj[node.ip].vbucket_seqno(def_bucket.name)
                failover_info["init"][node.ip] = \
                    cbstat_obj[node.ip].failover_stats(def_bucket.name)

            self.log.info("Perform 'create', 'update', 'delete' mutations")

            load_task = \
                self.bucket_util.run_scenario_from_spec(
                    self.task,
                    self.cluster,
                    self.bucket_util.buckets,
                    load_spec,
                    mutation_num=1,
                    async_load=True)

            self.sleep(5, "Wait for doc loaders to start loading data")

            for node in target_nodes:
                # Perform specified action
                error_sim[node.ip] = CouchbaseError(self.log,
                                                    shell_conn[node.ip])
                error_sim[node.ip].create(self.simulate_error,
                                          bucket_name=def_bucket.name)

            try:
                # Perform new scope/collection creation during doc ops
                # in parallel
                self.__perform_collection_crud(mutation_num=2)

                # Wait for document_loader tasks to complete
                self.task_manager.get_task_result(load_task)
                self.bucket_util.validate_doc_loading_results(load_task)
                if load_task.result is False:
                    self.log_failure(
                        "Sub_doc CRUDs failed with process crash")
            finally:
                # Revert the induced error condition, even when the
                # steps above raised
                for node in target_nodes:
                    error_sim[node.ip].revert(self.simulate_error,
                                              bucket_name=def_bucket.name)

            # Fetch latest failover stats and validate the values
            self.log.info("Validating failover and seqno cbstats")
            for node in target_nodes:
                seqno_info["afterCrud"][node.ip] = \
                    cbstat_obj[node.ip].vbucket_seqno(def_bucket.name)
                failover_info["afterCrud"][node.ip] = \
                    cbstat_obj[node.ip].failover_stats(def_bucket.name)

                # Failover validation
                # NOTE(review): the check passes when the stats are equal
                # while the message says "not updated" - confirm which of
                # the two reflects the expected behavior
                val = \
                    failover_info["init"][node.ip] \
                    == failover_info["afterCrud"][node.ip]
                error_msg = "Failover stats not updated after error condition"
                self.assertTrue(val, msg=error_msg)

                # Seq_no validation (High level)
                val = \
                    seqno_info["init"][node.ip] \
                    != seqno_info["afterCrud"][node.ip]
                self.assertTrue(val,
                                msg="vbucket seq_no not updated after CRUDs")
        finally:
            # Disconnect the shell connections, also when an assertion or
            # any earlier step failed
            for shell in shell_conn.values():
                shell.disconnect()

        self.validate_test_failure()
        # Doc count validation
        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util.validate_docs_per_collections_all_buckets()
    def test_sync_write_in_progress(self):
        """
        Validate sync_write_in_progress errors for individual doc CRUDs

        1. Select the target nodes and collect their active / replica vbs
        2. Induce the configured error (self.simulate_error) on those nodes
        3. Start doc_ops[0] through a collection CRUD spec (async) using
           self.durability_level on the selected vbuckets
        4. Using a separate SDK client, perform doc_ops[1] on one key per
           doc generator and validate the returned exception / retry reason
        5. Read the same key and validate the read status
        6. Revert the error and validate the result of the first load

        Test param 'doc_ops' (default 'create;create') is read as
        '<first_op>;<second_op>'
        """
        doc_ops = self.input.param("doc_ops", "create;create").split(';')
        shell_conn = dict()
        cbstat_obj = dict()
        error_sim = dict()
        vb_info = dict()
        active_vbs = dict()
        replica_vbs = dict()
        # Created once, outside the node loop, so that the seqno info of
        # every target node is retained (not only the last one)
        vb_info["init"] = dict()

        # Override d_level, error_simulation type based on d_level
        self.__get_d_level_and_error_to_simulate()

        target_nodes = DurabilityHelper.getTargetNodes(self.cluster,
                                                       self.nodes_init,
                                                       self.num_nodes_affected)
        for node in target_nodes:
            shell_conn[node.ip] = RemoteMachineShellConnection(node)
            cbstat_obj[node.ip] = Cbstats(node)
            vb_info["init"][node.ip] = cbstat_obj[node.ip].vbucket_seqno(
                self.bucket.name)
            error_sim[node.ip] = CouchbaseError(self.log, shell_conn[node.ip])
            # Fetch active and replica vb_nums of the affected node
            active_vbs[node.ip] = cbstat_obj[node.ip].vbucket_list(
                self.bucket.name, vbucket_type="active")
            replica_vbs[node.ip] = cbstat_obj[node.ip].vbucket_list(
                self.bucket.name, vbucket_type="replica")

        if self.durability_level \
                == Bucket.DurabilityLevel.MAJORITY_AND_PERSIST_TO_ACTIVE:
            # Target all active vbuckets of every affected node
            target_vbuckets = list()
            for target_node in target_nodes:
                target_vbuckets += active_vbs[target_node.ip]
        else:
            # Target only replica vbuckets common to all affected nodes
            target_vbuckets = replica_vbs[target_nodes[0].ip]
            for target_node in target_nodes[1:]:
                target_vbuckets = list(
                    set(target_vbuckets).intersection(
                        set(replica_vbs[target_node.ip])))

        doc_load_spec = dict()
        doc_load_spec["doc_crud"] = dict()
        doc_load_spec["doc_crud"][MetaCrudParams.DocCrud.COMMON_DOC_KEY] \
            = "test_collections"
        doc_load_spec[MetaCrudParams.TARGET_VBUCKETS] = target_vbuckets
        doc_load_spec[MetaCrudParams.COLLECTIONS_CONSIDERED_FOR_CRUD] = 5
        doc_load_spec[MetaCrudParams.SCOPES_CONSIDERED_FOR_CRUD] = "all"
        doc_load_spec[MetaCrudParams.DURABILITY_LEVEL] = self.durability_level
        doc_load_spec[MetaCrudParams.SDK_TIMEOUT] = 60

        if doc_ops[0] == DocLoading.Bucket.DocOps.CREATE:
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.CREATE_PERCENTAGE_PER_COLLECTION] = 1
        elif doc_ops[0] == DocLoading.Bucket.DocOps.UPDATE:
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.UPDATE_PERCENTAGE_PER_COLLECTION] = 1
        elif doc_ops[0] == DocLoading.Bucket.DocOps.REPLACE:
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.REPLACE_PERCENTAGE_PER_COLLECTION] = 1
        elif doc_ops[0] == DocLoading.Bucket.DocOps.DELETE:
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.DELETE_PERCENTAGE_PER_COLLECTION] = 1

        # Create a dedicated SDK client for performing doc_ops locally.
        # It is closed in 'finally' so it does not leak on failures
        client = SDKClient([self.cluster.master], self.bucket)
        try:
            # Induce error condition for testing
            for node in target_nodes:
                error_sim[node.ip].create(self.simulate_error,
                                          bucket_name=self.bucket.name)
                self.sleep(3, "Wait for error simulation to take effect")

            try:
                doc_loading_task = \
                    self.bucket_util.run_scenario_from_spec(
                        self.task,
                        self.cluster,
                        self.cluster.buckets,
                        doc_load_spec,
                        async_load=True)

                self.sleep(5, "Wait for doc ops to reach server")

                self._validate_sync_write_in_progress_errors(
                    client, doc_loading_task, doc_ops)
            finally:
                # Revert the introduced error condition, even when the
                # validation above raised
                for node in target_nodes:
                    error_sim[node.ip].revert(self.simulate_error,
                                              bucket_name=self.bucket.name)

            # Wait for doc_loading to complete
            self.task_manager.get_task_result(doc_loading_task)
            self.bucket_util.validate_doc_loading_results(doc_loading_task)
            if doc_loading_task.result is False:
                self.log_failure("Doc CRUDs failed")
        finally:
            # Release the SDK client created by this test
            client.close()
            # Close the shell connections opened for error simulation
            for shell in shell_conn.values():
                shell.disconnect()

        self.validate_test_failure()

    def _validate_sync_write_in_progress_errors(self, client,
                                                doc_loading_task, doc_ops):
        """
        Perform doc_ops[1] and a read on one key of every doc generator
        in doc_loading_task.loader_spec and log a test failure when the
        returned error or read status is not the expected one.

        :param client: SDK client used to perform the CRUDs
        :param doc_loading_task: Task returned by run_scenario_from_spec
        :param doc_ops: List of [first_op, second_op] under test
        """
        expected_exception = SDKException.AmbiguousTimeoutException
        retry_reason = SDKException.RetryReason.KV_SYNC_WRITE_IN_PROGRESS
        if doc_ops[0] == DocLoading.Bucket.DocOps.CREATE \
                and doc_ops[1] in [DocLoading.Bucket.DocOps.DELETE,
                                   DocLoading.Bucket.DocOps.REPLACE]:
            expected_exception = SDKException.DocumentNotFoundException
            retry_reason = None

        # Durability is passed only for sync-writes
        crud_kwargs = {"exp": 0, "timeout": 2, "time_unit": "seconds"}
        if not self.with_non_sync_writes:
            crud_kwargs["durability"] = self.durability_level

        for _, s_dict in doc_loading_task.loader_spec.items():
            for s_name, c_dict in s_dict["scopes"].items():
                for c_name, c_meta in c_dict["collections"].items():
                    client.select_collection(s_name, c_name)
                    for op_type in c_meta:
                        key, value = c_meta[op_type]["doc_gen"].next()
                        fail = client.crud(doc_ops[1], key, value,
                                           **crud_kwargs)

                        # Validate the returned error from the SDK
                        if expected_exception not in str(fail["error"]):
                            self.log_failure("Invalid exception for %s: %s" %
                                             (key, fail["error"]))
                        if retry_reason \
                                and retry_reason not in str(fail["error"]):
                            self.log_failure(
                                "Invalid retry reason for %s: %s" %
                                (key, fail["error"]))

                        # Try reading the value in SyncWrite state
                        read_result = client.crud("read", key)
                        if doc_ops[0] == "create":
                            # Expected KeyNotFound in case of CREATE op
                            if read_result["status"] is True:
                                self.log_failure(
                                    "%s returned value during SyncWrite %s" %
                                    (key, read_result))
                        else:
                            # Expects prev val in case of other operations
                            if read_result["status"] is False:
                                self.log_failure(
                                    "Key %s read failed for prev value: %s" %
                                    (key, read_result))
    def test_sub_doc_with_persistence_issues(self):
        """
        Run doc reads and sub-doc mutations while an error is simulated on
        the target nodes and validate the resulting cbstats.

        1. Select nodes from the cluster to simulate the specified error
        2. Perform CRUD on the target bucket with given timeout
        3. Using cbstats to verify the operation succeeds
        4. Validate all mutations met the durability condition

        The test returns early (without failing) when the durability level
        is MAJORITY_AND_PERSIST_TO_ACTIVE or PERSIST_TO_MAJORITY.
        """

        if self.durability_level.upper() in [
                Bucket.DurabilityLevel.MAJORITY_AND_PERSIST_TO_ACTIVE,
                Bucket.DurabilityLevel.PERSIST_TO_MAJORITY
        ]:
            self.log.critical("Test not valid for persistence durability")
            return

        error_sim = dict()
        shell_conn = dict()
        cbstat_obj = dict()
        # Stats captured per node IP before ("init") and after
        # ("afterCrud") the CRUD phase
        failover_info = {"init": dict(), "afterCrud": dict()}
        vb_info_info = {"init": dict(), "afterCrud": dict()}
        def_bucket = self.bucket_util.buckets[0]

        load_spec = dict()
        load_spec["doc_crud"] = dict()
        load_spec["subdoc_crud"] = dict()
        load_spec["doc_crud"][
            MetaCrudParams.DocCrud.COMMON_DOC_KEY] = "test_collections"
        load_spec["doc_crud"][
            MetaCrudParams.DocCrud.READ_PERCENTAGE_PER_COLLECTION] = 50
        load_spec["subdoc_crud"][
            MetaCrudParams.SubDocCrud.INSERT_PER_COLLECTION] = 20
        load_spec["subdoc_crud"][
            MetaCrudParams.SubDocCrud.UPSERT_PER_COLLECTION] = 10
        load_spec["subdoc_crud"][
            MetaCrudParams.SubDocCrud.REMOVE_PER_COLLECTION] = 10

        self.log.info("Selecting nodes to simulate error condition")
        target_nodes = DurabilityHelper.getTargetNodes(self.cluster,
                                                       self.nodes_init,
                                                       self.num_nodes_affected)

        # Create new docs for sub-doc operations to run
        self.load_data_for_sub_doc_ops()

        self.log.info("Will simulate error condition on %s" % target_nodes)
        try:
            for node in target_nodes:
                # One shell connection per node, shared by cbstats and the
                # error simulation
                shell_conn[node.ip] = RemoteMachineShellConnection(node)
                cbstat_obj[node.ip] = Cbstats(shell_conn[node.ip])
                vb_info_info["init"][node.ip] = \
                    cbstat_obj[node.ip].vbucket_seqno(def_bucket.name)
                failover_info["init"][node.ip] = \
                    cbstat_obj[node.ip].failover_stats(def_bucket.name)

            for node in target_nodes:
                # Perform specified action
                error_sim[node.ip] = CouchbaseError(self.log,
                                                    shell_conn[node.ip])
                error_sim[node.ip].create(self.simulate_error,
                                          bucket_name=def_bucket.name)

            # Perform CRUDs while the induced error scenario is active
            self.log.info("Perform 'insert', 'upsert', 'remove' mutations")
            doc_loading_task = self.bucket_util.run_scenario_from_spec(
                self.task,
                self.cluster,
                self.bucket_util.buckets,
                load_spec,
                mutation_num=0,
                async_load=True)

            # Perform new scope/collection creation during doc ops in parallel
            self.__perform_collection_crud(mutation_num=1)

            # Wait for doc_loading to complete and validate the doc ops
            # NOTE(review): the other tests in this file call
            # self.bucket_util.validate_doc_loading_results(doc_loading_task)
            # at this point; confirm whether skipping it here is intended.
            self.task_manager.get_task_result(doc_loading_task)
            if doc_loading_task.result is False:
                self.log_failure("Doc CRUDs failed with persistence issue")

            # Revert the induced error condition
            for node in target_nodes:
                error_sim[node.ip].revert(self.simulate_error,
                                          bucket_name=def_bucket.name)

            # Fetch latest failover stats and validate the values
            self.log.info("Validating failover and seqno cbstats")
            for node in target_nodes:
                vb_info_info["afterCrud"][node.ip] = \
                    cbstat_obj[node.ip].vbucket_seqno(def_bucket.name)
                failover_info["afterCrud"][node.ip] = \
                    cbstat_obj[node.ip].failover_stats(def_bucket.name)

                # Failover validation: stats must be identical before and
                # after the CRUDs, so a failure means they got updated
                val = \
                    failover_info["init"][node.ip] \
                    == failover_info["afterCrud"][node.ip]
                self.assertTrue(val, msg="Failover stats got updated")

                # Seq_no validation (High level)
                val = \
                    vb_info_info["init"][node.ip] \
                    != vb_info_info["afterCrud"][node.ip]
                self.assertTrue(
                    val, msg="vbucket seq_no not updated after CRUDs")
        finally:
            # Disconnect the shell connections even when a validation
            # above raised, so that no connection is leaked
            for conn in shell_conn.values():
                conn.disconnect()

        self.validate_test_failure()
        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util.validate_docs_per_collections_all_buckets()

    def test_with_process_crash(self):
        """
        Test to make sure durability will succeed even if a node goes down
        due to crash and has enough nodes to satisfy the durability

        1. Select a node from the cluster to simulate the specified error
        2. Perform CRUD on the target bucket with given timeout
        3. Using cbstats to verify the operation succeeds
        4. Validate all mutations are succeeded

        Fails immediately when num_replicas is less than 2.

        Note: self.sdk_timeout values is considered as 'seconds'
        """
        if self.num_replicas < 2:
            self.fail("Required: num_replicas > 1")

        # Override num_of_nodes affected to 1 (Positive case)
        self.num_nodes_affected = 1

        error_sim = dict()
        shell_conn = dict()
        cbstat_obj = dict()
        # Stats captured per node IP before ("init") and after
        # ("afterCrud") the CRUD phase
        failover_info = {"init": dict(), "afterCrud": dict()}
        vb_info_info = {"init": dict(), "afterCrud": dict()}
        active_vbs_in_target_nodes = list()
        disk_errors = [DiskError.DISK_FULL, DiskError.DISK_FAILURE]

        self.log.info("Selecting nodes to simulate error condition")
        target_nodes = DurabilityHelper.getTargetNodes(self.cluster,
                                                       self.nodes_init,
                                                       self.num_nodes_affected)

        self.log.info("Will simulate error condition on %s" % target_nodes)
        try:
            for node in target_nodes:
                # One shell connection per node, shared by cbstats and the
                # error simulation
                shell_conn[node.ip] = RemoteMachineShellConnection(node)
                cbstat_obj[node.ip] = Cbstats(shell_conn[node.ip])
                active_vbs_in_target_nodes += \
                    cbstat_obj[node.ip].vbucket_list(self.bucket.name,
                                                     "active")
                vb_info_info["init"][node.ip] = \
                    cbstat_obj[node.ip].vbucket_seqno(self.bucket.name)
                failover_info["init"][node.ip] = \
                    cbstat_obj[node.ip].failover_stats(self.bucket.name)

            # Remove active vbuckets from doc_loading to avoid errors
            load_spec = dict()
            load_spec["doc_crud"] = dict()
            load_spec["doc_crud"][
                MetaCrudParams.DocCrud.CREATE_PERCENTAGE_PER_COLLECTION] = 100
            load_spec["doc_crud"][
                MetaCrudParams.DocCrud.UPDATE_PERCENTAGE_PER_COLLECTION] = 25
            load_spec["doc_crud"][
                MetaCrudParams.DocCrud.DELETE_PERCENTAGE_PER_COLLECTION] = 25
            load_spec["doc_crud"][
                MetaCrudParams.DocCrud.COMMON_DOC_KEY] = "test_collections"
            # Every vbucket except the ones active on the target nodes
            load_spec["target_vbuckets"] = list(
                set(range(self.cluster_util.vbuckets))
                - set(active_vbs_in_target_nodes))

            self.log.info("Perform 'create', 'update', 'delete' mutations")
            doc_loading_task = self.bucket_util.run_scenario_from_spec(
                self.task,
                self.cluster,
                self.bucket_util.buckets,
                load_spec,
                mutation_num=1,
                async_load=True)

            self.sleep(5, "Wait for doc loaders to start loading data")

            for node in target_nodes:
                # Perform specified action over the already open connection
                error_sim[node.ip] = CouchbaseError(self.log,
                                                    shell_conn[node.ip])
                error_sim[node.ip].create(self.simulate_error,
                                          bucket_name=self.bucket.name)

            # Perform new scope/collection creation during doc ops in parallel
            self.__perform_collection_crud()

            # Wait for document_loader tasks to complete
            self.task_manager.get_task_result(doc_loading_task)
            self.bucket_util.validate_doc_loading_results(doc_loading_task)
            if doc_loading_task.result is False:
                self.log_failure("Doc CRUDs failed with process crash")

            if self.simulate_error not in disk_errors:
                # Revert the induced error condition
                for node in target_nodes:
                    error_sim[node.ip].revert(self.simulate_error,
                                              bucket_name=self.bucket.name)
                self.sleep(10, "Wait for node recovery to complete")

                # In case of error with Ephemeral bucket, need to rebalance
                # to make sure data is redistributed properly
                if self.bucket_type == Bucket.Type.EPHEMERAL:
                    result = None
                    # At most two rebalance attempts
                    for _ in range(2):
                        result = self.task.rebalance(
                            self.servers[0:self.nodes_init], [], [])
                        if result:
                            break
                        self.sleep(10, "Wait before retrying rebalance")

                    self.assertTrue(result, "Rebalance failed")

            # Fetch latest failover stats and validate the values
            self.log.info("Validating failover and seqno cbstats")
            for node in target_nodes:
                vb_info_info["afterCrud"][node.ip] = \
                    cbstat_obj[node.ip].vbucket_seqno(self.bucket.name)
                failover_info["afterCrud"][node.ip] = \
                    cbstat_obj[node.ip].failover_stats(self.bucket.name)
                init_stats = failover_info["init"][node.ip]
                curr_stats = failover_info["afterCrud"][node.ip]

                # Failover stat validation: the stats must differ for
                # KILL_MEMCACHED, and for ephemeral buckets unless the error
                # is STOP_MEMCACHED; in every other case they must be equal
                if self.simulate_error == CouchbaseError.KILL_MEMCACHED:
                    val = init_stats != curr_stats
                elif self.simulate_error != CouchbaseError.STOP_MEMCACHED \
                        and self.bucket_type == Bucket.Type.EPHEMERAL:
                    val = init_stats != curr_stats
                else:
                    val = init_stats == curr_stats
                error_msg = "Failover stats mismatch after error condition:" \
                            " %s != %s" % (init_stats, curr_stats)
                self.assertTrue(val, msg=error_msg)

                # Seq_no validation (High level)
                val = \
                    vb_info_info["init"][node.ip] \
                    != vb_info_info["afterCrud"][node.ip]
                self.assertTrue(
                    val, msg="vbucket seq_no not updated after CRUDs")
        finally:
            # Disconnect the shell connections on every path (including
            # disk errors and failed assertions) so none is leaked
            for conn in shell_conn.values():
                conn.disconnect()

        # Doc count validation
        self.validate_test_failure()
        self.bucket_util.validate_docs_per_collections_all_buckets()

    def test_with_persistence_issues(self):
        """
        Test to make sure timeout is handled in durability calls
        and document CRUDs are successful even with disk related failures

        1. Select nodes from the cluster to simulate the specified error
        2. Perform CRUD on the target bucket with given timeout
        3. Using cbstats to verify the operation succeeds
        4. Validate all mutations are succeeded

        The test returns early (without failing) when the durability level
        is MAJORITY_AND_PERSIST_TO_ACTIVE or PERSIST_TO_MAJORITY.

        Note: self.sdk_timeout value is considered as 'seconds'
        """

        if self.durability_level in [
                Bucket.DurabilityLevel.MAJORITY_AND_PERSIST_TO_ACTIVE,
                Bucket.DurabilityLevel.PERSIST_TO_MAJORITY
        ]:
            self.log.critical("Test not valid for persistence durability")
            return

        # Per-node CouchbaseError objects (non disk errors only)
        error_sim = dict()
        # Single DiskError object covering all target nodes (disk errors)
        disk_error_sim = None
        shell_conn = dict()
        cbstat_obj = dict()
        # Stats captured per node IP before ("init") and after
        # ("afterCrud") the CRUD phase
        failover_info = {"init": dict(), "afterCrud": dict()}
        vb_info_info = {"init": dict(), "afterCrud": dict()}
        is_disk_error = self.simulate_error \
            in [DiskError.DISK_FULL, DiskError.DISK_FAILURE]

        self.log.info("Selecting nodes to simulate error condition")
        target_nodes = DurabilityHelper.getTargetNodes(self.cluster,
                                                       self.nodes_init,
                                                       self.num_nodes_affected)

        self.log.info("Simulate error condition on %s" % target_nodes)
        try:
            for node in target_nodes:
                # One shell connection per node, shared by cbstats and the
                # error simulation
                shell_conn[node.ip] = RemoteMachineShellConnection(node)
                cbstat_obj[node.ip] = Cbstats(shell_conn[node.ip])
                vb_info_info["init"][node.ip] = \
                    cbstat_obj[node.ip].vbucket_seqno(self.bucket.name)
                failover_info["init"][node.ip] = \
                    cbstat_obj[node.ip].failover_stats(self.bucket.name)

            if is_disk_error:
                disk_error_sim = DiskError(self.log,
                                           self.task_manager,
                                           self.cluster.master,
                                           target_nodes,
                                           60,
                                           0,
                                           False,
                                           120,
                                           disk_location="/data")
                disk_error_sim.create(action=self.simulate_error)
            else:
                for node in target_nodes:
                    # Perform specified action over the open connection
                    error_sim[node.ip] = CouchbaseError(self.log,
                                                        shell_conn[node.ip])
                    error_sim[node.ip].create(self.simulate_error,
                                              bucket_name=self.bucket.name)

            # Perform CRUDs while the induced error scenario is active
            load_spec = dict()
            load_spec["doc_crud"] = dict()
            load_spec["doc_crud"][
                MetaCrudParams.DocCrud.CREATE_PERCENTAGE_PER_COLLECTION] = 100
            load_spec["doc_crud"][
                MetaCrudParams.DocCrud.UPDATE_PERCENTAGE_PER_COLLECTION] = 25
            load_spec["doc_crud"][
                MetaCrudParams.DocCrud.DELETE_PERCENTAGE_PER_COLLECTION] = 25
            load_spec["doc_crud"][
                MetaCrudParams.DocCrud.COMMON_DOC_KEY] = "test_collections"

            self.log.info("Perform 'create', 'update', 'delete' mutations")
            doc_loading_task = self.bucket_util.run_scenario_from_spec(
                self.task,
                self.cluster,
                self.bucket_util.buckets,
                load_spec,
                mutation_num=1,
                async_load=True)

            # Perform new scope/collection creation during doc ops in parallel
            self.__perform_collection_crud(mutation_num=2)

            # Wait for doc_loading to complete and validate the doc ops
            self.task_manager.get_task_result(doc_loading_task)
            self.bucket_util.validate_doc_loading_results(doc_loading_task)
            if doc_loading_task.result is False:
                self.log_failure("Doc CRUDs failed with persistence issue")

            # Revert the induced error condition
            if is_disk_error:
                disk_error_sim.revert(self.simulate_error)
            else:
                for node in target_nodes:
                    error_sim[node.ip].revert(self.simulate_error,
                                              bucket_name=self.bucket.name)
                self.sleep(10, "Wait for node recovery to complete")

            # Doc count validation
            self.bucket_util._wait_for_stats_all_buckets()
            self.bucket_util.validate_docs_per_collections_all_buckets()

            # Fetch latest failover stats and validate the values
            self.log.info("Validating failover and seqno cbstats")
            for node in target_nodes:
                vb_info_info["afterCrud"][node.ip] = \
                    cbstat_obj[node.ip].vbucket_seqno(self.bucket.name)
                failover_info["afterCrud"][node.ip] = \
                    cbstat_obj[node.ip].failover_stats(self.bucket.name)

                # Failover validation: stats must be identical before and
                # after the CRUDs
                val = \
                    failover_info["init"][node.ip] \
                    == failover_info["afterCrud"][node.ip]
                error_msg = "Failover stats got updated"
                self.assertTrue(val, msg=error_msg)

                # Seq_no validation (High level)
                val = \
                    vb_info_info["init"][node.ip] \
                    != vb_info_info["afterCrud"][node.ip]
                self.assertTrue(
                    val, msg="vbucket seq_no not updated after CRUDs")
        finally:
            # Disconnect the shell connections on every path (including
            # disk errors and failed assertions) so none is leaked
            for conn in shell_conn.values():
                conn.disconnect()

        self.validate_test_failure()

        # Doc count validation (repeated after the test-failure check)
        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util.validate_docs_per_collections_all_buckets()

    def test_timeout_with_crud_failures(self):
        """
        Test to make sure timeout is handled in durability calls
        and no documents are loaded when durability cannot be met using
        error simulation in server node side

        This will validate failure in majority of nodes, where durability will
        surely fail for all CRUDs

        1. Select a node from the cluster to simulate the specified error
        2. Perform CRUD on the target bucket with given timeout
        3. Using cbstats to verify no operations succeeds
        4. Revert the error scenario from the cluster to resume durability
        5. Validate all mutations are succeeded after reverting
           the error condition

        Note: self.sdk_timeout values is considered as 'seconds'
        """

        # Local methods to validate vb_seqno

        def compare_vb_stat(stat_1, stat_2, vb, comparison="!="):
            keys_to_check = ["high_seqno", "high_completed_seqno"]
            result = True
            for key in keys_to_check:
                if vb in stat_1.keys():
                    if stat_1[vb]["uuid"] != stat_2[vb]["uuid"]:
                        self.log_failure(
                            "Mismatch in vb-%s UUID. %s != %s" %
                            (vb, stat_1[vb]["uuid"], stat_2[vb]["uuid"]))
                    if comparison == "!=":
                        if stat_1[vb][key] != stat_2[vb][key]:
                            result = False
                            self.log.warning(
                                "Mismatch in vb-%s stat %s. %s != %s" %
                                (vb, key, stat_1[vb][key], stat_2[vb][key]))
                    elif stat_1[vb][key] == stat_2[vb][key]:
                        result = False
                        self.log.warning(
                            "Stat not updated for vb-%s stat %s. "
                            "%s == %s" %
                            (vb, key, stat_1[vb][key], stat_2[vb][key]))
            return result

        def validate_vb_seqno_stats():
            """
            :return retry_validation: Boolean denoting to retry validation
            """
            retry_validation = False
            vb_info["post_timeout"][node.ip] = \
                cbstat_obj[node.ip].vbucket_seqno(self.bucket.name)
            for tem_vb_num in range(self.cluster_util.vbuckets):
                tem_vb_num = str(tem_vb_num)
                if tem_vb_num not in affected_vbs:
                    if compare_vb_stat(vb_info["init"][node.ip],
                                       vb_info["post_timeout"][node.ip],
                                       tem_vb_num) is False:
                        self.log_failure("Unaffected vb-%s stat" % tem_vb_num)
                elif int(tem_vb_num) in target_nodes_vbuckets["active"]:
                    if compare_vb_stat(vb_info["init"][node.ip],
                                       vb_info["post_timeout"][node.ip],
                                       tem_vb_num) is False:
                        self.log.warning("%s - mismatch in %s vb-%s seq_no" %
                                         (node.ip, "active", tem_vb_num))
                elif int(tem_vb_num) in target_nodes_vbuckets["replica"]:
                    if compare_vb_stat(vb_info["init"][node.ip],
                                       vb_info["post_timeout"][node.ip],
                                       tem_vb_num,
                                       comparison="==") is False:
                        retry_validation = True
                        self.log.warning("%s - mismatch in %s vb-%s seq_no" %
                                         (node.ip, "replica", tem_vb_num))
            return retry_validation

        shell_conn = dict()
        cbstat_obj = dict()
        error_sim = dict()
        target_nodes_vbuckets = dict()
        vb_info = dict()
        tasks = dict()
        doc_gen = dict()
        affected_vbs = list()

        target_nodes_vbuckets["active"] = []
        target_nodes_vbuckets["replica"] = []
        vb_info["init"] = dict()
        vb_info["post_timeout"] = dict()
        vb_info["afterCrud"] = dict()

        # Override crud_batch_size to minimum value for testing
        self.crud_batch_size = 5
        self.key = "test_collections"
        self.sdk_timeout = 3

        # Select target vbucket type to load_docs
        target_vb_type = "replica"
        if self.simulate_error == CouchbaseError.STOP_PERSISTENCE \
                and self.durability_level \
                == Bucket.DurabilityLevel.MAJORITY_AND_PERSIST_TO_ACTIVE:
            target_vb_type = "active"

        # Create required scope/collection for successful CRUD operation
        if self.scope_name != CbServer.default_scope:
            self.scope_name = self.bucket_util.get_random_name()
        self.collection_name = self.bucket_util.get_random_name()
        self.log.info("Creating scope::collection %s::%s" %
                      (self.scope_name, self.collection_name))
        self.create_scope_collection()

        # Load docs into created collection
        self.log.info("Loading data into created collection")
        load_gen = doc_generator(self.key, 0, self.num_items)
        task = self.task.async_load_gen_docs(
            self.cluster,
            self.bucket,
            load_gen,
            "create",
            0,
            scope=self.scope_name,
            collection=self.collection_name,
            sdk_client_pool=self.sdk_client_pool,
            batch_size=200,
            process_concurrency=8,
            timeout_secs=60)
        self.task_manager.get_task_result(task)
        if self.subdoc_test:
            load_gen = sub_doc_generator(self.key, 0, self.num_items / 2)
            task = self.task.async_load_gen_sub_docs(
                self.cluster,
                self.bucket,
                load_gen,
                Bucket_Op.SubDocOps.INSERT,
                timeout_secs=self.sdk_timeout,
                compression=self.sdk_compression,
                path_create=True,
                batch_size=100,
                process_concurrency=8,
                durability=self.durability_level,
                scope=self.scope_name,
                collection=self.collection_name,
                sdk_client_pool=self.sdk_client_pool)
            self.task_manager.get_task_result(task)

        self.bucket.scopes[self.scope_name].collections[
            self.collection_name].num_items = self.num_items

        target_nodes = DurabilityHelper.getTargetNodes(self.cluster,
                                                       self.nodes_init,
                                                       self.num_nodes_affected)
        for node in target_nodes:
            shell_conn[node.ip] = RemoteMachineShellConnection(node)
            cbstat_obj[node.ip] = Cbstats(shell_conn[node.ip])
            target_nodes_vbuckets["active"] += \
                cbstat_obj[node.ip].vbucket_list(self.bucket.name,
                                                 vbucket_type="active")
            target_nodes_vbuckets["replica"] += \
                cbstat_obj[node.ip].vbucket_list(self.bucket.name,
                                                 vbucket_type="replica")
            vb_info["init"][node.ip] = cbstat_obj[node.ip].vbucket_seqno(
                self.bucket.name)
            error_sim[node.ip] = CouchbaseError(self.log, shell_conn[node.ip])

        curr_time = int(time.time())
        expected_timeout = curr_time + self.sdk_timeout

        if target_vb_type == "active":
            target_vbs = list(
                set(target_nodes_vbuckets[target_vb_type]).difference(
                    set(target_nodes_vbuckets["replica"])))
        else:
            target_vbs = list(
                set(target_nodes_vbuckets[target_vb_type]).difference(
                    set(target_nodes_vbuckets["active"])))

        # Create required doc_generators
        doc_gen["create"] = doc_generator(self.key,
                                          self.num_items,
                                          self.crud_batch_size,
                                          target_vbucket=target_vbs)
        doc_gen["delete"] = doc_generator(self.key,
                                          0,
                                          self.crud_batch_size,
                                          target_vbucket=target_vbs)
        doc_gen["read"] = doc_generator(self.key,
                                        int(self.num_items / 3),
                                        self.crud_batch_size,
                                        target_vbucket=target_vbs)
        doc_gen["update"] = doc_generator(self.key,
                                          int(self.num_items / 2),
                                          self.crud_batch_size,
                                          target_vbucket=target_vbs)

        # Create required subdoc generators
        doc_gen["insert"] = sub_doc_generator(self.key,
                                              int(self.num_items / 2),
                                              self.crud_batch_size,
                                              target_vbucket=target_vbs)
        doc_gen["upsert"] = sub_doc_generator_for_edit(
            self.key,
            0,
            self.crud_batch_size,
            template_index=1,
            target_vbucket=target_vbs)
        doc_gen["remove"] = sub_doc_generator(self.key,
                                              0,
                                              self.crud_batch_size,
                                              target_vbucket=target_vbs)

        # Perform specified action
        for node in target_nodes:
            error_sim[node.ip].create(self.simulate_error,
                                      bucket_name=self.bucket.name)
        self.sleep(5, "Wait for error_simulation to take effect")

        ops_to_perform = [
            Bucket_Op.DocOps.CREATE, Bucket_Op.DocOps.UPDATE,
            Bucket_Op.DocOps.READ, Bucket_Op.DocOps.DELETE
        ]
        if self.subdoc_test:
            ops_to_perform = [
                Bucket_Op.SubDocOps.INSERT, Bucket_Op.SubDocOps.UPSERT,
                Bucket_Op.SubDocOps.REMOVE
            ]

        for op_type in ops_to_perform:
            self.log.info("Starting doc op %s" % op_type)
            if op_type in Bucket_Op.DOC_OPS:
                tasks[op_type] = self.task.async_load_gen_docs(
                    self.cluster,
                    self.bucket,
                    doc_gen[op_type],
                    op_type,
                    0,
                    scope=self.scope_name,
                    collection=self.collection_name,
                    sdk_client_pool=self.sdk_client_pool,
                    batch_size=1,
                    process_concurrency=8,
                    durability=self.durability_level,
                    timeout_secs=self.sdk_timeout,
                    suppress_error_table=True,
                    print_ops_rate=False,
                    skip_read_on_error=True)
            else:
                tasks[op_type] = self.task.async_load_gen_sub_docs(
                    self.cluster,
                    self.bucket,
                    doc_gen[op_type],
                    op_type,
                    0,
                    scope=self.scope_name,
                    collection=self.collection_name,
                    sdk_client_pool=self.sdk_client_pool,
                    path_create=True,
                    batch_size=1,
                    process_concurrency=8,
                    durability=self.durability_level,
                    timeout_secs=self.sdk_timeout,
                    print_ops_rate=False)

            self.task.jython_task_manager.get_task_result(tasks[op_type])

            # Validate task failures
            if op_type == Bucket_Op.DocOps.READ:
                # Validation for read task
                if len(tasks[op_type].fail.keys()) != 0:
                    self.log_failure("Read failed for few docs: %s" %
                                     tasks[op_type].fail.keys())
            else:
                # Validation of CRUDs - Update / Create / Delete
                for doc_id, crud_result in tasks[op_type].fail.items():
                    vb_num = self.bucket_util.get_vbucket_num_for_key(
                        doc_id, self.cluster_util.vbuckets)
                    if SDKException.DurabilityAmbiguousException \
                            not in str(crud_result["error"]):
                        self.log_failure(
                            "Invalid exception for doc %s, vb %s: %s" %
                            (doc_id, vb_num, crud_result))

        # Revert the specified error scenario
        for node in target_nodes:
            error_sim[node.ip].revert(self.simulate_error,
                                      bucket_name=self.bucket.name)

        # Check whether the timeout triggered properly
        if int(time.time()) < expected_timeout:
            self.log_failure("Timed-out before expected time")

        for op_type in ops_to_perform:
            if op_type == Bucket_Op.DocOps.READ:
                continue
            while doc_gen[op_type].has_next():
                doc_id, _ = doc_gen[op_type].next()
                affected_vbs.append(
                    str(
                        self.bucket_util.get_vbucket_num_for_key(
                            doc_id, self.cluster_util.vbuckets)))

        affected_vbs = list(set(affected_vbs))
        # Fetch latest stats and validate the seq_nos are not updated
        for node in target_nodes:
            retry_count = 0
            max_retry = 3
            while retry_count < max_retry:
                self.log.info("Trying to validate vbseq_no stats: %d" %
                              (retry_count + 1))
                retry_count += 1
                retry_required = validate_vb_seqno_stats()
                if not retry_required:
                    break
                self.sleep(5, "Sleep for vbseq_no stats to update")
            else:
                # This will be exited only if `break` condition is not met
                self.log_failure("validate_vb_seqno_stats verification failed")

        self.validate_test_failure()

        # Get SDK Client from client_pool
        sdk_client = self.sdk_client_pool.get_client_for_bucket(
            self.bucket, self.scope_name, self.collection_name)

        # Doc error validation
        for op_type in ops_to_perform:
            task = tasks[op_type]

            if self.nodes_init == 1 \
                    and op_type != Bucket_Op.DocOps.READ \
                    and len(task.fail.keys()) != (doc_gen[op_type].end
                                                  - doc_gen[op_type].start):
                self.log_failure(
                    "Failed keys %d are less than expected %d" %
                    (len(task.fail.keys()),
                     (doc_gen[op_type].end - doc_gen[op_type].start)))

            # Create table objects for display
            table_view = TableView(self.log.error)
            ambiguous_table_view = TableView(self.log.info)
            table_view.set_headers(["Key", "vBucket", "Exception"])
            ambiguous_table_view.set_headers(["Key", "vBucket"])

            # Iterate failed keys for validation
            for doc_key, doc_info in task.fail.items():
                vb_for_key = self.bucket_util.get_vbucket_num_for_key(doc_key)

                if SDKException.DurabilityAmbiguousException \
                        not in str(doc_info["error"]):
                    table_view.add_row(
                        [doc_key, vb_for_key, doc_info["error"]])

                ambiguous_table_view.add_row([doc_key, str(vb_for_key)])
                if op_type not in Bucket_Op.SUB_DOC_OPS:
                    retry_success = \
                        self.durability_helper.retry_for_ambiguous_exception(
                            sdk_client, op_type, doc_key, doc_info)
                    if not retry_success:
                        self.log_failure("%s failed in retry for %s" %
                                         (op_type, doc_key))

            # Display the tables (if any errors)
            table_view.display("Unexpected exception during %s" % op_type)
            ambiguous_table_view.display("D_Ambiguous exception during %s" %
                                         op_type)

        # Release the acquired client
        self.sdk_client_pool.release_client(sdk_client)

        # Verify doc count after expected CRUD failure
        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util.validate_docs_per_collections_all_buckets()

        # Fetch latest stats and validate the values are updated
        for node in target_nodes:
            vb_info["afterCrud"][node.ip] = \
                cbstat_obj[node.ip].vbucket_seqno(self.bucket.name)
            if vb_info["init"][node.ip] == vb_info["afterCrud"][node.ip]:
                self.log_failure("vBucket seq_no stats not updated")

        # Disconnect the shell connection
        for node in target_nodes:
            shell_conn[node.ip].disconnect()

        self.validate_test_failure()
    def test_timeout_with_successful_crud(self):
        """
        Test to make sure durable CRUDs succeed when an error is simulated
        on the server side only briefly and then reverted while the doc
        loading is still in progress.

        1. Select target node(s) from the cluster to simulate the error
        2. Start async CRUD / sub-doc CRUD using the given timeout
        3. Simulate the error on the target nodes, wait, then revert it
        4. Validate the doc loading task has completed successfully
        5. Using cbstats, verify the vbucket seqno stats have changed
        6. Validate the doc count per collection after all the ops

        Note: self.sdk_timeout values is considered as 'seconds'
              (presumably larger than the 10 seconds error window used
              below - TODO confirm against the test conf)
        """

        doc_crud = MetaCrudParams.DocCrud
        subdoc_crud = MetaCrudParams.SubDocCrud

        # Base spec: every CRUD percentage disabled, enabled one per op_type
        doc_load_spec = dict()
        doc_load_spec[MetaCrudParams.SDK_TIMEOUT] = self.sdk_timeout
        doc_load_spec[MetaCrudParams.DURABILITY_LEVEL] = self.durability_level
        doc_load_spec["doc_crud"] = dict()
        doc_load_spec["subdoc_crud"] = dict()
        doc_load_spec["doc_crud"][doc_crud.COMMON_DOC_KEY] = \
            "test_collections"
        for spec_key in (doc_crud.CREATE_PERCENTAGE_PER_COLLECTION,
                         doc_crud.UPDATE_PERCENTAGE_PER_COLLECTION,
                         doc_crud.DELETE_PERCENTAGE_PER_COLLECTION):
            doc_load_spec["doc_crud"][spec_key] = 0
        for spec_key in (subdoc_crud.INSERT_PER_COLLECTION,
                         subdoc_crud.UPSERT_PER_COLLECTION,
                         subdoc_crud.REMOVE_PER_COLLECTION):
            doc_load_spec["subdoc_crud"][spec_key] = 0

        # op_type -> (spec section, key to enable for that op_type)
        # NOTE(review): "replace" has no entry here, so that iteration runs
        # the spec with every percentage left at 0 - confirm it is intended
        spec_key_for_op = {
            "create": ("doc_crud",
                       doc_crud.CREATE_PERCENTAGE_PER_COLLECTION),
            "update": ("doc_crud",
                       doc_crud.UPDATE_PERCENTAGE_PER_COLLECTION),
            "delete": ("doc_crud",
                       doc_crud.DELETE_PERCENTAGE_PER_COLLECTION),
            "read": ("doc_crud",
                     doc_crud.READ_PERCENTAGE_PER_COLLECTION),
            "insert": ("subdoc_crud", subdoc_crud.INSERT_PER_COLLECTION),
            "upsert": ("subdoc_crud", subdoc_crud.UPSERT_PER_COLLECTION),
            "remove": ("subdoc_crud", subdoc_crud.REMOVE_PER_COLLECTION),
        }

        ops_to_perform = ["create", "update", "read", "replace", "delete"]
        if self.subdoc_test:
            ops_to_perform = ["insert", "upsert", "remove"]

        shell_conn = dict()
        cbstat_obj = dict()
        error_sim = dict()
        init_seqno = dict()

        target_nodes = DurabilityHelper.getTargetNodes(self.cluster,
                                                       self.nodes_init,
                                                       self.num_nodes_affected)
        try:
            for node in target_nodes:
                shell_conn[node.ip] = RemoteMachineShellConnection(node)
                cbstat_obj[node.ip] = Cbstats(shell_conn[node.ip])
                init_seqno[node.ip] = cbstat_obj[node.ip].vbucket_seqno(
                    self.bucket.name)
                error_sim[node.ip] = CouchbaseError(self.log,
                                                    shell_conn[node.ip])

            for op_type in ops_to_perform:
                self.log.info("Performing '%s' with timeout=%s" %
                              (op_type, self.sdk_timeout))
                curr_spec = deepcopy(doc_load_spec)
                if op_type in spec_key_for_op:
                    section, spec_key = spec_key_for_op[op_type]
                    curr_spec[section][spec_key] = 5
                if op_type == "read":
                    # Reads additionally get timeouts as retry exceptions
                    curr_spec[MetaCrudParams.RETRY_EXCEPTIONS] = [
                        SDKException.TimeoutException
                    ]

                doc_loading_task = \
                    self.bucket_util.run_scenario_from_spec(
                        self.task,
                        self.cluster,
                        self.bucket_util.buckets,
                        curr_spec,
                        mutation_num=1,
                        async_load=True,
                        validate_task=False)

                # Perform specified action
                for node in target_nodes:
                    error_sim[node.ip].create(self.simulate_error,
                                              bucket_name=self.bucket.name)

                self.sleep(10, "Wait before reverting the error condition")

                # Revert the specified error scenario
                for node in target_nodes:
                    error_sim[node.ip].revert(self.simulate_error,
                                              bucket_name=self.bucket.name)

                self.task_manager.get_task_result(doc_loading_task)
                self.bucket_util.validate_doc_loading_results(
                    doc_loading_task)
                if doc_loading_task.result is False:
                    self.fail("Doc_loading for '%s' failed" % op_type)

                # Fetch latest stats and validate the values are updated
                # NOTE(review): the baseline is captured only once before
                # all ops, so after an earlier op has changed the seqnos
                # this check also passes for every later op
                for node in target_nodes:
                    curr_stat = cbstat_obj[node.ip].vbucket_seqno(
                        self.bucket.name)
                    if init_seqno[node.ip] == curr_stat:
                        self.log_failure(
                            "vbucket_seqno not updated. %s == %s" %
                            (init_seqno[node.ip], curr_stat))
        finally:
            # Disconnect the shell connections even if the test aborted
            # midway, so that failed runs do not leak remote connections
            for shell in shell_conn.values():
                shell.disconnect()

        # Verify initial doc load count
        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util.validate_docs_per_collections_all_buckets()
        self.validate_test_failure()