Example #1
class CrashTest(CollectionBase):
    def setUp(self):
        super(CrashTest, self).setUp()

        self.doc_ops = self.input.param("doc_ops", None)
        self.process_name = self.input.param("process", None)
        self.service_name = self.input.param("service", "data")
        self.sig_type = self.input.param("sig_type", "SIGKILL").upper()
        self.target_node = self.input.param("target_node", "active")
        self.client_type = self.input.param("client_type", "sdk").lower()
        self.N1qltxn = self.input.param("N1qltxn", False)

        self.pre_warmup_stats = dict()
        self.timeout = 120
        self.new_docs_to_add = 10000

        if self.doc_ops is not None:
            self.doc_ops = self.doc_ops.split(";")

        if not self.atomicity:
            self.durability_helper = DurabilityHelper(
                self.log, self.nodes_init,
                durability=self.durability_level,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to)

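        # Expected cbstats counters after the initial load; verified later
        # against cbstats vbucket-details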
        verification_dict = dict()
        verification_dict["ops_create"] = \
            self.cluster.buckets[0].scopes[
                CbServer.default_scope].collections[
                CbServer.default_collection].num_items
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["pending_writes"] = 0
        if self.durability_level:
            verification_dict["sync_write_committed_count"] = \
                verification_dict["ops_create"]

        # Load initial documents into the buckets
        transaction_gen_create = doc_generator(
            "transaction_key", 0, self.num_items,
            key_size=self.key_size,
            doc_size=self.doc_size,
            doc_type=self.doc_type,
            target_vbucket=self.target_vbucket,
            vbuckets=self.cluster_util.vbuckets)
        gen_create = doc_generator(
            self.key, 0, self.num_items,
            key_size=self.key_size,
            doc_size=self.doc_size,
            doc_type=self.doc_type,
            target_vbucket=self.target_vbucket,
            vbuckets=self.cluster_util.vbuckets)
        if self.atomicity:
            transaction_task = self.task.async_load_gen_docs_atomicity(
                self.cluster, self.cluster.buckets,
                transaction_gen_create, DocLoading.Bucket.DocOps.CREATE,
                exp=0,
                batch_size=10,
                process_concurrency=self.process_concurrency,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                update_count=self.update_count,
                transaction_timeout=self.transaction_timeout,
                commit=True,
                sync=self.sync)
            self.task.jython_task_manager.get_task_result(transaction_task)
        for bucket in self.cluster.buckets:
            task = self.task.async_load_gen_docs(
                self.cluster, bucket, gen_create,
                DocLoading.Bucket.DocOps.CREATE, self.maxttl,
                persist_to=self.persist_to,
                replicate_to=self.replicate_to,
                durability=self.durability_level,
                batch_size=10, process_concurrency=8)
            self.task.jython_task_manager.get_task_result(task)
            self.bucket_util._wait_for_stats_all_buckets(self.cluster.buckets)

            self.cluster.buckets[0].scopes[
                CbServer.default_scope].collections[
                CbServer.default_collection].num_items += self.num_items
            verification_dict["ops_create"] += self.num_items
            if self.durability_level:
                verification_dict["sync_write_committed_count"] += \
                    self.num_items
            # Verify cbstats vbucket-details
            # Note: durability_helper is created only when atomicity is False
            if self.atomicity is False:
                stats_failed = \
                    self.durability_helper.verify_vbucket_details_stats(
                        bucket, self.cluster_util.get_kv_nodes(),
                        vbuckets=self.cluster_util.vbuckets,
                        expected_val=verification_dict)
                if stats_failed:
                    self.fail("Cbstats verification failed")
                self.bucket_util.verify_stats_all_buckets(
                    self.cluster,
                    self.cluster.buckets[0].scopes[
                        CbServer.default_scope].collections[
                        CbServer.default_collection].num_items)
        self.bucket = self.cluster.buckets[0]
        if self.N1qltxn:
            self.n1ql_server = self.cluster_util.get_nodes_from_services_map(
                                service_type="n1ql",
                                get_all_nodes=True)
            self.n1ql_helper = N1QLHelper(server=self.n1ql_server,
                                          use_rest=True,
                                          buckets=self.cluster.buckets,
                                          log=self.log,
                                          scan_consistency='REQUEST_PLUS',
                                          num_collection=3,
                                          num_buckets=1,
                                          num_savepoints=1,
                                          override_savepoint=False,
                                          num_stmt=10,
                                          load_spec=self.data_spec_name)
            self.bucket_col = self.n1ql_helper.get_collections()
            self.stmts = self.n1ql_helper.get_stmt(self.bucket_col)
            self.stmts = self.n1ql_helper.create_full_stmts(self.stmts)
        self.log.info("==========Finished CrashTest setup========")

    def tearDown(self):
        super(CrashTest, self).tearDown()

    def getTargetNode(self):
        if len(self.cluster.nodes_in_cluster) > 1:
            return self.cluster.nodes_in_cluster[randint(0, self.nodes_init-1)]
        return self.cluster.master

    def start_doc_loading_tasks(self, target_vbuckets,
                                scope_name, collection_obj):
        # Create doc_generator targeting only the active/replica vbuckets
        # present in the target_node
        transaction_gen_load = doc_generator(
            "transaction_key", self.num_items, self.new_docs_to_add,
            key_size=self.key_size,
            doc_size=self.doc_size,
            doc_type=self.doc_type,
            target_vbucket=target_vbuckets,
            vbuckets=self.cluster_util.vbuckets)
        gen_load = doc_generator(
            self.key, self.num_items, self.new_docs_to_add,
            key_size=self.key_size,
            doc_size=self.doc_size,
            doc_type=self.doc_type,
            target_vbucket=target_vbuckets,
            vbuckets=self.cluster_util.vbuckets)
        if self.atomicity:
            self.transaction_load_task = \
                self.task.async_load_gen_docs_atomicity(
                    self.cluster, self.cluster.buckets,
                    transaction_gen_load, DocLoading.Bucket.DocOps.CREATE,
                    exp=0,
                    batch_size=10,
                    process_concurrency=self.process_concurrency,
                    replicate_to=self.replicate_to,
                    persist_to=self.persist_to,
                    durability=self.durability_level,
                    timeout_secs=self.sdk_timeout,
                    update_count=self.update_count,
                    transaction_timeout=self.transaction_timeout,
                    commit=True,
                    sync=self.sync)
            collection_obj.num_items += self.new_docs_to_add
        elif self.N1qltxn:
            self.N1ql_load_task = self.task.async_n1qlTxn_query(
                self.stmts,
                n1ql_helper=self.n1ql_helper,
                commit=True,
                scan_consistency="REQUEST_PLUS")
        self.doc_loading_task = self.task.async_load_gen_docs(
            self.cluster, self.bucket, gen_load,
            DocLoading.Bucket.DocOps.CREATE,
            exp=0,
            batch_size=10,
            process_concurrency=8,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to,
            durability=self.durability_level,
            timeout_secs=self.sdk_timeout,
            scope=scope_name, collection=collection_obj.name,
            skip_read_on_error=True)
        collection_obj.num_items += self.new_docs_to_add

    @staticmethod
    def getVbucketNumbers(shell_conn, bucket_name, replica_type):
        cb_stats = Cbstats(shell_conn)
        return cb_stats.vbucket_list(bucket_name, replica_type)

    def test_create_remove_scope_with_node_crash(self):
        """
        1. Select an error scenario to simulate at random
        2. Create the error scenario either before or after the scope create/delete
        3. Initiate scope creation/deletion under the bucket
        4. Validate the outcome of scope creation/deletion
        """
        def create_scope(client_type, bucket_obj, scope):
            if client_type == "sdk":
                client.create_scope(scope)
                self.bucket_util.create_scope_object(bucket_obj,
                                                     {"name": scope})
            elif client_type == "rest":
                self.bucket_util.create_scope(self.cluster.master, bucket_obj,
                                              {"name": scope})
            else:
                self.log_failure("Invalid client_type provided")

        def remove_scope(client_type, bucket_obj, scope):
            if client_type == "sdk":
                client.drop_scope(scope)
                self.bucket_util.mark_scope_as_dropped(bucket_obj, scope)
            elif client_type == "rest":
                self.bucket_util.drop_scope(self.cluster.master,
                                            bucket_obj,
                                            scope)
            else:
                self.log_failure("Invalid client_type provided")

        kv_nodes = self.cluster_util.get_kv_nodes()
        if len(kv_nodes) == 1:
            self.fail("Need atleast two KV nodes to run this test")

        client = None
        task = None
        action = self.input.param("action", "create")
        crash_during = self.input.param("crash_during", "pre_action")
        data_load_option = self.input.param("data_load_option", None)
        crash_type = self.input.param("simulate_error",
                                      CouchbaseError.KILL_MEMCACHED)

        # Always use a random scope name for create/remove,
        # since CREATE/DROP is not supported for the default scope
        self.scope_name = \
            BucketUtils.get_random_name(max_length=CbServer.max_scope_name_len)

        # Select a KV node other than master node from the cluster
        node_to_crash = kv_nodes[sample(range(1, len(kv_nodes)), 1)[0]]

        client = self.sdk_client_pool.get_client_for_bucket(self.bucket)
        use_client = sample(["sdk", "rest"], 1)[0]
        if action == "remove":
            # Create a scope to be removed
            create_scope(use_client, self.bucket, self.scope_name)

        # Create an error scenario
        shell = RemoteMachineShellConnection(node_to_crash)
        cb_error = CouchbaseError(self.log, shell)
        cbstat_obj = Cbstats(shell)
        active_vbs = cbstat_obj.vbucket_list(self.bucket.name,
                                             vbucket_type="active")
        target_vbuckets = list(
            set(range(0, 1024)).difference(set(active_vbs)))
        doc_gen = doc_generator(self.key, 0, 1000,
                                target_vbucket=target_vbuckets)

        if crash_during == "pre_action":
            cb_error.create(crash_type)

        if data_load_option == "mutate_default_collection":
            task = self.task.async_load_gen_docs(
                self.cluster, self.bucket, doc_gen,
                DocLoading.Bucket.DocOps.UPDATE,
                exp=self.maxttl,
                batch_size=200, process_concurrency=4,
                compression=self.sdk_compression,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                sdk_client_pool=self.sdk_client_pool)

        if action == "create":
            create_scope(self.client_type, self.bucket, self.scope_name)
        elif action == "remove":
            remove_scope(self.client_type, self.bucket, self.scope_name)

        if crash_during == "post_action":
            cb_error.create(crash_type)

        self.sleep(60, "Wait before reverting the error scenario")
        cb_error.revert(crash_type)

        if data_load_option == "mutate_default_collection":
            self.task_manager.get_task_result(task)

        # Close SSH and SDK connections
        shell.disconnect()
        if self.atomicity is False:
            self.bucket_util.validate_docs_per_collections_all_buckets(
                self.cluster)
        self.validate_test_failure()

    def test_create_remove_collection_with_node_crash(self):
        """
        1. Select an error scenario to simulate at random
        2. Create the error scenario either before or after the collection action
        3. Initiate collection creation/deletion under the bucket
        4. Validate the outcome of collection creation/deletion
        """
        def create_collection(client_type, bucket_obj, scope, collection):
            if client_type == "sdk":
                client.create_collection(collection, scope)
                self.bucket_util.create_collection_object(bucket_obj, scope,
                                                          {"name": collection})
            elif client_type == "rest":
                self.bucket_util.create_collection(self.cluster.master,
                                                   bucket_obj,
                                                   scope,
                                                   {"name": collection})
            else:
                self.log_failure("Invalid client_type provided")

        def remove_collection(client_type, bucket_obj, scope, collection):
            if client_type == "sdk":
                client.drop_collection(scope, collection)
                self.bucket_util.mark_collection_as_dropped(bucket_obj, scope,
                                                            collection)
            elif client_type == "rest":
                self.bucket_util.drop_collection(self.cluster.master,
                                                 bucket_obj, scope, collection)
            else:
                self.log_failure("Invalid client_type provided")

        kv_nodes = self.cluster_util.get_kv_nodes()
        if len(kv_nodes) == 1:
            self.fail("Need atleast two KV nodes to run this test")

        client = None
        task = None
        action = self.input.param("action", "create")
        crash_during = self.input.param("crash_during", "pre_action")
        data_load_option = self.input.param("data_load_option", None)
        crash_type = self.input.param("simulate_error",
                                      CouchbaseError.KILL_MEMCACHED)

        if self.scope_name != CbServer.default_scope:
            self.scope_name = \
                BucketUtils.get_random_name(
                    max_length=CbServer.max_scope_name_len)
            self.bucket_util.create_scope(self.cluster.master, self.bucket,
                                          {"name": self.scope_name})
        if self.collection_name != CbServer.default_collection:
            self.collection_name = \
                BucketUtils.get_random_name(
                    max_length=CbServer.max_collection_name_len)

        # Select a KV node other than master node from the cluster
        node_to_crash = kv_nodes[sample(range(1, len(kv_nodes)), 1)[0]]

        client = self.sdk_client_pool.get_client_for_bucket(self.bucket)
        use_client = sample(["sdk", "rest"], 1)[0]

        if action == "remove" \
                and self.collection_name != CbServer.default_collection:
            # Create a collection to be removed
            create_collection(use_client, self.bucket,
                              self.scope_name, self.collection_name)

        # Create an error scenario
        self.log.info("Selected scenario for test '%s'" % crash_type)
        shell = RemoteMachineShellConnection(node_to_crash)
        cb_error = CouchbaseError(self.log, shell)
        cbstat_obj = Cbstats(shell)
        active_vbs = cbstat_obj.vbucket_list(self.bucket.name,
                                             vbucket_type="active")
        target_vbuckets = list(
            set(range(0, 1024)).difference(set(active_vbs)))
        doc_gen = doc_generator(self.key, 0, 1000,
                                target_vbucket=target_vbuckets)

        if crash_during == "pre_action":
            cb_error.create(crash_type)

        if data_load_option == "mutate_default_collection":
            task = self.task.async_load_gen_docs(
                self.cluster, self.bucket, doc_gen,
                DocLoading.Bucket.DocOps.UPDATE,
                exp=self.maxttl,
                batch_size=200, process_concurrency=8,
                compression=self.sdk_compression,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout)

        if action == "create":
            create_collection(self.client_type, self.bucket,
                              self.scope_name, self.collection_name)
        elif action == "remove":
            remove_collection(self.client_type, self.bucket,
                              self.scope_name, self.collection_name)

        if crash_during == "post_action":
            cb_error.create(crash_type)

        if data_load_option == "mutate_default_collection":
            self.task_manager.get_task_result(task)

        self.sleep(60, "Wait before reverting the error scenario")
        cb_error.revert(crash_type)

        # Close SSH and SDK connections
        shell.disconnect()
        if self.atomicity is False:
            self.bucket_util.validate_docs_per_collections_all_buckets(
                self.cluster)
        self.validate_test_failure()

    def test_stop_process(self):
        """
        1. Start loading docs into the default bucket
        2. Stop the requested process, which will impact the
           memcached operations
        3. Wait for load bucket task to complete
        4. Validate the docs for durability
        """
        error_to_simulate = self.input.param("simulate_error", None)
        target_node = self.getTargetNode()
        remote = RemoteMachineShellConnection(target_node)
        error_sim = CouchbaseError(self.log, remote)
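        # Fetch the vbuckets (active or replica, per self.target_node)
        # hosted on the node that will be affected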
        target_vbuckets = CrashTest.getVbucketNumbers(
            remote, self.bucket.name, self.target_node)

        bucket_dict = BucketUtils.get_random_collections(
            self.cluster.buckets,
            req_num=1,
            consider_scopes="all",
            consider_buckets="all")

        bucket = BucketUtils.get_bucket_obj(self.cluster.buckets,
                                            bucket_dict.keys()[0])
        scope_name = bucket_dict[bucket.name]["scopes"].keys()[0]
        collection_name = bucket_dict[bucket.name][
            "scopes"][scope_name]["collections"].keys()[0]
        scope = BucketUtils.get_scope_obj(
            bucket, scope_name)
        collection = BucketUtils.get_collection_obj(scope, collection_name)

        if len(target_vbuckets) == 0:
            self.log.error("No target vbucket list generated to load data")
            remote.disconnect()
            return

        self.start_doc_loading_tasks(target_vbuckets, scope_name, collection)

        # Induce the error condition
        error_sim.create(error_to_simulate)

        self.sleep(20, "Wait before reverting the error condition")
        # Revert the simulated error condition and close the ssh session
        error_sim.revert(error_to_simulate)
        remote.disconnect()

        # Wait for doc loading task to complete
        self.task.jython_task_manager.get_task_result(self.doc_loading_task)
        if self.atomicity:
            self.task.jython_task_manager.get_task_result(
                self.transaction_load_task)
        elif self.N1qltxn:
            self.task.jython_task_manager.get_task_result(
                self.N1ql_load_task)

        if len(self.doc_loading_task.fail.keys()) != 0:
            if self.target_node == "active" or self.num_replicas in [2, 3]:
                self.log_failure("Unwanted failures for keys: %s"
                                 % self.doc_loading_task.fail.keys())

        validate_passed = \
            self.durability_helper.validate_durability_exception(
                self.doc_loading_task.fail,
                SDKException.DurabilityAmbiguousException)
        if not validate_passed:
            self.log_failure("Unwanted exception seen during validation")

        # Get SDK client for CRUD retries
        sdk_client = self.sdk_client_pool.get_client_for_bucket(self.bucket)
        for doc_key, crud_result in self.doc_loading_task.fail.items():
            result = sdk_client.crud(DocLoading.Bucket.DocOps.CREATE,
                                     doc_key,
                                     crud_result["value"],
                                     replicate_to=self.replicate_to,
                                     persist_to=self.persist_to,
                                     durability=self.durability_level,
                                     timeout=self.sdk_timeout)
            if result["status"] is False:
                self.log_failure("Retry of doc_key %s failed: %s"
                                 % (doc_key, result["error"]))
        # Close the SDK connection
        self.sdk_client_pool.release_client(sdk_client)

        self.validate_test_failure()

        self.bucket_util._wait_for_stats_all_buckets(self.cluster.buckets)
        # Update self.num_items and validate docs per collection
        if not self.N1qltxn and self.atomicity is False:
            self.bucket_util.validate_docs_per_collections_all_buckets(
                self.cluster)

    def test_crash_process(self):
        """
        1. Start loading docs into the default bucket
        2. Crash the requested process, which will not impact the
           memcached operations
        3. Wait for load bucket task to complete
        4. Validate the docs for durability
        """
        def_bucket = self.cluster.buckets[0]
        target_node = self.getTargetNode()
        remote = RemoteMachineShellConnection(target_node)
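        # Default to all vbuckets; narrowed down below when memcached itself
        # is the process being killed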
        target_vbuckets = range(0, self.cluster_util.vbuckets)
        retry_exceptions = list()
        self.transaction_load_task = None
        self.doc_loading_task = None
        self.N1ql_load_task = None

        # If memcached is killed, we should not perform KV ops on that
        # particular node. Otherwise we can target all nodes for KV operations.
        if self.process_name == "memcached":
            target_vbuckets = CrashTest.getVbucketNumbers(
                remote, def_bucket.name, self.target_node)
            if self.target_node == "active":
                retry_exceptions = [SDKException.TimeoutException]
        if len(target_vbuckets) == 0:
            self.log.error("No target vbucket list generated to load data")
            remote.disconnect()
            return

        bucket_dict = BucketUtils.get_random_collections(
            self.cluster.buckets,
            req_num=1,
            consider_scopes="all",
            consider_buckets="all")

        bucket = BucketUtils.get_bucket_obj(self.cluster.buckets,
                                            bucket_dict.keys()[0])
        scope_name = bucket_dict[bucket.name]["scopes"].keys()[0]
        collection_name = bucket_dict[bucket.name][
            "scopes"][scope_name]["collections"].keys()[0]
        scope = BucketUtils.get_scope_obj(
            bucket, scope_name)
        collection = BucketUtils.get_collection_obj(
            scope, collection_name)

        self.start_doc_loading_tasks(target_vbuckets, scope_name, collection)

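        # Map the load task to its op-info so expected exceptions can be
        # verified/ignored once the task completes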
        task_info = dict()
        task_info[self.doc_loading_task] = \
            self.bucket_util.get_doc_op_info_dict(
                def_bucket, DocLoading.Bucket.DocOps.CREATE, 0,
                replicate_to=self.replicate_to, persist_to=self.persist_to,
                durability=self.durability_level,
                timeout=self.sdk_timeout, time_unit="seconds",
                retry_exceptions=retry_exceptions)

        self.sleep(10, "Wait for doc_ops to start")
        self.log.info("Killing {0}:{1} on node {2}"
                      .format(self.process_name, self.service_name,
                              target_node.ip))
        remote.kill_process(self.process_name, self.service_name,
                            signum=signum[self.sig_type])
        remote.disconnect()
        # Wait for tasks completion and validate failures
        if self.transaction_load_task:
            self.task.jython_task_manager.get_task_result(
                self.transaction_load_task)
        if self.N1qltxn:
            self.task.jython_task_manager.get_task_result(
                self.N1ql_load_task)
        self.task_manager.get_task_result(self.doc_loading_task)
        self.bucket_util.verify_doc_op_task_exceptions(task_info,
                                                       self.cluster)
        self.bucket_util.log_doc_ops_task_failures(task_info)

        # Verification stats
        verification_dict = dict()
        verification_dict["ops_create"] = 2*self.num_items
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["pending_writes"] = 0
        if self.durability_level:
            verification_dict["sync_write_committed_count"] = 2*self.num_items

        if self.bucket_type == Bucket.Type.EPHEMERAL \
                and self.process_name == "memcached":
            result = self.task.rebalance(self.servers[:self.nodes_init],
                                         [], [])
            self.assertTrue(result, "Rebalance failed")

        # Validate doc count
        if self.process_name != "memcached":
            stats_failed = \
                self.durability_helper.verify_vbucket_details_stats(
                    def_bucket, self.cluster_util.get_kv_nodes(),
                    vbuckets=self.cluster_util.vbuckets,
                    expected_val=verification_dict)
            if stats_failed:
                self.fail("Cbstats verification failed")

        # Doc count validation per collection
        if not self.N1qltxn and self.atomicity is False:
            self.bucket_util.validate_docs_per_collections_all_buckets(
                self.cluster)
Example #2
class BucketDurabilityBase(BaseTestCase):
    def setUp(self):
        super(BucketDurabilityBase, self).setUp()

        if len(self.cluster.servers) < self.nodes_init:
            self.fail("Not enough nodes for rebalance")

        # Rebalance-in required nodes for testing
        nodes_init = self.cluster.servers[1:self.nodes_init] \
            if self.nodes_init != 1 else []
        self.task.rebalance([self.cluster.master], nodes_init, [])
        self.cluster.nodes_in_cluster.extend([self.cluster.master]+nodes_init)

        # Disable auto-failover to avoid failover of nodes
        status = RestConnection(self.cluster.master) \
            .update_autofailover_settings(False, 120, False)
        self.assertTrue(status, msg="Failure during disabling auto-failover")
        self.bucket_util.add_rbac_user()

        self.durability_helper = DurabilityHelper(
            self.log,
            len(self.cluster.nodes_in_cluster))
        self.kv_nodes = self.cluster_util.get_kv_nodes()

        self.num_nodes_affected = 1
        if self.num_replicas > 1:
            self.num_nodes_affected = 2

        # Bucket create options representation
        self.bucket_template = dict()
        self.bucket_template[Bucket.name] = "default"
        self.bucket_template[Bucket.ramQuotaMB] = 100
        self.bucket_template[Bucket.replicaNumber] = self.num_replicas
        # These two params will be set during each iteration
        self.bucket_template[Bucket.bucketType] = None
        self.bucket_template[Bucket.durabilityMinLevel] = None

        # Print cluster stats
        self.cluster_util.print_cluster_stats()

        self.bucket_types_to_test = [Bucket.Type.MEMBASE,
                                     Bucket.Type.EPHEMERAL,
                                     Bucket.Type.MEMCACHED]

        self.d_level_order = [
            Bucket.DurabilityLevel.NONE,
            Bucket.DurabilityLevel.MAJORITY,
            Bucket.DurabilityLevel.MAJORITY_AND_PERSIST_TO_ACTIVE,
            Bucket.DurabilityLevel.PERSIST_TO_MAJORITY]

        # Dict representing the possible levels supported by each bucket type
        self.possible_d_levels = dict()
        self.possible_d_levels[Bucket.Type.MEMBASE] = \
            self.bucket_util.get_supported_durability_levels()
        self.possible_d_levels[Bucket.Type.EPHEMERAL] = [
            Bucket.DurabilityLevel.NONE,
            Bucket.DurabilityLevel.MAJORITY]
        self.possible_d_levels[Bucket.Type.MEMCACHED] = [
            Bucket.DurabilityLevel.NONE]

        # Dict to store the list of active/replica VBs in each node
        self.vbs_in_node = dict()
        for node in self.cluster_util.get_kv_nodes():
            shell = RemoteMachineShellConnection(node)
            self.vbs_in_node[node] = dict()
            self.vbs_in_node[node]["shell"] = shell
        self.log.info("===== BucketDurabilityBase setup complete =====")

    def tearDown(self):
        # Close all shell_connections opened in setUp()
        for node in self.vbs_in_node:
            self.vbs_in_node[node]["shell"].disconnect()

        super(BucketDurabilityBase, self).tearDown()

        self.summary.display()
        self.validate_test_failure()

    @staticmethod
    def get_cb_stat_verification_dict():
        verification_dict = dict()
        verification_dict["ops_create"] = 0
        verification_dict["ops_update"] = 0
        verification_dict["ops_delete"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["sync_write_committed_count"] = 0
        return verification_dict

    def get_vbucket_type_mapping(self, bucket_name):
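        # Record the active/replica vbucket lists hosted by each KV node
        # for the given bucket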
        for node in self.vbs_in_node.keys():
            cb_stat = Cbstats(self.vbs_in_node[node]["shell"])
            self.vbs_in_node[node]["active"] = \
                cb_stat.vbucket_list(bucket_name, "active")
            self.vbs_in_node[node]["replica"] = \
                cb_stat.vbucket_list(bucket_name, "replica")

    def get_bucket_dict(self, bucket_type, bucket_durability):

        bucket_dict = deepcopy(self.bucket_template)
        bucket_dict[Bucket.bucketType] = bucket_type
        bucket_dict[Bucket.durabilityMinLevel] = \
            BucketDurability[bucket_durability]

        return bucket_dict

    def get_supported_durability_for_bucket(self):
        if self.bucket_type == Bucket.Type.EPHEMERAL:
            return [Bucket.DurabilityLevel.NONE,
                    Bucket.DurabilityLevel.MAJORITY]
        return self.bucket_util.get_supported_durability_levels()

    def validate_durability_with_crud(
            self, bucket, bucket_durability,
            verification_dict,
            doc_start_index=0,
            num_items_to_load=1, op_type="create",
            doc_durability=Bucket.DurabilityLevel.NONE):
        """
        Common API to validate durability settings of the bucket is set
        correctly or not.

        :param bucket: Bucket object to validate
        :param bucket_durability: Durability set for the bucket
                                  Note: Needed because the string stored in the
                                        bucket object differs from this value.
        :param verification_dict: To hold the values for req cbstats to verify
        :param doc_start_index: Starting index to be considered for doc_load
        :param num_items_to_load: Number of items to be loaded to test.
                                  Default is '1'
        :param op_type: Type of CRUD to perform. Default is 'create'
        :param doc_durability: Document durability level to use during CRUD.
                               Default level is 'None'
        :return:
        """
        def get_d_level_used():
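            # Effective durability is the stricter (higher in d_level_order)
            # of the bucket-level and per-doc durability levels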
            if self.d_level_order.index(bucket_durability) \
                    < self.d_level_order.index(doc_durability):
                return doc_durability
            return bucket_durability

        d_level_to_test = get_d_level_used()
        # Nothing to test for durability_level=None (async_write case)
        if d_level_to_test == Bucket.DurabilityLevel.NONE:
            return

        self.log.info("Performing %s operation to validate d_level %s"
                      % (op_type, d_level_to_test))

        # Error conditions can only be simulated on a multi-node cluster;
        # on a single KV node, perform the CRUD without error_sim
        if len(self.vbs_in_node.keys()) > 1:
            # Pick a random node to perform error sim and load
            random_node = choice(self.vbs_in_node.keys())

            target_vb_type, simulate_error = \
                self.durability_helper.get_vb_and_error_type(d_level_to_test)

            doc_gen = doc_generator(
                self.key, doc_start_index, num_items_to_load,
                target_vbucket=self.vbs_in_node[random_node][target_vb_type])
            error_sim = CouchbaseError(self.log,
                                       self.vbs_in_node[random_node]["shell"])

            doc_load_task = self.task.async_load_gen_docs(
                self.cluster, bucket, doc_gen, op_type,
                exp=self.maxttl,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=doc_durability,
                timeout_secs=32,
                batch_size=1,
                skip_read_on_error=True,
                suppress_error_table=True,
                start_task=False,
                sdk_client_pool=self.sdk_client_pool)

            self.sleep(5, "Wait for sdk_client to get warmed_up")
            # Simulate target error condition
            error_sim.create(simulate_error)
            self.sleep(5, "Wait for error_sim to take effect")

            # Start doc_loading task and wait for it to complete
            self.task_manager.add_new_task(doc_load_task)
            self.task_manager.get_task_result(doc_load_task)

            # Revert the induced error condition
            self.sleep(5, "Wait before reverting error_simulation")
            error_sim.revert(simulate_error)

            # Validate failed doc count and exception type from SDK
            if not doc_load_task.fail.keys():
                self.log_failure("Docs inserted without honoring the "
                                 "bucket durability level")
            for key, result in doc_load_task.fail.items():
                if SDKException.DurabilityAmbiguousException \
                        not in str(result["error"]):
                    self.log_failure("Invalid exception for key %s "
                                     "during %s operation: %s"
                                     % (key, op_type, result["error"]))

            verification_dict["sync_write_aborted_count"] += num_items_to_load
        else:
            doc_gen = doc_generator(self.key, doc_start_index,
                                    doc_start_index+num_items_to_load)

        # Retry the same CRUDs without any error simulation in place
        doc_load_task = self.task.async_load_gen_docs(
            self.cluster, bucket, doc_gen, op_type,
            exp=self.maxttl,
            durability=doc_durability,
            timeout_secs=2,
            batch_size=1,
            sdk_client_pool=self.sdk_client_pool)
        self.task_manager.get_task_result(doc_load_task)
        if doc_load_task.fail:
            self.log_failure("Failures seen during CRUD without "
                             "error simulation. Keys failed: %s"
                             % doc_load_task.fail.keys())
        else:
            verification_dict["ops_%s" % op_type] += \
                num_items_to_load
            verification_dict["sync_write_committed_count"] += \
                num_items_to_load

    def getTargetNodes(self):
        def select_random_node(nodes):
            rand_node_index = randint(1, self.nodes_init-1)
            if self.cluster.nodes_in_cluster[rand_node_index] not in node_list:
                nodes.append(self.cluster.nodes_in_cluster[rand_node_index])

        node_list = list()
        if len(self.cluster.nodes_in_cluster) > 1:
            # Choose random nodes, if the cluster is not a single node cluster
            while len(node_list) != self.num_nodes_affected:
                select_random_node(node_list)
        else:
            node_list.append(self.cluster.master)
        return node_list

    def cb_stat_verify(self, verification_dict):
        failed = self.durability_helper.verify_vbucket_details_stats(
            self.bucket_util.buckets[0],
            self.kv_nodes,
            vbuckets=self.cluster_util.vbuckets,
            expected_val=verification_dict)
        if failed:
            self.log_failure("Cbstat vbucket-details validation failed")
        self.summary.add_step("Cbstat vb-details validation")
Example #3
class CrashTest(BaseTestCase):
    def setUp(self):
        super(CrashTest, self).setUp()

        self.doc_ops = self.input.param("doc_ops", None)
        self.process_name = self.input.param("process", None)
        self.service_name = self.input.param("service", "data")
        self.sig_type = self.input.param("sig_type", "SIGKILL").upper()
        self.target_node = self.input.param("target_node", "active")

        self.pre_warmup_stats = {}
        self.timeout = 120
        self.new_docs_to_add = 10000

        if self.doc_ops is not None:
            self.doc_ops = self.doc_ops.split(";")

        nodes_init = self.cluster.servers[1:self.nodes_init] \
            if self.nodes_init != 1 else []
        self.task.rebalance([self.cluster.master], nodes_init, [])
        self.cluster.nodes_in_cluster.extend([self.cluster.master] +
                                             nodes_init)
        if not self.atomicity:
            self.durability_helper = DurabilityHelper(
                self.log,
                self.nodes_init,
                durability=self.durability_level,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to)
        self.bucket_util.create_default_bucket(
            bucket_type=self.bucket_type,
            ram_quota=self.bucket_size,
            replica=self.num_replicas,
            compression_mode="off",
            storage=self.bucket_storage,
            eviction_policy=self.bucket_eviction_policy)
        self.bucket_util.add_rbac_user()

        if self.sdk_client_pool:
            self.log.info("Creating SDK clients for client_pool")
            for bucket in self.bucket_util.buckets:
                self.sdk_client_pool.create_clients(
                    bucket, [self.cluster.master],
                    self.sdk_pool_capacity,
                    compression_settings=self.sdk_compression)

        verification_dict = dict()
        verification_dict["ops_create"] = self.num_items
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["pending_writes"] = 0
        if self.durability_level:
            verification_dict["sync_write_committed_count"] = self.num_items

        # Load initial documents into the buckets
        self.log.info("Loading initial documents")
        gen_create = doc_generator(self.key,
                                   0,
                                   self.num_items,
                                   key_size=self.key_size,
                                   doc_size=self.doc_size,
                                   doc_type=self.doc_type,
                                   target_vbucket=self.target_vbucket,
                                   vbuckets=self.cluster_util.vbuckets)
        if self.atomicity:
            task = self.task.async_load_gen_docs_atomicity(
                self.cluster,
                self.bucket_util.buckets,
                gen_create,
                "create",
                exp=0,
                batch_size=10,
                process_concurrency=self.process_concurrency,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                update_count=self.update_count,
                transaction_timeout=self.transaction_timeout,
                commit=True,
                sync=self.sync)
            self.task.jython_task_manager.get_task_result(task)
        else:
            for bucket in self.bucket_util.buckets:
                task = self.task.async_load_gen_docs(
                    self.cluster,
                    bucket,
                    gen_create,
                    DocLoading.Bucket.DocOps.CREATE,
                    self.maxttl,
                    persist_to=self.persist_to,
                    replicate_to=self.replicate_to,
                    durability=self.durability_level,
                    batch_size=10,
                    process_concurrency=8,
                    sdk_client_pool=self.sdk_client_pool)
                self.task.jython_task_manager.get_task_result(task)

                self.bucket_util._wait_for_stats_all_buckets()
                # Verify cbstats vbucket-details
                stats_failed = \
                    self.durability_helper.verify_vbucket_details_stats(
                        bucket, self.cluster_util.get_kv_nodes(),
                        vbuckets=self.cluster_util.vbuckets,
                        expected_val=verification_dict)

                if stats_failed:
                    self.fail("Cbstats verification failed")

            self.bucket_util.verify_stats_all_buckets(self.num_items)
        self.cluster_util.print_cluster_stats()
        self.bucket_util.print_bucket_stats()
        self.log.info("==========Finished CrashTest setup========")

    def tearDown(self):
        super(CrashTest, self).tearDown()

    def getTargetNode(self):
        if len(self.cluster.nodes_in_cluster) > 1:
            return self.cluster.nodes_in_cluster[randint(
                0, self.nodes_init - 1)]
        return self.cluster.master

    def getVbucketNumbers(self, shell_conn, bucket_name, replica_type):
        cb_stats = Cbstats(shell_conn)
        return cb_stats.vbucket_list(bucket_name, replica_type)

    def test_stop_process(self):
        """
        1. Start loading docs into the default bucket
        2. Stop the requested process, which will impact the
           memcached operations
        3. Wait for load bucket task to complete
        4. Validate the docs for durability
        """
        error_to_simulate = self.input.param("simulate_error", None)
        def_bucket = self.bucket_util.buckets[0]
        target_node = self.getTargetNode()
        remote = RemoteMachineShellConnection(target_node)
        error_sim = CouchbaseError(self.log, remote)
        target_vbuckets = self.getVbucketNumbers(remote, def_bucket.name,
                                                 self.target_node)
        if len(target_vbuckets) == 0:
            self.log.error("No target vbucket list generated to load data")
            remote.disconnect()
            return

        # Create doc_generator targeting only the active/replica vbuckets
        # present in the target_node
        gen_load = doc_generator(self.key,
                                 self.num_items,
                                 self.new_docs_to_add,
                                 key_size=self.key_size,
                                 doc_size=self.doc_size,
                                 doc_type=self.doc_type,
                                 target_vbucket=target_vbuckets,
                                 vbuckets=self.cluster_util.vbuckets)

        if self.atomicity:
            task = self.task.async_load_gen_docs_atomicity(
                self.cluster,
                self.bucket_util.buckets,
                gen_load,
                "create",
                exp=0,
                batch_size=10,
                process_concurrency=self.process_concurrency,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                update_count=self.update_count,
                transaction_timeout=self.transaction_timeout,
                commit=True,
                sync=self.sync)
        else:
            task = self.task.async_load_gen_docs(
                self.cluster,
                def_bucket,
                gen_load,
                "create",
                exp=0,
                batch_size=1,
                process_concurrency=8,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                skip_read_on_error=True)

        # Induce the error condition
        error_sim.create(error_to_simulate)

        self.sleep(20, "Wait before reverting the error condition")
        # Revert the simulated error condition and close the ssh session
        error_sim.revert(error_to_simulate)
        remote.disconnect()

        # Wait for doc loading task to complete
        self.task.jython_task_manager.get_task_result(task)
        if not self.atomicity:
            if len(task.fail.keys()) != 0:
                if self.target_node == "active" or self.num_replicas in [2, 3]:
                    self.log_failure("Unwanted failures for keys: %s" %
                                     task.fail.keys())

            validate_passed = \
                self.durability_helper.validate_durability_exception(
                    task.fail,
                    SDKException.DurabilityAmbiguousException)
            if not validate_passed:
                self.log_failure("Unwanted exception seen during validation")

            # Create SDK connection for CRUD retries
            sdk_client = SDKClient([self.cluster.master], def_bucket)
            for doc_key, crud_result in task.fail.items():
                result = sdk_client.crud("create",
                                         doc_key,
                                         crud_result["value"],
                                         replicate_to=self.replicate_to,
                                         persist_to=self.persist_to,
                                         durability=self.durability_level,
                                         timeout=self.sdk_timeout)
                if result["status"] is False:
                    self.log_failure("Retry of doc_key %s failed: %s" %
                                     (doc_key, result["error"]))
            # Close the SDK connection
            sdk_client.close()

        # Update self.num_items
        self.num_items += self.new_docs_to_add

        if not self.atomicity:
            # Validate doc count
            self.bucket_util._wait_for_stats_all_buckets()
            self.bucket_util.verify_stats_all_buckets(self.num_items)

        self.validate_test_failure()

    def test_crash_process(self):
        """
        1. Start loading docs into the default bucket
        2. Crash the requested process, which will not impact the
           memcached operations
        3. Wait for load bucket task to complete
        4. Validate the docs for durability
        """
        def_bucket = self.bucket_util.buckets[0]
        target_node = self.getTargetNode()
        remote = RemoteMachineShellConnection(target_node)
        target_vbuckets = range(0, self.cluster_util.vbuckets)
        retry_exceptions = list()

        # If memcached is killed, we should not perform KV ops on that
        # particular node. Otherwise we can target all nodes for KV operations.
        if self.process_name == "memcached":
            target_vbuckets = self.getVbucketNumbers(remote, def_bucket.name,
                                                     self.target_node)
            if self.target_node == "active":
                retry_exceptions = [SDKException.TimeoutException]
        if len(target_vbuckets) == 0:
            self.log.error("No target vbucket list generated to load data")
            remote.disconnect()
            return

        # Create doc_generator targeting only the active/replica vbuckets
        # present in the target_node
        gen_load = doc_generator(self.key,
                                 self.num_items,
                                 self.new_docs_to_add,
                                 key_size=self.key_size,
                                 doc_size=self.doc_size,
                                 doc_type=self.doc_type,
                                 target_vbucket=target_vbuckets,
                                 vbuckets=self.cluster_util.vbuckets)
        if self.atomicity:
            task = self.task.async_load_gen_docs_atomicity(
                self.cluster,
                self.bucket_util.buckets,
                gen_load,
                "create",
                exp=0,
                batch_size=10,
                process_concurrency=self.process_concurrency,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                update_count=self.update_count,
                transaction_timeout=self.transaction_timeout,
                commit=True,
                sync=self.sync)
        else:
            task = self.task.async_load_gen_docs(
                self.cluster,
                def_bucket,
                gen_load,
                "create",
                exp=0,
                batch_size=10,
                process_concurrency=8,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                skip_read_on_error=True)

        task_info = dict()
        task_info[task] = self.bucket_util.get_doc_op_info_dict(
            def_bucket,
            "create",
            0,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to,
            durability=self.durability_level,
            timeout=self.sdk_timeout,
            time_unit="seconds",
            retry_exceptions=retry_exceptions)

        self.sleep(10, "Wait for doc_ops to start")
        self.log.info("Killing {0}:{1} on node {2}".format(
            self.process_name, self.service_name, target_node.ip))
        remote.kill_process(self.process_name,
                            self.service_name,
                            signum=signum[self.sig_type])
        remote.disconnect()
        # Wait for tasks completion and validate failures
        if self.atomicity:
            self.task.jython_task_manager.get_task_result(task)
        if not self.atomicity:
            self.bucket_util.verify_doc_op_task_exceptions(
                task_info, self.cluster)
            self.bucket_util.log_doc_ops_task_failures(task_info)

        # Update self.num_items
        self.num_items += self.new_docs_to_add

        # Verification stats
        verification_dict = dict()
        verification_dict["ops_create"] = self.num_items
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["pending_writes"] = 0
        if self.durability_level:
            verification_dict["sync_write_committed_count"] = self.num_items

        if self.bucket_type == Bucket.Type.EPHEMERAL \
                and self.process_name == "memcached":
            self.sleep(10, "Wait for memcached to recover from the crash")
            result = self.task.rebalance(self.servers[:self.nodes_init], [],
                                         [])
            self.assertTrue(result, "Rebalance failed")

        # Validate doc count
        if not self.atomicity:
            self.bucket_util._wait_for_stats_all_buckets()
            self.bucket_util.verify_stats_all_buckets(self.num_items)

            if self.process_name != "memcached":
                stats_failed = \
                    self.durability_helper.verify_vbucket_details_stats(
                        def_bucket, self.cluster_util.get_kv_nodes(),
                        vbuckets=self.cluster_util.vbuckets,
                        expected_val=verification_dict)
                if stats_failed:
                    self.fail("Cbstats verification failed")

    def test_process_error_on_nodes(self):
        """
        Test to validate OoO returns feature
        1. Start parallel CRUDs using a single client
        2. Perform process crash / stop with doc_ops in parallel
        3. Make sure no crash or ep-engine issue is seen with the err_simulation
        """
        tasks = list()
        node_data = dict()
        bucket = self.bucket_util.buckets[0]
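        # Stop-style errors do not recover on their own and must be reverted
        # explicitly; other errors (e.g. kill) are expected to auto-recover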
        revert_errors = [
            CouchbaseError.STOP_MEMCACHED, CouchbaseError.STOP_SERVER,
            CouchbaseError.STOP_BEAMSMP, CouchbaseError.STOP_PERSISTENCE
        ]
        # Overriding sdk_timeout to max
        self.sdk_timeout = 60

        # Disable auto-failover to avoid failover of nodes
        status = RestConnection(self.cluster.master) \
            .update_autofailover_settings(False, 120, False)
        self.assertTrue(status, msg="Failure during disabling auto-failover")

        # Can take 'all_nodes' / 'single_node'
        crash_on = self.input.param("crash_on", "single_node")
        error_to_simulate = self.input.param("simulate_error",
                                             CouchbaseError.KILL_MEMCACHED)
        num_times_to_affect = self.input.param("times_to_affect", 20)
        nodes_to_affect = self.cluster_util.get_kv_nodes()
        if crash_on == "single_node":
            nodes_to_affect = [choice(nodes_to_affect)]

        create_gen = doc_generator(self.key, self.num_items,
                                   self.num_items * 2)
        update_gen = doc_generator(self.key, 0, self.num_items / 2)
        delete_gen = doc_generator(self.key, self.num_items / 2,
                                   self.num_items)

        for node in nodes_to_affect:
            shell = RemoteMachineShellConnection(node)
            node_data[node] = dict()
            node_data[node]["cb_err"] = CouchbaseError(self.log, shell)

        self.log.info("Starting doc-ops")
        for doc_op in self.doc_ops:
            load_gen = update_gen
            if doc_op == DocLoading.Bucket.DocOps.CREATE:
                load_gen = create_gen
            elif doc_op == DocLoading.Bucket.DocOps.DELETE:
                load_gen = delete_gen
            task = self.task.async_load_gen_docs(
                self.cluster,
                bucket,
                load_gen,
                doc_op,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                sdk_client_pool=self.sdk_client_pool,
                batch_size=10,
                process_concurrency=1,
                skip_read_on_error=True,
                print_ops_rate=False)
            tasks.append(task)

        self.log.info("Starting error_simulation on %s" % nodes_to_affect)
        for itr in range(1, num_times_to_affect + 1):
            self.log.info("Iteration :: %d" % itr)
            for node in nodes_to_affect:
                node_data[node]["cb_err"].create(error_to_simulate,
                                                 bucket.name)
            if error_to_simulate in revert_errors:
                self.sleep(30, "Sleep before reverting the error")
                for node in nodes_to_affect:
                    node_data[node]["cb_err"].revert(error_to_simulate,
                                                     bucket.name)
            else:
                self.sleep(10, "Wait for process to come back online")

        # Wait for doc_ops to complete
        for task in tasks:
            self.task_manager.get_task_result(task)
Example #4
class basic_ops(BaseTestCase):
    def setUp(self):
        super(basic_ops, self).setUp()

        self.doc_ops = self.input.param("doc_ops", "").split(";")
        self.observe_test = self.input.param("observe_test", False)
        # Scope/collection names can be the defaults, or random ones
        # created for the test
        self.scope_name = self.input.param("scope", CbServer.default_scope)
        self.collection_name = self.input.param("collection",
                                                CbServer.default_collection)

        nodes_init = self.cluster.servers[1:self.nodes_init] \
            if self.nodes_init != 1 else []
        self.task.rebalance([self.cluster.master], nodes_init, [])
        self.cluster.nodes_in_cluster.extend([self.cluster.master] +
                                             nodes_init)
        self.bucket_util.create_default_bucket(
            replica=self.num_replicas,
            compression_mode=self.compression_mode,
            bucket_type=self.bucket_type,
            storage=self.bucket_storage,
            eviction_policy=self.bucket_eviction_policy)
        self.bucket_util.add_rbac_user()

        # Create Scope/Collection with random names if not equal to default
        if self.scope_name != CbServer.default_scope:
            self.scope_name = self.bucket_util.get_random_name()
            self.bucket_util.create_scope(self.cluster.master,
                                          self.bucket_util.buckets[0],
                                          {"name": self.scope_name})
        if self.collection_name != CbServer.default_collection:
            self.collection_name = self.bucket_util.get_random_name()
            self.bucket_util.create_collection(
                self.cluster.master, self.bucket_util.buckets[0],
                self.scope_name, {
                    "name": self.collection_name,
                    "num_items": self.num_items
                })
            self.log.info("Using scope::collection - '%s::%s'" %
                          (self.scope_name, self.collection_name))

        # Update required num_items under default collection
        self.bucket_util.buckets[0] \
            .scopes[self.scope_name] \
            .collections[self.collection_name] \
            .num_items = self.num_items

        self.durability_helper = DurabilityHelper(
            self.log,
            len(self.cluster.nodes_in_cluster),
            durability=self.durability_level,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to)

        # Create sdk_clients for pool
        if self.sdk_client_pool:
            self.log.info("Creating SDK client pool")
            self.sdk_client_pool.create_clients(
                self.bucket_util.buckets[0],
                self.cluster.nodes_in_cluster,
                req_clients=self.sdk_pool_capacity,
                compression_settings=self.sdk_compression)

        # Reset active_resident_threshold to avoid further data load
        # to push the bucket into DGM
        self.active_resident_threshold = 0
        self.cluster_util.print_cluster_stats()
        self.bucket_util.print_bucket_stats()
        self.log.info("==========Finished Basic_ops base setup========")

    def tearDown(self):
        super(basic_ops, self).tearDown()

    def do_basic_ops(self):
        KEY_NAME = 'key1'
        KEY_NAME2 = 'key2'
        self.log.info('Starting basic ops')

        default_bucket = self.bucket_util.get_all_buckets()[0]
        sdk_client = SDKClient([self.cluster.master],
                               default_bucket,
                               compression_settings=self.sdk_compression)
        # mcd = client.memcached(KEY_NAME)

        # MB-17231 - incr with full eviction
        rc = sdk_client.incr(KEY_NAME, delta=1)
        self.log.info('rc for incr: {0}'.format(rc))

        # MB-17289 del with meta
        rc = sdk_client.set(KEY_NAME, 0, 0, json.dumps({'value': 'value2'}))
        self.log.info('set is: {0}'.format(rc))
        # cas = rc[1]

        # wait for it to persist
        persisted = 0
        while persisted == 0:
            opaque, rep_time, persist_time, persisted, cas = \
                sdk_client.observe(KEY_NAME)

        try:
            rc = sdk_client.evict_key(KEY_NAME)
        except MemcachedError as exp:
            self.fail("Exception with evict meta - {0}".format(exp))

        CAS = 0xabcd
        try:
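            # NOTE: 'mcd' below refers to the raw memcached client whose
            # creation is commented out earlier in this method; it has to be
            # re-created before del_with_meta can be called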
            # key, exp, flags, seqno, cas
            rc = mcd.del_with_meta(KEY_NAME2, 0, 0, 2, CAS)
        except MemcachedError as exp:
            self.fail("Exception with del_with meta - {0}".format(exp))

    # Test case to reproduce MB-28078
    def do_setWithMeta_twice(self):
        mc = MemcachedClient(self.cluster.master.ip, constants.memcached_port)
        mc.sasl_auth_plain(self.cluster.master.rest_username,
                           self.cluster.master.rest_password)
        mc.bucket_select('default')
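        # MB-28078: a second setWithMeta() with identical meta should fail,
        # but it must not leave a temporary item behind
        # (curr_temp_items should drop back to 0)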

        try:
            mc.setWithMeta('1', '{"Hello":"World"}', 3600, 0, 1,
                           0x1512a3186faa0000)
        except MemcachedError as error:
            self.log.info("<MemcachedError #%d ``%s''>" %
                          (error.status, error.message))
            self.fail("Error on First setWithMeta()")

        stats = mc.stats()
        self.log.info('curr_items: {0} and curr_temp_items:{1}'.format(
            stats['curr_items'], stats['curr_temp_items']))
        self.sleep(5, "Wait before checking the stats")
        stats = mc.stats()
        self.log.info('curr_items: {0} and curr_temp_items:{1}'.format(
            stats['curr_items'], stats['curr_temp_items']))

        try:
            mc.setWithMeta('1', '{"Hello":"World"}', 3600, 0, 1,
                           0x1512a3186faa0000)
        except MemcachedError as error:
            stats = mc.stats()
            self.log.info('After 2nd setWithMeta(), curr_items: {} '
                          'and curr_temp_items: {}'.format(
                              stats['curr_items'], stats['curr_temp_items']))
            if int(stats['curr_temp_items']) == 1:
                self.fail("Error on second setWithMeta(), "
                          "expected curr_temp_items to be 0")
            else:
                self.log.info("<MemcachedError #%d ``%s''>" %
                              (error.status, error.message))

    def generate_docs_bigdata(self,
                              docs_per_day,
                              start=0,
                              document_size=1024000):
        return doc_generator(self.key,
                             start,
                             docs_per_day,
                             key_size=self.key_size,
                             doc_size=document_size,
                             doc_type=self.doc_type,
                             target_vbucket=self.target_vbucket,
                             vbuckets=self.cluster_util.vbuckets,
                             randomize_doc_size=self.randomize_doc_size,
                             randomize_value=self.randomize_value)

    def test_doc_size(self):
        """
        Basic tests for document CRUD operations using JSON docs
        """
        def check_durability_failures():
            self.log.error(task.sdk_acked_curd_failed.keys())
            self.log.error(task.sdk_exception_crud_succeed.keys())
            self.assertTrue(
                len(task.sdk_acked_curd_failed) == 0,
                "Durability failed for docs: %s" %
                task.sdk_acked_curd_failed.keys())
            self.assertTrue(
                len(task.sdk_exception_crud_succeed) == 0,
                "CRUDs succeeded even after SDK exceptions for docs: %s" %
                task.sdk_exception_crud_succeed.keys())

        """
        Basic tests for document CRUD operations using JSON docs
        """
        doc_op = self.input.param("doc_op", None)
        def_bucket = self.bucket_util.buckets[0]
        ignore_exceptions = list()
        retry_exceptions = list()
        supported_d_levels = self.bucket_util.get_supported_durability_levels()

        # Stat validation reference variables
        verification_dict = dict()
        verification_dict["ops_create"] = 0
        verification_dict["ops_update"] = 0
        verification_dict["ops_delete"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["sync_write_committed_count"] = 0

        if self.target_vbucket and type(self.target_vbucket) is not list:
            self.target_vbucket = [self.target_vbucket]

        self.log.info("Creating doc_generator..")
        # Load basic docs into bucket
        doc_create = doc_generator(self.key,
                                   0,
                                   self.num_items,
                                   key_size=self.key_size,
                                   doc_size=self.doc_size,
                                   doc_type=self.doc_type,
                                   target_vbucket=self.target_vbucket,
                                   vbuckets=self.cluster_util.vbuckets,
                                   randomize_doc_size=self.randomize_doc_size,
                                   randomize_value=self.randomize_value)
        self.log.info("Loading {0} docs into the bucket: {1}".format(
            self.num_items, def_bucket))
        task = self.task.async_load_gen_docs(
            self.cluster,
            def_bucket,
            doc_create,
            "create",
            0,
            batch_size=self.batch_size,
            process_concurrency=self.process_concurrency,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to,
            durability=self.durability_level,
            compression=self.sdk_compression,
            timeout_secs=self.sdk_timeout,
            ryow=self.ryow,
            check_persistence=self.check_persistence,
            scope=self.scope_name,
            collection=self.collection_name,
            sdk_client_pool=self.sdk_client_pool)
        self.task.jython_task_manager.get_task_result(task)

        if self.ryow:
            check_durability_failures()

        # Retry doc_exception code
        self.log.info("Validating failed doc's (if any) exceptions")
        doc_op_info_dict = dict()
        doc_op_info_dict[task] = self.bucket_util.get_doc_op_info_dict(
            def_bucket,
            "create",
            exp=0,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to,
            durability=self.durability_level,
            timeout=self.sdk_timeout,
            time_unit="seconds",
            ignore_exceptions=ignore_exceptions,
            retry_exceptions=retry_exceptions)
        self.bucket_util.verify_doc_op_task_exceptions(doc_op_info_dict,
                                                       self.cluster,
                                                       self.sdk_client_pool)

        if len(doc_op_info_dict[task]["unwanted"]["fail"].keys()) != 0:
            self.fail("Failures in retry doc CRUDs: {0}".format(
                doc_op_info_dict[task]["unwanted"]["fail"]))

        self.log.info("Wait for ep_all_items_remaining to become '0'")
        self.bucket_util._wait_for_stats_all_buckets()

        # Update ref_val
        verification_dict["ops_create"] += \
            self.num_items - len(task.fail.keys())
        # Validate vbucket stats
        if self.durability_level in supported_d_levels:
            verification_dict["sync_write_committed_count"] += self.num_items

        failed = self.durability_helper.verify_vbucket_details_stats(
            def_bucket,
            self.cluster_util.get_kv_nodes(),
            vbuckets=self.cluster_util.vbuckets,
            expected_val=verification_dict)
        if failed:
            self.fail("Cbstat vbucket-details verification failed")

        # Verify initial doc load count
        self.log.info("Validating doc_count in buckets")
        self.bucket_util.validate_doc_count_as_per_collections(def_bucket)

        self.log.info("Creating doc_generator for doc_op")
        num_item_start_for_crud = int(self.num_items / 2)
        doc_update = doc_generator(self.key,
                                   0,
                                   num_item_start_for_crud,
                                   key_size=self.key_size,
                                   doc_size=self.doc_size,
                                   doc_type=self.doc_type,
                                   target_vbucket=self.target_vbucket,
                                   vbuckets=self.cluster_util.vbuckets,
                                   mutate=1,
                                   randomize_doc_size=self.randomize_doc_size,
                                   randomize_value=self.randomize_value)

        if self.target_vbucket:
            mutation_doc_count = len(doc_update.doc_keys)
        else:
            mutation_doc_count = (doc_update.end - doc_update.start +
                                  len(task.fail.keys()))

        if doc_op == "update":
            self.log.info("Performing 'update' mutation over the docs")
            task = self.task.async_load_gen_docs(
                self.cluster,
                def_bucket,
                doc_update,
                "update",
                0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                compression=self.sdk_compression,
                timeout_secs=self.sdk_timeout,
                ryow=self.ryow,
                check_persistence=self.check_persistence,
                scope=self.scope_name,
                collection=self.collection_name,
                sdk_client_pool=self.sdk_client_pool)
            self.task.jython_task_manager.get_task_result(task)
            verification_dict["ops_update"] += mutation_doc_count
            if self.durability_level in supported_d_levels:
                verification_dict["sync_write_committed_count"] \
                    += mutation_doc_count
            if self.ryow:
                check_durability_failures()

            # Read all the values to validate update operation
            task = self.task.async_validate_docs(
                self.cluster,
                def_bucket,
                doc_update,
                "update",
                0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                scope=self.scope_name,
                collection=self.collection_name,
                sdk_client_pool=self.sdk_client_pool)
            self.task.jython_task_manager.get_task_result(task)

        elif doc_op == "delete":
            self.log.info("Performing 'delete' mutation over the docs")
            task = self.task.async_load_gen_docs(
                self.cluster,
                def_bucket,
                doc_update,
                "delete",
                0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                compression=self.sdk_compression,
                timeout_secs=self.sdk_timeout,
                ryow=self.ryow,
                check_persistence=self.check_persistence,
                scope=self.scope_name,
                collection=self.collection_name,
                sdk_client_pool=self.sdk_client_pool)
            self.task.jython_task_manager.get_task_result(task)
            if self.collection_name is None:
                target_scope = CbServer.default_scope
                target_collection = CbServer.default_collection
            else:
                target_scope = self.scope_name
                target_collection = self.collection_name

            def_bucket \
                .scopes[target_scope] \
                .collections[target_collection] \
                .num_items -= (self.num_items - num_item_start_for_crud)
            verification_dict["ops_delete"] += mutation_doc_count

            if self.durability_level in supported_d_levels:
                verification_dict["sync_write_committed_count"] \
                    += mutation_doc_count
            if self.ryow:
                check_durability_failures()

            # Read all the values to validate delete operation
            task = self.task.async_validate_docs(
                self.cluster,
                def_bucket,
                doc_update,
                "delete",
                0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                sdk_client_pool=self.sdk_client_pool)
            self.task.jython_task_manager.get_task_result(task)

        else:
            self.log.warning("Unsupported doc_operation")

        self.log.info("Wait for ep_all_items_remaining to become '0'")
        self.bucket_util._wait_for_stats_all_buckets()

        failed = self.durability_helper.verify_vbucket_details_stats(
            def_bucket,
            self.cluster_util.get_kv_nodes(),
            vbuckets=self.cluster_util.vbuckets,
            expected_val=verification_dict)
        if failed:
            self.fail("Cbstat vbucket-details verification failed")

        self.log.info("Validating doc_count")
        self.bucket_util.validate_doc_count_as_per_collections(def_bucket)

    def test_large_doc_size(self):
        # Bucket size=256MB; the test starts failing once ~236MB of the
        # bucket is filled. With document size=2MB that is ~221 docs, so
        # load 250 docs with size >= 1MB. See MB-29333

        self.doc_size *= 1024 * 1024
        gens_load = self.generate_docs_bigdata(docs_per_day=self.num_items,
                                               document_size=self.doc_size)
        for bucket in self.bucket_util.buckets:
            task = self.task.async_load_gen_docs(
                self.cluster,
                bucket,
                gens_load,
                "create",
                0,
                batch_size=10,
                process_concurrency=8,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                compression=self.sdk_compression,
                timeout_secs=self.sdk_timeout,
                sdk_client_pool=self.sdk_client_pool)
            self.task.jython_task_manager.get_task_result(task)

        # check if all the documents(250) are loaded with default timeout
        self.bucket_util.verify_stats_all_buckets(self.num_items)

    def test_large_doc_20MB(self):
        # Test reproducer for MB-29258:
        # Load a doc which is greater than 20MB with compression enabled
        # and check that the operation fails.
        # Check with compression_mode as active, passive and off
        val_error = SDKException.ValueTooLargeException
        gens_load = self.generate_docs_bigdata(docs_per_day=1,
                                               document_size=(self.doc_size *
                                                              1024000))
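        # Here self.doc_size is effectively a size in MB (doc bytes =
        # doc_size * 1024000), so doc_size > 20 means the value exceeds the
        # ~20MB limit and the creates below are expected to fail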
        for bucket in self.bucket_util.buckets:
            task = self.task.async_load_gen_docs(
                self.cluster,
                bucket,
                gens_load,
                "create",
                0,
                batch_size=10,
                process_concurrency=8,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                compression=self.sdk_compression,
                timeout_secs=self.sdk_timeout,
                sdk_client_pool=self.sdk_client_pool)
            self.task.jython_task_manager.get_task_result(task)
            if self.doc_size > 20:
                if len(task.fail.keys()) == 0:
                    self.log_failure("No failures during large doc insert")
                for doc_id, doc_result in task.fail.items():
                    if val_error not in str(doc_result["error"]):
                        self.log_failure("Invalid exception for key %s: %s" %
                                         (doc_id, doc_result))
            else:
                if len(task.fail.keys()) != 0:
                    self.log_failure("Failures during large doc insert")

        for bucket in self.bucket_util.buckets:
            if self.doc_size > 20:
                # failed with error "Data Too Big" when document size > 20MB
                self.bucket_util.verify_stats_all_buckets(0)
            else:
                self.bucket_util.verify_stats_all_buckets(1)
                gens_update = self.generate_docs_bigdata(
                    docs_per_day=1, document_size=(21 * 1024000))
                task = self.task.async_load_gen_docs(
                    self.cluster,
                    bucket,
                    gens_update,
                    "update",
                    0,
                    batch_size=10,
                    process_concurrency=8,
                    replicate_to=self.replicate_to,
                    persist_to=self.persist_to,
                    durability=self.durability_level,
                    compression=self.sdk_compression,
                    timeout_secs=self.sdk_timeout,
                    sdk_client_pool=self.sdk_client_pool)
                self.task.jython_task_manager.get_task_result(task)
                if len(task.fail.keys()) != 1:
                    self.log_failure("Large docs inserted for keys: %s" %
                                     task.fail.keys())
                if len(task.fail.keys()) == 0:
                    self.log_failure("No failures during large doc insert")
                for key, crud_result in task.fail.items():
                    if SDKException.ValueTooLargeException \
                            not in str(crud_result["error"]):
                        self.log_failure("Unexpected error for key %s: %s" %
                                         (key, crud_result["error"]))
                for doc_id, doc_result in task.fail.items():
                    if val_error not in str(doc_result["error"]):
                        self.log_failure("Invalid exception for key %s: %s" %
                                         (doc_id, doc_result))
                self.bucket_util.verify_stats_all_buckets(1)
        self.validate_test_failure()

    def test_parallel_cruds(self):
        data_op_dict = dict()
        num_items = self.num_items
        half_of_num_items = self.num_items / 2
        supported_d_levels = self.bucket_util.get_supported_durability_levels()
        exp_values_to_test = [0, 900, 4000, 12999]

        # Initial doc_loading
        initial_load = doc_generator(self.key,
                                     0,
                                     self.num_items,
                                     doc_size=self.doc_size)
        task = self.task.async_load_gen_docs(
            self.cluster,
            self.bucket_util.buckets[0],
            initial_load,
            DocLoading.Bucket.DocOps.CREATE,
            0,
            batch_size=100,
            process_concurrency=8,
            compression=self.sdk_compression,
            timeout_secs=self.sdk_timeout,
            sdk_client_pool=self.sdk_client_pool)
        self.task.jython_task_manager.get_task_result(task)

        # Create required doc_gens and doc_op task object
        for doc_op in self.doc_ops:
            if doc_op == DocLoading.Bucket.DocOps.CREATE:
                num_items += half_of_num_items
                gen_start = self.num_items
                gen_end = self.num_items + half_of_num_items
            elif doc_op == DocLoading.Bucket.DocOps.DELETE:
                gen_start = 0
                gen_end = half_of_num_items
            else:
                gen_start = half_of_num_items
                gen_end = self.num_items

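            # Randomly use either observe-based durability (replicate_to /
            # persist_to) or a sync_write durability level for this doc_op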
            d_level = ""
            replicate_to = persist_to = 0
            if self.num_replicas > 0:
                replicate_to = randint(1, self.num_replicas)
                persist_to = randint(0, self.num_replicas + 1)
            if not self.observe_test and choice([True, False]):
                d_level = choice(supported_d_levels)

            self.log.info("Doc_op %s, range (%d, %d), "
                          "replicate_to=%s, persist_to=%s, d_level=%s" %
                          (doc_op, gen_start, gen_end, replicate_to,
                           persist_to, d_level))

            data_op_dict[doc_op] = dict()
            data_op_dict[doc_op]["doc_gen"] = doc_generator(
                self.key,
                gen_start,
                gen_end,
                doc_size=self.doc_size,
                mutation_type=doc_op)
            data_op_dict[doc_op]["task"] = self.task.async_load_gen_docs(
                self.cluster,
                self.bucket_util.buckets[0],
                data_op_dict[doc_op]["doc_gen"],
                doc_op,
                exp=choice(exp_values_to_test),
                compression=self.sdk_compression,
                persist_to=persist_to,
                replicate_to=replicate_to,
                durability=d_level,
                timeout_secs=self.sdk_timeout,
                sdk_client_pool=self.sdk_client_pool,
                process_concurrency=1,
                batch_size=1,
                print_ops_rate=False,
                start_task=False)

        # Start all tasks
        for doc_op in self.doc_ops:
            self.task_manager.add_new_task(data_op_dict[doc_op]["task"])
        # Wait for doc_ops to complete and validate final doc value result
        for doc_op in self.doc_ops:
            self.task_manager.get_task_result(data_op_dict[doc_op]["task"])
            self.log.info("%s task completed" % doc_op)
            if data_op_dict[doc_op]["task"].fail:
                self.log_failure("Doc_loading failed for %s: %s" %
                                 (doc_op, data_op_dict[doc_op]["task"].fail))
            elif doc_op in [
                    DocLoading.Bucket.DocOps.CREATE,
                    DocLoading.Bucket.DocOps.UPDATE,
                    DocLoading.Bucket.DocOps.REPLACE,
                    DocLoading.Bucket.DocOps.DELETE
            ]:
                suppress_err_tbl = False
                if doc_op == DocLoading.Bucket.DocOps.DELETE:
                    suppress_err_tbl = True
                self.log.info("Validating %s results" % doc_op)
                # Read all the values to validate doc_operation values
                task = self.task.async_validate_docs(
                    self.cluster,
                    self.bucket_util.buckets[0],
                    data_op_dict[doc_op]["doc_gen"],
                    doc_op,
                    0,
                    batch_size=self.batch_size,
                    process_concurrency=self.process_concurrency,
                    sdk_client_pool=self.sdk_client_pool,
                    suppress_error_table=suppress_err_tbl)
                self.task.jython_task_manager.get_task_result(task)

        self.validate_test_failure()

    def test_diag_eval_curl(self):
        # Check if diag/eval can be done only by local host
        self.disable_diag_eval_on_non_local_host = \
            self.input.param("disable_diag_eval_non_local", False)
        port = self.cluster.master.port

        # check if local host can work fine
        cmd = []
        cmd_base = 'curl http://{0}:{1}@localhost:{2}/diag/eval ' \
            .format(self.cluster.master.rest_username,
                    self.cluster.master.rest_password, port)
        command = cmd_base + '-X POST -d \'os:cmd("env")\''
        cmd.append(command)
        command = cmd_base + '-X POST -d \'case file:read_file("/etc/passwd") of {ok, B} -> io:format("~p~n", [binary_to_term(B)]) end.\''
        cmd.append(command)
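        # From localhost these diag/eval calls must always be accepted,
        # irrespective of the allow_nonlocal_eval setting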

        shell = RemoteMachineShellConnection(self.cluster.master)
        for command in cmd:
            output, error = shell.execute_command(command)
            self.assertNotEquals("API is accessible from localhost only",
                                 output[0])

        # Enable allow_nonlocal_eval unless the test wants diag/eval
        # restricted to localhost
        if not self.disable_diag_eval_on_non_local_host:
            command = cmd_base + '-X POST -d \'ns_config:set(allow_nonlocal_eval, true).\''
            _, _ = shell.execute_command(command)

        # Check that diag/eval over the node's IP address does not work
        # when allow_nonlocal_eval is disabled
        cmd = []
        cmd_base = 'curl http://{0}:{1}@{2}:{3}/diag/eval ' \
            .format(self.cluster.master.rest_username,
                    self.cluster.master.rest_password,
                    self.cluster.master.ip, port)
        command = cmd_base + '-X POST -d \'os:cmd("env")\''
        cmd.append(command)
        command = cmd_base + '-X POST -d \'case file:read_file("/etc/passwd") of {ok, B} -> io:format("~p~n", [binary_to_term(B)]) end.\''
        cmd.append(command)

        for command in cmd:
            output, error = shell.execute_command(command)
            if self.disable_diag_eval_on_non_local_host:
                self.assertEquals("API is accessible from localhost only",
                                  output[0])
            else:
                self.assertNotEquals("API is accessible from localhost only",
                                     output[0])

    def test_MB_40967(self):
        """
        1. Load initial docs into the bucket
        2. Perform continuous reads until get_cmd stats breaks in
           'cbstats timings' command
        """
        total_gets = 0
        max_gets = 2500000000
        bucket = self.bucket_util.buckets[0]
        doc_gen = doc_generator(self.key, 0, self.num_items, doc_size=1)
        create_task = self.task.async_load_gen_docs(
            self.cluster,
            bucket,
            doc_gen,
            "create",
            0,
            batch_size=100,
            process_concurrency=self.process_concurrency,
            timeout_secs=self.sdk_timeout)
        self.task_manager.get_task_result(create_task)

        cbstat = dict()
        kv_nodes = self.cluster_util.get_kv_nodes()
        for node in kv_nodes:
            shell = RemoteMachineShellConnection(node)
            cbstat[node] = Cbstats(shell)

        self.log.info("Start doc_reads until total_gets cross: %s" % max_gets)
        read_task = self.task.async_continuous_doc_ops(
            self.cluster,
            bucket,
            doc_gen,
            op_type="read",
            batch_size=self.batch_size,
            process_concurrency=self.process_concurrency,
            timeout_secs=self.sdk_timeout)
        self.sleep(60, "Wait for read task to start")
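        # MB-40967: after a very large number of gets, the 'get_cmd_*'
        # histogram entries disappear from 'cbstats timings'; keep polling
        # the stats while reads continue until the entries vanish or
        # max_gets is reached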
        while total_gets < max_gets:
            total_gets = 0
            for node in kv_nodes:
                output, error = cbstat[node].get_timings(bucket.name)
                if error:
                    self.log_failure("Error during cbstat timings: %s" % error)
                    break

                get_cmd_found = False
                for line in output:
                    if "get_cmd_" in line:
                        if "get_cmd_mean" in line:
                            break
                        get_cmd_found = True
                if not get_cmd_found:
                    self.log.error(output)
                    self.log_failure("cbstat timings get_cmd stats not found")
                    break
                vb_details = cbstat[node].vbucket_details(bucket.name)
                for _, vb_stats in vb_details.items():
                    total_gets += long(vb_stats["ops_get"])
            if self.test_failure:
                break
            self.sleep(
                120,
                "Total_gets: %s, itr: %s" % (total_gets, read_task.itr_count))

        read_task.end_task()
        self.task_manager.get_task_result(read_task)

        # Close all shell connections
        for node in kv_nodes:
            cbstat[node].shellConn.disconnect()

        self.validate_test_failure()

    def test_MB_41510(self):
        """
        1. Load initial docs into the bucket
        2. Perform continuous reads
        3. Perform 'mcstat reset' in parallel to the reads
        4. Perform 'cbstats timings' command to read the current values
        5. Validate there is no crash when stats are getting reset continuously
        """
        def reset_mcstat(bucket_name):
            mc_stat = dict()
            for t_node in kv_nodes:
                shell_conn = RemoteMachineShellConnection(t_node)
                mc_stat[t_node] = McStat(shell_conn)

            while not stop_thread:
                for t_node in mc_stat.keys():
                    try:
                        mc_stat[t_node].reset(bucket_name)
                    except Exception as mcstat_err:
                        self.log_failure(mcstat_err)
                if self.test_failure:
                    break

            for t_node in mc_stat.keys():
                mc_stat[t_node].shellConn.disconnect()

        def get_timings(bucket_name):
            cb_stat = dict()
            for t_node in kv_nodes:
                shell_conn = RemoteMachineShellConnection(t_node)
                cb_stat[t_node] = Cbstats(shell_conn)

            while not stop_thread:
                for t_node in cb_stat.keys():
                    try:
                        cb_stat[t_node].get_timings(bucket_name)
                    except Exception as cbstat_err:
                        self.log_failure(cbstat_err)
                if self.test_failure:
                    break

            for t_node in cb_stat.keys():
                cb_stat[t_node].shellConn.disconnect()

        total_gets = 0
        max_gets = 50000000
        stop_thread = False
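        # 'stop_thread' is read by the helper threads via closure;
        # setting it to True later signals both threads to exit their loops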
        bucket = self.bucket_util.buckets[0]
        cb_stat_obj = dict()
        kv_nodes = self.cluster_util.get_kv_nodes()
        for node in self.cluster_util.get_kv_nodes():
            shell = RemoteMachineShellConnection(node)
            cb_stat_obj[node] = Cbstats(shell)

        doc_gen = doc_generator(self.key, 0, self.num_items, doc_size=1)
        create_task = self.task.async_load_gen_docs(
            self.cluster,
            bucket,
            doc_gen,
            "create",
            0,
            batch_size=500,
            process_concurrency=self.process_concurrency,
            timeout_secs=self.sdk_timeout)
        self.task_manager.get_task_result(create_task)
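        # Run 'mcstat reset' and 'cbstats timings' concurrently with the
        # continuous reads below to try to reproduce the crash from MB-41510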

        mc_stat_reset_thread = Thread(target=reset_mcstat, args=[bucket.name])
        get_timings_thread = Thread(target=get_timings, args=[bucket.name])
        mc_stat_reset_thread.start()
        get_timings_thread.start()

        read_task = self.task.async_continuous_doc_ops(
            self.cluster,
            bucket,
            doc_gen,
            op_type="read",
            batch_size=self.batch_size,
            process_concurrency=self.process_concurrency,
            timeout_secs=self.sdk_timeout)

        while total_gets < max_gets:
            total_gets = 0
            try:
                for node in cb_stat_obj.keys():
                    vb_details = cb_stat_obj[node].vbucket_details(bucket.name)
                    for _, vb_stats in vb_details.items():
                        total_gets += long(vb_stats["ops_get"])
            except Exception as err:
                self.log_failure(err)

            self.log.info("Total gets: %s" % total_gets)
            result, core_msg, stream_msg = self.check_coredump_exist(
                self.servers, force_collect=True)

            if result is not False:
                self.log_failure(core_msg + stream_msg)
                break
            elif self.test_failure:
                break

            self.sleep(60, "Wait before next check")

        stop_thread = True
        read_task.end_task()
        mc_stat_reset_thread.join()
        get_timings_thread.join()

        # Close all shell connections
        for node in cb_stat_obj.keys():
            cb_stat_obj[node].shellConn.disconnect()

        self.validate_test_failure()

    def verify_stat(self, items, value="active"):
        mc = MemcachedClient(self.cluster.master.ip, constants.memcached_port)
        mc.sasl_auth_plain(self.cluster.master.rest_username,
                           self.cluster.master.rest_password)
        mc.bucket_select('default')
        stats = mc.stats()
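        # Validate the bucket's compression mode, the number of compressed
        # items, and that active item memory differs from its
        # uncompressed counterpart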
        self.assertEquals(stats['ep_compression_mode'], value)
        self.assertEquals(int(stats['ep_item_compressor_num_compressed']),
                          items)
        self.assertNotEquals(int(stats['vb_active_itm_memory']),
                             int(stats['vb_active_itm_memory_uncompressed']))

    def test_compression_active_and_off(self):
        """
        test reproducer for MB-29272,
        Load some documents with compression mode set to active
        get the cbstats
        change compression mode to off and wait for minimum 250ms
        Load some more documents and check the compression is not done
        epengine.basic_ops.basic_ops.test_compression_active_and_off,items=10000,compression_mode=active

        :return:
        """
        # Load some documents with compression mode as active
        gen_create = doc_generator("eviction1_",
                                   start=0,
                                   end=self.num_items,
                                   key_size=self.key_size,
                                   doc_size=self.doc_size,
                                   doc_type=self.doc_type,
                                   vbuckets=self.cluster_util.vbuckets,
                                   randomize_doc_size=self.randomize_doc_size,
                                   randomize_value=self.randomize_value)
        gen_create2 = doc_generator("eviction2_",
                                    start=0,
                                    end=self.num_items,
                                    key_size=self.key_size,
                                    doc_size=self.doc_size,
                                    doc_type=self.doc_type,
                                    vbuckets=self.cluster_util.vbuckets,
                                    randomize_doc_size=self.randomize_doc_size,
                                    randomize_value=self.randomize_value)
        def_bucket = self.bucket_util.get_all_buckets()[0]
        task = self.task.async_load_gen_docs(
            self.cluster,
            def_bucket,
            gen_create,
            "create",
            0,
            batch_size=10,
            process_concurrency=8,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to,
            durability=self.durability_level,
            compression=self.sdk_compression,
            timeout_secs=self.sdk_timeout,
            sdk_client_pool=self.sdk_client_pool)
        self.task.jython_task_manager.get_task_result(task)
        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util.verify_stats_all_buckets(self.num_items)

        remote = RemoteMachineShellConnection(self.cluster.master)
        for bucket in self.bucket_util.buckets:
            # change compression mode to off
            output, _ = remote.execute_couchbase_cli(
                cli_command='bucket-edit',
                cluster_host="localhost:8091",
                user=self.cluster.master.rest_username,
                password=self.cluster.master.rest_password,
                options='--bucket=%s --compression-mode off' % bucket.name)
            self.assertTrue(' '.join(output).find('SUCCESS') != -1,
                            'compression mode set to off')

            # sleep for 10 sec (a minimum wait of 250ms is required)
            self.sleep(10)

        # Load data and check stats to see compression
        # is not done for newly added data
        task = self.task.async_load_gen_docs(
            self.cluster,
            def_bucket,
            gen_create2,
            "create",
            0,
            batch_size=10,
            process_concurrency=8,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to,
            durability=self.durability_level,
            compression=self.sdk_compression,
            timeout_secs=self.sdk_timeout,
            sdk_client_pool=self.sdk_client_pool)
        self.task.jython_task_manager.get_task_result(task)
        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util.verify_stats_all_buckets(self.num_items * 2)

    def MB36948(self):
        node_to_stop = self.servers[0]
        self.log.info("Adding index/query node")
        self.task.rebalance([self.cluster.master], [self.servers[2]], [],
                            services=["n1ql,index"])
        self.log.info("Creating SDK client connection")
        client = SDKClient([self.cluster.master],
                           self.bucket_util.buckets[0],
                           compression_settings=self.sdk_compression)

        self.log.info("Stopping memcached on: %s" % node_to_stop)
        ssh_conn = RemoteMachineShellConnection(node_to_stop)
        err_sim = CouchbaseError(self.log, ssh_conn)
        err_sim.create(CouchbaseError.STOP_MEMCACHED)

        result = client.crud("create", "abort1", "abort1_val")
        if not result["status"]:
            self.log_failure("Async SET failed")

        result = client.crud("update",
                             "abort1",
                             "abort1_val",
                             durability=self.durability_level,
                             timeout=3,
                             time_unit="seconds")
        if result["status"]:
            self.log_failure("Sync write succeeded")
        if SDKException.DurabilityAmbiguousException not in result["error"]:
            self.log_failure("Invalid exception for sync_write: %s" % result)

        self.log.info("Resuming memcached on: %s" % node_to_stop)
        err_sim.revert(CouchbaseError.STOP_MEMCACHED)

        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util.verify_stats_all_buckets(1)

        self.log.info("Closing ssh & SDK connections")
        ssh_conn.disconnect()
        client.close()

        self.validate_test_failure()

    def do_get_random_key(self):
        # MB-31548: get_random_key hangs sometimes
        mc = MemcachedClient(self.cluster.master.ip, constants.memcached_port)
        mc.sasl_auth_plain(self.cluster.master.rest_username,
                           self.cluster.master.rest_password)
        mc.bucket_select('default')

        count = 0
        while count < 1000000:
            count += 1
            try:
                mc.get_random_key()
            except MemcachedError as error:
                self.fail("<MemcachedError #%d ``%s''>" %
                          (error.status, error.message))
            if count % 1000 == 0:
                self.log.info('The number of iteration is {}'.format(count))
Example #5
class UpgradeTests(UpgradeBase):
    def setUp(self):
        super(UpgradeTests, self).setUp()
        self.durability_helper = DurabilityHelper(
            self.log,
            len(self.cluster.nodes_in_cluster))
        self.verification_dict = dict()
        self.verification_dict["ops_create"] = self.num_items
        self.verification_dict["ops_delete"] = 0

    def tearDown(self):
        super(UpgradeTests, self).tearDown()

    def __trigger_cbcollect(self, log_path):
        self.log.info("Triggering cb_collect_info")
        rest = RestConnection(self.cluster.master)
        nodes = rest.get_nodes()
        status = self.cluster_util.trigger_cb_collect_on_cluster(rest, nodes)

        if status is True:
            self.cluster_util.wait_for_cb_collect_to_complete(rest)
            status = self.cluster_util.copy_cb_collect_logs(
                rest, nodes, self.cluster, log_path)
            if status is False:
                self.log_failure("API copy_cb_collect_logs detected failure")
        else:
            self.log_failure("API perform_cb_collect returned False")
        return status

    def __play_with_collection(self):
        # Client based scope/collection crud tests
        client = self.sdk_client_pool.get_client_for_bucket(self.bucket)
        scope_name = self.bucket_util.get_random_name(
            max_length=CbServer.max_scope_name_len)
        collection_name = self.bucket_util.get_random_name(
            max_length=CbServer.max_collection_name_len)

        # Create scope using SDK client
        client.create_scope(scope_name)
        # Create collection under default scope and custom scope
        client.create_collection(collection_name, CbServer.default_scope)
        client.create_collection(collection_name, scope_name)
        # Drop created collections
        client.drop_collection(CbServer.default_scope, collection_name)
        client.drop_collection(scope_name, collection_name)
        # Drop created scope using SDK client
        client.drop_scope(scope_name)

        # MB-44092 - Collection load not working with pre-existing connections
        DocLoaderUtils.sdk_client_pool = SDKClientPool()
        self.log.info("Creating required SDK clients for client_pool")
        for bucket in self.bucket_util.buckets:
            DocLoaderUtils.sdk_client_pool.create_clients(
                bucket, [self.cluster.master], 1,
                compression_settings=self.sdk_compression)

        # Create scopes/collections phase
        collection_load_spec = \
            self.bucket_util.get_crud_template_from_package("initial_load")
        collection_load_spec["doc_crud"][
            MetaCrudParams.DocCrud.CREATE_PERCENTAGE_PER_COLLECTION] = 0
        collection_load_spec["doc_crud"][
            MetaCrudParams.DocCrud.NUM_ITEMS_FOR_NEW_COLLECTIONS] = 5000
        collection_load_spec[
            MetaCrudParams.SCOPES_TO_ADD_PER_BUCKET] = 5
        collection_load_spec[
            MetaCrudParams.COLLECTIONS_TO_ADD_FOR_NEW_SCOPES] = 10
        collection_load_spec[
            MetaCrudParams.COLLECTIONS_TO_ADD_PER_BUCKET] = 50
        collection_task = \
            self.bucket_util.run_scenario_from_spec(self.task,
                                                    self.cluster,
                                                    self.bucket_util.buckets,
                                                    collection_load_spec,
                                                    mutation_num=1,
                                                    batch_size=500)
        if collection_task.result is False:
            self.log_failure("Collection task failed")
            return
        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util._wait_for_stats_all_buckets(cbstat_cmd="all",
                                                     stat_name="ep_queue_size",
                                                     timeout=60)
        self.bucket_util.validate_docs_per_collections_all_buckets()

        # Drop and recreate scope/collections
        collection_load_spec = \
            self.bucket_util.get_crud_template_from_package("initial_load")
        collection_load_spec["doc_crud"][
            MetaCrudParams.DocCrud.CREATE_PERCENTAGE_PER_COLLECTION] = 0
        collection_load_spec[MetaCrudParams.COLLECTIONS_TO_DROP] = 10
        collection_load_spec[MetaCrudParams.SCOPES_TO_DROP] = 2
        collection_task = \
            self.bucket_util.run_scenario_from_spec(self.task,
                                                    self.cluster,
                                                    self.bucket_util.buckets,
                                                    collection_load_spec,
                                                    mutation_num=1,
                                                    batch_size=500)
        if collection_task.result is False:
            self.log_failure("Drop scope/collection failed")
            return

        # MB-44092 - Close client_pool after collection ops
        DocLoaderUtils.sdk_client_pool.shutdown()

        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util._wait_for_stats_all_buckets(cbstat_cmd="all",
                                                     stat_name="ep_queue_size",
                                                     timeout=60)
        self.bucket_util.validate_docs_per_collections_all_buckets()

    def test_upgrade(self):
        create_batch_size = 10000
        update_task = None

        t_durability_level = ""
        if self.cluster_supports_sync_write:
            t_durability_level = Bucket.DurabilityLevel.MAJORITY

        if self.upgrade_with_data_load:
            self.log.info("Starting async doc updates")
            update_task = self.task.async_continuous_doc_ops(
                self.cluster, self.bucket, self.gen_load,
                op_type=DocLoading.Bucket.DocOps.UPDATE,
                process_concurrency=1,
                persist_to=1,
                replicate_to=1,
                durability=t_durability_level,
                timeout_secs=30)

        create_gen = doc_generator(self.key, self.num_items,
                                   self.num_items+create_batch_size)
        self.log.info("Upgrading cluster nodes to target version")
        node_to_upgrade = self.fetch_node_to_upgrade()
        while node_to_upgrade is not None:
            self.log.info("Selected node for upgrade: %s"
                          % node_to_upgrade.ip)
            self.upgrade_function[self.upgrade_type](node_to_upgrade,
                                                     self.upgrade_version)
            self.cluster_util.print_cluster_stats()

            # Validate sync_write results after upgrade
            if self.atomicity:
                create_batch_size = 10
                create_gen = doc_generator(
                    self.key,
                    self.num_items,
                    self.num_items+create_batch_size)
                sync_write_task = self.task.async_load_gen_docs_atomicity(
                    self.cluster, self.bucket_util.buckets,
                    create_gen, DocLoading.Bucket.DocOps.CREATE,
                    process_concurrency=1,
                    transaction_timeout=self.transaction_timeout,
                    record_fail=True)
            else:
                sync_write_task = self.task.async_load_gen_docs(
                    self.cluster, self.bucket, create_gen,
                    DocLoading.Bucket.DocOps.CREATE,
                    durability=self.durability_level,
                    timeout_secs=self.sdk_timeout,
                    sdk_client_pool=self.sdk_client_pool,
                    process_concurrency=4,
                    skip_read_on_error=True,
                    suppress_error_table=True)
            self.task_manager.get_task_result(sync_write_task)

            node_to_upgrade = self.fetch_node_to_upgrade()
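            # If no node is left to upgrade, the whole cluster runs the new
            # version and the sync_writes above must have succeeded.
            # In a mixed-mode cluster they are expected to fail with
            # FeatureNotAvailableException unless the initial version
            # already supports sync_write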
            if self.atomicity:
                self.sleep(10)
                current_items = self.bucket_util.get_bucket_current_item_count(
                    self.cluster, self.bucket)
                if node_to_upgrade is None:
                    if current_items < self.num_items+create_batch_size:
                        self.log_failure(
                            "Failures after cluster upgrade {} {}"
                            .format(current_items,
                                    self.num_items+create_batch_size))
                elif current_items > self.num_items:
                    self.log_failure(
                        "SyncWrite succeeded with mixed mode cluster")
            else:
                if node_to_upgrade is None:
                    if sync_write_task.fail.keys():
                        self.log_failure("Failures after cluster upgrade")
                    else:
                        self.num_items += create_batch_size
                        self.bucket.scopes[
                            CbServer.default_scope].collections[
                            CbServer.default_collection] \
                            .num_items += create_batch_size
                elif self.cluster_supports_sync_write:
                    if sync_write_task.fail:
                        self.log.error("SyncWrite failed: %s"
                                       % sync_write_task.fail)
                        self.log_failure("SyncWrite failed during upgrade")
                    else:
                        self.num_items += create_batch_size
                        self.bucket.scopes[
                            CbServer.default_scope].collections[
                            CbServer.default_collection] \
                            .num_items += create_batch_size
                        create_gen = doc_generator(
                            self.key,
                            self.num_items,
                            self.num_items + create_batch_size)
                elif len(sync_write_task.fail.keys()) != create_batch_size:
                    self.log_failure(
                        "SyncWrite succeeded with mixed mode cluster")
                else:
                    for doc_id, doc_result in sync_write_task.fail.items():
                        if SDKException.FeatureNotAvailableException \
                                not in str(doc_result["error"]):
                            self.log_failure("Invalid exception for %s: %s"
                                             % (doc_id, doc_result))

            # Halt further upgrade if test has failed during current upgrade
            if self.test_failure is not None:
                break

        # Validate default collection stats before collection ops
        self.bucket_util._wait_for_stats_all_buckets(cbstat_cmd="all",
                                                     stat_name="ep_queue_size",
                                                     timeout=60)
        self.bucket_util.validate_docs_per_collections_all_buckets()

        # Play with collection if upgrade was successful
        if not self.test_failure:
            self.__play_with_collection()

        if self.upgrade_with_data_load:
            # Wait for update_task to complete
            update_task.end_task()
            self.task_manager.get_task_result(update_task)

        self.validate_test_failure()

    def test_bucket_durability_upgrade(self):
        update_task = None
        self.sdk_timeout = 60
        create_batch_size = 10000
        if self.atomicity:
            create_batch_size = 10

        # Check whether sync_write is supported by the initial cluster version
        sync_write_support = True
        if float(self.initial_version[0:3]) < 6.5:
            sync_write_support = False

        if sync_write_support:
            self.verification_dict["rollback_item_count"] = 0
            self.verification_dict["sync_write_aborted_count"] = 0

        if self.upgrade_with_data_load:
            self.log.info("Starting async doc updates")
            update_task = self.task.async_continuous_doc_ops(
                self.cluster, self.bucket, self.gen_load,
                op_type=DocLoading.Bucket.DocOps.UPDATE,
                process_concurrency=1,
                persist_to=1,
                replicate_to=1,
                timeout_secs=30)

        self.log.info("Upgrading cluster nodes to target version")
        node_to_upgrade = self.fetch_node_to_upgrade()
        while node_to_upgrade is not None:
            self.log.info("Selected node for upgrade: %s"
                          % node_to_upgrade.ip)
            self.upgrade_function[self.upgrade_type](node_to_upgrade,
                                                     self.upgrade_version)
            try:
                self.cluster.update_master_using_diag_eval(
                    self.cluster.servers[0])
            except Exception:
                self.cluster.update_master_using_diag_eval(
                    self.cluster.servers[self.nodes_init-1])

            create_gen = doc_generator(self.key, self.num_items,
                                       self.num_items+create_batch_size)
            # Validate sync_write results after upgrade
            if self.atomicity:
                sync_write_task = self.task.async_load_gen_docs_atomicity(
                    self.cluster, self.bucket_util.buckets,
                    create_gen, DocLoading.Bucket.DocOps.CREATE,
                    process_concurrency=1,
                    transaction_timeout=self.transaction_timeout,
                    record_fail=True)
            else:
                sync_write_task = self.task.async_load_gen_docs(
                    self.cluster, self.bucket, create_gen,
                    DocLoading.Bucket.DocOps.CREATE,
                    timeout_secs=self.sdk_timeout,
                    process_concurrency=4,
                    sdk_client_pool=self.sdk_client_pool,
                    skip_read_on_error=True,
                    suppress_error_table=True)
            self.task_manager.get_task_result(sync_write_task)
            self.num_items += create_batch_size

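            # Bounded poll: wait for the bucket item count to converge to the
            # expected value before validating further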
            retry_index = 0
            while retry_index < 5:
                self.sleep(3, "Wait for num_items to match")
                current_items = self.bucket_util.get_bucket_current_item_count(
                    self.cluster, self.bucket)
                if current_items == self.num_items:
                    break
                self.log.debug("Num_items mismatch. Expected: %s, Actual: %s"
                               % (self.num_items, current_items))
                retry_index += 1
            # Doc count validation
            self.cluster_util.print_cluster_stats()

            self.verification_dict["ops_create"] += create_batch_size
            self.summary.add_step("Upgrade %s" % node_to_upgrade.ip)

            # Halt further upgrade if test has failed during current upgrade
            if self.test_failure:
                break

            node_to_upgrade = self.fetch_node_to_upgrade()

        if self.upgrade_with_data_load:
            # Wait for update_task to complete
            update_task.end_task()
            self.task_manager.get_task_result(update_task)
        else:
            self.verification_dict["ops_update"] = 0

        # Cb_stats vb-details validation
        failed = self.durability_helper.verify_vbucket_details_stats(
            self.bucket_util.buckets[0],
            self.cluster_util.get_kv_nodes(),
            vbuckets=self.cluster_util.vbuckets,
            expected_val=self.verification_dict)
        if failed:
            self.log_failure("Cbstat vbucket-details validation failed")
        self.summary.add_step("Cbstats vb-details verification")

        self.validate_test_failure()

        possible_d_levels = dict()
        possible_d_levels[Bucket.Type.MEMBASE] = \
            self.bucket_util.get_supported_durability_levels()
        possible_d_levels[Bucket.Type.EPHEMERAL] = [
            Bucket.DurabilityLevel.NONE,
            Bucket.DurabilityLevel.MAJORITY]
        len_possible_d_levels = len(possible_d_levels[self.bucket_type]) - 1

        if not sync_write_support:
            self.verification_dict["rollback_item_count"] = 0
            self.verification_dict["sync_write_aborted_count"] = 0

        # Perform bucket_durability update
        key, value = doc_generator("b_durability_doc", 0, 1).next()
        client = SDKClient([self.cluster.master], self.bucket_util.buckets[0])
        for index, d_level in enumerate(possible_d_levels[self.bucket_type]):
            self.log.info("Updating bucket_durability=%s" % d_level)
            self.bucket_util.update_bucket_property(
                self.bucket_util.buckets[0],
                bucket_durability=BucketDurability[d_level])
            self.bucket_util.print_bucket_stats()

            buckets = self.bucket_util.get_all_buckets()
            if buckets[0].durability_level != BucketDurability[d_level]:
                self.log_failure("New bucket_durability not taken")

            self.summary.add_step("Update bucket_durability=%s" % d_level)

            self.sleep(10, "MB-39678: Bucket_d_level change to take effect")

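            # Exercise one doc op per durability level: CREATE for the first
            # level, DELETE for the last, UPDATE for every level in between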
            if index == 0:
                op_type = DocLoading.Bucket.DocOps.CREATE
                self.verification_dict["ops_create"] += 1
            elif index == len_possible_d_levels:
                op_type = DocLoading.Bucket.DocOps.DELETE
                self.verification_dict["ops_delete"] += 1
            else:
                op_type = DocLoading.Bucket.DocOps.UPDATE
                if "ops_update" in self.verification_dict:
                    self.verification_dict["ops_update"] += 1

            result = client.crud(op_type, key, value,
                                 timeout=self.sdk_timeout)
            if result["status"] is False:
                self.log_failure("Doc_op %s failed on key %s: %s"
                                 % (op_type, key, result["error"]))
            self.summary.add_step("Doc_op %s" % op_type)
        client.close()

        # Cb_stats vb-details validation
        failed = self.durability_helper.verify_vbucket_details_stats(
            self.bucket_util.buckets[0],
            self.cluster_util.get_kv_nodes(),
            vbuckets=self.cluster_util.vbuckets,
            expected_val=self.verification_dict)
        if failed:
            self.log_failure("Cbstat vbucket-details validation failed")
        self.summary.add_step("Cbstats vb-details verification")
        self.validate_test_failure()

    def test_transaction_doc_isolation(self):
        def run_transaction_updates():
            self.log.info("Starting transaction updates in parallel")
            while not stop_thread:
                commit_trans = choice([True, False])
                trans_update_task = self.task.async_load_gen_docs_atomicity(
                    self.cluster, self.bucket_util.buckets, self.gen_load,
                    DocLoading.Bucket.DocOps.UPDATE,
                    exp=self.maxttl,
                    batch_size=50,
                    process_concurrency=3,
                    timeout_secs=self.sdk_timeout,
                    update_count=self.update_count,
                    transaction_timeout=self.transaction_timeout,
                    commit=commit_trans,
                    durability=self.durability_level,
                    sync=self.sync, defer=self.defer,
                    retries=0)
                self.task_manager.get_task_result(trans_update_task)

        stop_thread = False
        update_task = None
        self.sdk_timeout = 60

        self.log.info("Upgrading cluster nodes to target version")
        node_to_upgrade = self.fetch_node_to_upgrade()
        while node_to_upgrade is not None:
            self.log.info("Selected node for upgrade: %s"
                          % node_to_upgrade.ip)
            if self.upgrade_with_data_load:
                update_task = Thread(target=run_transaction_updates)
                update_task.start()

            self.upgrade_function[self.upgrade_type](node_to_upgrade,
                                                     self.upgrade_version)
            try:
                self.cluster.update_master_using_diag_eval(
                    self.cluster.servers[0])
            except Exception:
                self.cluster.update_master_using_diag_eval(
                    self.cluster.servers[self.nodes_init-1])

            if self.upgrade_with_data_load:
                stop_thread = True
                update_task.join()

            self.cluster_util.print_cluster_stats()
            self.bucket_util.print_bucket_stats()

            self.summary.add_step("Upgrade %s" % node_to_upgrade.ip)

            # Halt further upgrade if test has failed during current upgrade
            if self.test_failure:
                break

            node_to_upgrade = self.fetch_node_to_upgrade()
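            # The tombstonedUserXAttrs bucket capability should appear only
            # once every node has been upgraded, never in mixed-mode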
            for bucket in self.bucket_util.get_all_buckets():
                tombstone_doc_supported = \
                    "tombstonedUserXAttrs" in bucket.bucketCapabilities
                if node_to_upgrade is None and not tombstone_doc_supported:
                    self.log_failure("Tombstone docs not added to %s "
                                     "capabilities" % bucket.name)
                elif node_to_upgrade is not None and tombstone_doc_supported:
                    self.log_failure("Tombstone docs supported for %s before "
                                     "cluster upgrade" % bucket.name)

        self.validate_test_failure()

        create_gen = doc_generator(self.key, self.num_items,
                                   self.num_items*2)
        # Start transaction load after node upgrade
        trans_task = self.task.async_load_gen_docs_atomicity(
            self.cluster, self.bucket_util.buckets,
            create_gen, DocLoading.Bucket.DocOps.CREATE, exp=self.maxttl,
            batch_size=50,
            process_concurrency=8,
            timeout_secs=self.sdk_timeout,
            update_count=self.update_count,
            transaction_timeout=self.transaction_timeout,
            commit=True,
            durability=self.durability_level,
            sync=self.sync, defer=self.defer,
            retries=0)
        self.task_manager.get_task_result(trans_task)

    def test_cbcollect_info(self):
        self.parse = self.input.param("parse", False)
        self.metric_name = self.input.param("metric_name", "kv_curr_items")
        log_path = self.input.param("logs_folder")
        self.log.info("Starting update tasks")
        update_tasks = list()
        update_tasks.append(self.task.async_continuous_doc_ops(
            self.cluster, self.bucket, self.gen_load,
            op_type=DocLoading.Bucket.DocOps.UPDATE,
            persist_to=1,
            replicate_to=1,
            process_concurrency=1,
            batch_size=10,
            timeout_secs=30))
        update_tasks.append(self.task.async_continuous_doc_ops(
            self.cluster, self.bucket, self.gen_load,
            op_type=DocLoading.Bucket.DocOps.UPDATE,
            replicate_to=1,
            process_concurrency=1,
            batch_size=10,
            timeout_secs=30))
        update_tasks.append(self.task.async_continuous_doc_ops(
            self.cluster, self.bucket, self.gen_load,
            op_type=DocLoading.Bucket.DocOps.UPDATE,
            persist_to=1,
            process_concurrency=1,
            batch_size=10,
            timeout_secs=30))

        node_to_upgrade = self.fetch_node_to_upgrade()
        while node_to_upgrade is not None:
            # Cbcollect with mixed mode cluster
            status = self.__trigger_cbcollect(log_path)
            if status is False:
                break

            self.log.info("Selected node for upgrade: %s"
                          % node_to_upgrade.ip)
            self.upgrade_function[self.upgrade_type](node_to_upgrade,
                                                     self.upgrade_version)
            self.cluster_util.print_cluster_stats()

            try:
                self.cluster.update_master_using_diag_eval(
                    self.cluster.servers[0])
            except Exception:
                self.cluster.update_master_using_diag_eval(
                    self.cluster.servers[self.nodes_init-1])

            # TODO: Do some validations here
            try:
                self.get_all_metrics(self.parse, self.metric_name)
            except Exception:
                pass

            node_to_upgrade = self.fetch_node_to_upgrade()

            # Halt further upgrade if test has failed during current upgrade
            if self.test_failure is True:
                break

        # Metrics should work in fully upgraded cluster
        self.get_all_metrics(self.parse, self.metric_name)
        # Cbcollect with fully upgraded cluster
        self.__trigger_cbcollect(log_path)

        for update_task in update_tasks:
            # Wait for update_task to complete
            update_task.end_task()
            self.task_manager.get_task_result(update_task)

        self.validate_test_failure()

    def get_low_cardinality_metrics(self, parse):
        content = None
        for server in self.cluster_util.get_kv_nodes():
            content = StatsHelper(server).get_prometheus_metrics(parse=parse)
            if not parse:
                StatsHelper(server)._validate_metrics(content)
        for line in content:
            self.log.info(line.strip("\n"))

    def get_high_cardinality_metrics(self, parse):
        content = None
        try:
            for server in self.cluster_util.get_kv_nodes():
                content = StatsHelper(server).get_prometheus_metrics_high(
                    parse=parse)
                if not parse:
                    StatsHelper(server)._validate_metrics(content)
            for line in content:
                self.log.info(line.strip("\n"))
        except Exception:
            pass

    def get_range_api_metrics(self, metric_name):
        label_values = {"bucket": self.bucket_util.buckets[0].name,
                        "nodes": self.cluster.master.ip}
        content = StatsHelper(self.cluster.master).get_range_api_metrics(
            metric_name, label_values=label_values)
        self.log.info(content)

    def get_instant_api(self, metric_name):
        pass

    def get_all_metrics(self, parse, metrics):
        self.get_low_cardinality_metrics(parse)
        self.get_high_cardinality_metrics(parse)
        self.get_range_api_metrics(metrics)
        self.get_instant_api(metrics)
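
The num_items polling loop used in test_bucket_durability_upgrade above can be factored into a reusable helper. A minimal sketch, assuming the same self.bucket_util, self.sleep and self.log helpers shown in these tests; the helper name wait_for_item_count is hypothetical and not part of the framework:

    def wait_for_item_count(self, bucket, expected_items, retries=5, delay=3):
        # Hypothetical helper: poll the bucket item count a bounded number of
        # times, returning True once it matches and False if it never does.
        for _ in range(retries):
            self.sleep(delay, "Wait for num_items to match")
            current_items = self.bucket_util.get_bucket_current_item_count(
                self.cluster, bucket)
            if current_items == expected_items:
                return True
            self.log.debug("Num_items mismatch. Expected: %s, Actual: %s"
                           % (expected_items, current_items))
        return False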
Example No. 6
    def test_index_with_aborts(self):
        """
        1. Create index (2i/view) on default bucket
        2. Load multiple docs such that all sync_writes will be aborted
        3. Verify nothing went into indexing
        4. Load sync_write docs such that they are successful
        5. Validate the mutated docs are taken into indexing
        :return:
        """

        crud_batch_size = 50
        def_bucket = self.cluster.buckets[0]
        kv_nodes = self.cluster_util.get_kv_nodes(self.cluster)
        replica_vbs = dict()
        verification_dict = dict()
        index_item_count = dict()
        expected_num_indexed = dict()
        load_gen = dict()
        load_gen["ADD"] = dict()
        load_gen["SET"] = dict()
        partial_aborts = ["initial_aborts", "aborts_at_end"]

        durability_helper = DurabilityHelper(
            self.log,
            len(self.cluster.nodes_in_cluster),
            durability=self.durability_level,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to)

        if self.create_index_during == "before_doc_ops":
            self.create_gsi_indexes(def_bucket)

        curr_items = self.bucket_util.get_bucket_current_item_count(
            self.cluster, def_bucket)
        if self.sync_write_abort_pattern in ["all_aborts", "initial_aborts"]:
            self.bucket_util.flush_bucket(self.cluster, def_bucket)
            self.num_items = 0
        else:
            self.num_items = curr_items

        self.log.info("Disabling auto_failover to avoid node failures")
        status = RestConnection(self.cluster.master) \
            .update_autofailover_settings(False, 120)
        self.assertTrue(status, msg="Failure during disabling auto-failover")

        # Validate vbucket stats
        verification_dict["ops_create"] = self.num_items
        verification_dict["ops_update"] = 0
        # verification_dict["ops_delete"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["sync_write_committed_count"] = 0

        index_item_count["#primary"] = self.num_items
        index_item_count["durable_add_aborts"] = 0
        index_item_count["durable_set_aborts"] = 0
        expected_num_indexed["#primary"] = curr_items
        expected_num_indexed["durable_add_aborts"] = 0
        expected_num_indexed["durable_set_aborts"] = 0

        if self.create_index_during == "before_doc_ops":
            self.validate_indexed_doc_count(def_bucket, index_item_count)

        self.log.info("Loading docs such that all sync_writes will be aborted")
        for server in kv_nodes:
            ssh_shell = RemoteMachineShellConnection(server)
            cbstats = Cbstats(server)
            replica_vbs[server] = cbstats.vbucket_list(def_bucket.name,
                                                       "replica")
            load_gen["ADD"][server] = list()
            load_gen["ADD"][server].append(
                doc_generator(self.key,
                              0,
                              crud_batch_size,
                              target_vbucket=replica_vbs[server],
                              mutation_type="ADD"))
            if self.sync_write_abort_pattern in partial_aborts:
                load_gen["ADD"][server].append(
                    doc_generator(self.key,
                                  10000,
                                  crud_batch_size,
                                  target_vbucket=replica_vbs[server],
                                  mutation_type="ADD"))
                verification_dict["ops_create"] += crud_batch_size
                verification_dict["sync_write_committed_count"] += \
                    crud_batch_size
                index_item_count["#primary"] += crud_batch_size
                index_item_count["durable_add_aborts"] += crud_batch_size
                expected_num_indexed["#primary"] += crud_batch_size
                expected_num_indexed["durable_add_aborts"] += crud_batch_size

            task_success = self.bucket_util.load_durable_aborts(
                ssh_shell, load_gen["ADD"][server], self.cluster, def_bucket,
                self.durability_level, DocLoading.Bucket.DocOps.CREATE,
                self.sync_write_abort_pattern)
            if not task_success:
                self.log_failure("Failure during load_abort task")

            verification_dict["sync_write_aborted_count"] += \
                crud_batch_size
            if self.create_index_during == "before_doc_ops":
                self.validate_indexed_doc_count(def_bucket, index_item_count)

            load_gen["SET"][server] = list()
            load_gen["SET"][server].append(
                doc_generator(self.key,
                              0,
                              crud_batch_size,
                              target_vbucket=replica_vbs[server],
                              mutation_type="SET"))
            if self.sync_write_abort_pattern in partial_aborts:
                load_gen["SET"][server].append(
                    doc_generator(self.key,
                                  10000,
                                  crud_batch_size,
                                  target_vbucket=replica_vbs[server],
                                  mutation_type="SET"))
                verification_dict["ops_update"] += crud_batch_size
                verification_dict["sync_write_committed_count"] += \
                    crud_batch_size
                index_item_count["durable_add_aborts"] -= crud_batch_size
                index_item_count["durable_set_aborts"] += crud_batch_size
                expected_num_indexed["#primary"] += crud_batch_size
                expected_num_indexed["durable_add_aborts"] += crud_batch_size
                expected_num_indexed["durable_set_aborts"] += crud_batch_size

            verification_dict["sync_write_aborted_count"] += \
                crud_batch_size
            task_success = self.bucket_util.load_durable_aborts(
                ssh_shell, load_gen["SET"][server], self.cluster, def_bucket,
                self.durability_level, DocLoading.Bucket.DocOps.UPDATE,
                self.sync_write_abort_pattern)
            if not task_success:
                self.log_failure("Failure during load_abort task")

            ssh_shell.disconnect()

            if self.create_index_during == "before_doc_ops":
                self.validate_indexed_doc_count(def_bucket, index_item_count)
        failed = durability_helper.verify_vbucket_details_stats(
            def_bucket,
            kv_nodes,
            vbuckets=self.cluster.vbuckets,
            expected_val=verification_dict)
        if failed:
            self.log_failure("Cbstat vbucket-details verification failed")
        self.validate_test_failure()

        if self.create_index_during == "after_doc_ops":
            self.create_gsi_indexes(def_bucket)
            self.validate_indexed_doc_count(def_bucket, index_item_count)

        self.log.info("Verify aborts are not indexed")
        self.validate_indexed_count_from_stats(def_bucket,
                                               expected_num_indexed,
                                               index_item_count)

        if not self.use_gsi_for_primary:
            self.log.info("Wait of any indexing_activity to complete")
            index_monitor_task = self.cluster_util.async_monitor_active_task(
                self.cluster.master,
                "indexer",
                "_design/ddl_#primary",
                num_iteration=20,
                wait_task=True)[0]
            self.task_manager.get_task_result(index_monitor_task)
            self.assertTrue(index_monitor_task.result,
                            "Indexer task still running on server")

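        # For partial abort patterns, retry only the batch that was aborted
        # earlier (the first generator for initial_aborts, the last one for
        # aborts_at_end)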
        for server in kv_nodes:
            if self.sync_write_abort_pattern == "initial_aborts":
                load_gen["ADD"][server] = load_gen["ADD"][server][:1]
                load_gen["SET"][server] = load_gen["SET"][server][:1]
            elif self.sync_write_abort_pattern == "aborts_at_end":
                load_gen["ADD"][server] = load_gen["ADD"][server][-1:]
                load_gen["SET"][server] = load_gen["SET"][server][-1:]

        self.log.info("Load sync_write docs such that they are successful")
        for server in kv_nodes:
            for gen_load in load_gen["ADD"][server]:
                task = self.task.async_load_gen_docs(
                    self.cluster,
                    def_bucket,
                    gen_load,
                    "create",
                    0,
                    batch_size=50,
                    process_concurrency=8,
                    replicate_to=self.replicate_to,
                    persist_to=self.persist_to,
                    durability=self.durability_level,
                    timeout_secs=self.sdk_timeout)
                self.task.jython_task_manager.get_task_result(task)

                if len(task.fail.keys()) != 0:
                    self.log_failure("Some failures seen during doc_ops")

                index_item_count["#primary"] += crud_batch_size
                index_item_count["durable_add_aborts"] += crud_batch_size
                expected_num_indexed["#primary"] += crud_batch_size
                expected_num_indexed["durable_add_aborts"] += crud_batch_size
                self.validate_indexed_doc_count(def_bucket, index_item_count)

            for gen_load in load_gen["SET"][server]:
                task = self.task.async_load_gen_docs(
                    self.cluster,
                    def_bucket,
                    gen_load,
                    "update",
                    0,
                    batch_size=50,
                    process_concurrency=8,
                    replicate_to=self.replicate_to,
                    persist_to=self.persist_to,
                    durability=self.durability_level,
                    timeout_secs=self.sdk_timeout)
                self.task.jython_task_manager.get_task_result(task)

                if len(task.fail.keys()) != 0:
                    self.log_failure("Some failures seen during doc_ops")

                index_item_count["durable_add_aborts"] -= crud_batch_size
                index_item_count["durable_set_aborts"] += crud_batch_size
                expected_num_indexed["#primary"] += crud_batch_size
                expected_num_indexed["durable_add_aborts"] += crud_batch_size
                expected_num_indexed["durable_set_aborts"] += crud_batch_size
                self.validate_indexed_doc_count(def_bucket, index_item_count)

        self.log.info("Validate the mutated docs are taken into indexing")
        self.validate_indexed_count_from_stats(def_bucket,
                                               expected_num_indexed,
                                               index_item_count)
        self.validate_test_failure()
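
The successful create/update loads at the end of test_index_with_aborts repeat the same load-then-check-failures pattern. A minimal sketch of how that pattern could be wrapped, assuming the same self.task, self.log_failure and durability settings used above; the helper name load_and_check is hypothetical:

    def load_and_check(self, bucket, gen_load, op_type):
        # Hypothetical wrapper around calls already used in the test above:
        # run a durable doc load and flag any per-key failures.
        task = self.task.async_load_gen_docs(
            self.cluster, bucket, gen_load, op_type, 0,
            batch_size=50,
            process_concurrency=8,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to,
            durability=self.durability_level,
            timeout_secs=self.sdk_timeout)
        self.task.jython_task_manager.get_task_result(task)
        if task.fail:
            self.log_failure("Some failures seen during doc_ops")
        return task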
Example No. 7
class UpgradeTests(UpgradeBase):
    def setUp(self):
        super(UpgradeTests, self).setUp()
        self.durability_helper = DurabilityHelper(
            self.log,
            len(self.cluster.nodes_in_cluster))
        self.verification_dict = dict()
        self.verification_dict["ops_create"] = self.num_items
        self.verification_dict["ops_delete"] = 0

    def tearDown(self):
        super(UpgradeTests, self).tearDown()

    def test_upgrade(self):
        create_batch_size = 10000
        update_task = None

        if self.upgrade_with_data_load:
            self.log.info("Starting async doc updates")
            update_task = self.task.async_continuous_doc_ops(
                self.cluster, self.bucket, self.gen_load,
                op_type="update",
                process_concurrency=1,
                persist_to=1,
                replicate_to=1,
                timeout_secs=30)

        create_gen = doc_generator(self.key, self.num_items,
                                   self.num_items+create_batch_size)
        self.log.info("Upgrading cluster nodes to target version")
        node_to_upgrade = self.fetch_node_to_upgrade()
        while node_to_upgrade is not None:
            self.log.info("Selected node for upgrade: %s"
                          % node_to_upgrade.ip)
            self.upgrade_function[self.upgrade_type](node_to_upgrade,
                                                     self.upgrade_version)
            self.cluster_util.print_cluster_stats()

            try:
                self.cluster.update_master(self.cluster.servers[0])
            except Exception:
                self.cluster.update_master(
                    self.cluster.servers[self.nodes_init-1])
            # Validate sync_write results after upgrade
            if self.atomicity:
                create_batch_size = 10
                create_gen = doc_generator(
                    self.key,
                    self.num_items,
                    self.num_items+create_batch_size)
                sync_write_task = self.task.async_load_gen_docs_atomicity(
                    self.cluster, self.bucket_util.buckets,
                    create_gen, "create",
                    process_concurrency=1,
                    transaction_timeout=self.transaction_timeout,
                    record_fail=True)
            else:
                sync_write_task = self.task.async_load_gen_docs(
                    self.cluster, self.bucket, create_gen, "create",
                    durability=self.durability_level,
                    timeout_secs=self.sdk_timeout,
                    process_concurrency=4,
                    skip_read_on_error=True,
                    suppress_error_table=True)
            self.task_manager.get_task_result(sync_write_task)

            node_to_upgrade = self.fetch_node_to_upgrade()
            if self.atomicity:
                self.sleep(10)
                current_items = self.bucket_util.get_bucket_current_item_count(
                    self.cluster, self.bucket)
                if node_to_upgrade is None:
                    if current_items < self.num_items+create_batch_size:
                        self.log_failure(
                            "Failures after cluster upgrade {} {}"
                            .format(current_items,
                                    self.num_items+create_batch_size))
                elif current_items > self.num_items:
                    self.log_failure(
                        "SyncWrite succeeded with mixed mode cluster")
            else:
                if node_to_upgrade is None:
                    if sync_write_task.fail.keys():
                        self.log_failure("Failures after cluster upgrade")
                elif len(sync_write_task.fail.keys()) != create_batch_size:
                    self.log_failure(
                        "SyncWrite succeeded with mixed mode cluster")
                    break
                else:
                    for doc_id, doc_result in sync_write_task.fail.items():
                        if SDKException.FeatureNotAvailableException \
                                not in str(doc_result["error"]):
                            self.log_failure("Invalid exception for %s: %s"
                                             % (doc_id, doc_result))

            # Halt further upgrade if test has failed during current upgrade
            if self.test_failure is True:
                break

        if self.upgrade_with_data_load:
            # Wait for update_task to complete
            update_task.end_task()
            self.task_manager.get_task_result(update_task)

        self.validate_test_failure()

    def test_bucket_durability_upgrade(self):
        update_task = None
        self.sdk_timeout = 60
        create_batch_size = 10000
        if self.atomicity:
            create_batch_size = 10

        # Make sure sync_write is supported by the initial cluster version
        sync_write_support = True
        if float(self.initial_version[0:3]) < 6.5:
            sync_write_support = False

        if sync_write_support:
            self.verification_dict["rollback_item_count"] = 0
            self.verification_dict["sync_write_aborted_count"] = 0

        if self.upgrade_with_data_load:
            self.log.info("Starting async doc updates")
            update_task = self.task.async_continuous_doc_ops(
                self.cluster, self.bucket, self.gen_load,
                op_type="update",
                process_concurrency=1,
                persist_to=1,
                replicate_to=1,
                timeout_secs=30)

        self.log.info("Upgrading cluster nodes to target version")
        node_to_upgrade = self.fetch_node_to_upgrade()
        while node_to_upgrade is not None:
            self.log.info("Selected node for upgrade: %s"
                          % node_to_upgrade.ip)
            self.upgrade_function[self.upgrade_type](node_to_upgrade,
                                                     self.upgrade_version)
            try:
                self.cluster.update_master(self.cluster.servers[0])
            except Exception:
                self.cluster.update_master(
                    self.cluster.servers[self.nodes_init-1])

            create_gen = doc_generator(self.key, self.num_items,
                                       self.num_items+create_batch_size)
            # Validate sync_write results after upgrade
            if self.atomicity:
                sync_write_task = self.task.async_load_gen_docs_atomicity(
                    self.cluster, self.bucket_util.buckets,
                    create_gen, "create",
                    process_concurrency=1,
                    transaction_timeout=self.transaction_timeout,
                    record_fail=True)
            else:
                sync_write_task = self.task.async_load_gen_docs(
                    self.cluster, self.bucket, create_gen, "create",
                    timeout_secs=self.sdk_timeout,
                    process_concurrency=4,
                    skip_read_on_error=True,
                    suppress_error_table=True)
            self.task_manager.get_task_result(sync_write_task)
            self.num_items += create_batch_size

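            # Poll up to 5 times for the item count to reach the expected value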
            retry_index = 0
            while retry_index < 5:
                self.sleep(3, "Wait for num_items to match")
                current_items = self.bucket_util.get_bucket_current_item_count(
                    self.cluster, self.bucket)
                if current_items == self.num_items:
                    break
                self.log.debug("Num_items mismatch. Expected: %s, Actual: %s"
                               % (self.num_items, current_items))
                retry_index += 1
            # Doc count validation
            self.cluster_util.print_cluster_stats()

            self.verification_dict["ops_create"] += create_batch_size
            self.summary.add_step("Upgrade %s" % node_to_upgrade.ip)

            # Halt further upgrade if test has failed during current upgrade
            if self.test_failure:
                break

            node_to_upgrade = self.fetch_node_to_upgrade()

        if self.upgrade_with_data_load:
            # Wait for update_task to complete
            update_task.end_task()
            self.task_manager.get_task_result(update_task)
        else:
            self.verification_dict["ops_update"] = 0

        # Cb_stats vb-details validation
        failed = self.durability_helper.verify_vbucket_details_stats(
            self.bucket_util.buckets[0],
            self.cluster_util.get_kv_nodes(),
            vbuckets=self.cluster_util.vbuckets,
            expected_val=self.verification_dict)
        if failed:
            self.log_failure("Cbstat vbucket-details validation failed")
        self.summary.add_step("Cbstats vb-details verification")

        self.validate_test_failure()

        possible_d_levels = dict()
        possible_d_levels[Bucket.Type.MEMBASE] = \
            self.bucket_util.get_supported_durability_levels()
        possible_d_levels[Bucket.Type.EPHEMERAL] = [
            Bucket.DurabilityLevel.NONE,
            Bucket.DurabilityLevel.MAJORITY]
        len_possible_d_levels = len(possible_d_levels[self.bucket_type]) - 1

        if not sync_write_support:
            self.verification_dict["rollback_item_count"] = 0
            self.verification_dict["sync_write_aborted_count"] = 0

        # Perform bucket_durability update
        key, value = doc_generator("b_durability_doc", 0, 1).next()
        client = SDKClient([self.cluster.master], self.bucket_util.buckets[0])
        for index, d_level in enumerate(possible_d_levels[self.bucket_type]):
            self.log.info("Updating bucket_durability=%s" % d_level)
            self.bucket_util.update_bucket_property(
                self.bucket_util.buckets[0],
                bucket_durability=BucketDurability[d_level])
            self.bucket_util.print_bucket_stats()

            buckets = self.bucket_util.get_all_buckets()
            if buckets[0].durability_level != BucketDurability[d_level]:
                self.log_failure("New bucket_durability not taken")

            self.summary.add_step("Update bucket_durability=%s" % d_level)

            self.sleep(10, "MB-39678: Bucket_d_level change to take effect")

            if index == 0:
                op_type = "create"
                self.verification_dict["ops_create"] += 1
            elif index == len_possible_d_levels:
                op_type = "delete"
                self.verification_dict["ops_delete"] += 1
            else:
                op_type = "update"
                if "ops_update" in self.verification_dict:
                    self.verification_dict["ops_update"] += 1

            result = client.crud(op_type, key, value,
                                 timeout=self.sdk_timeout)
            if result["status"] is False:
                self.log_failure("Doc_op %s failed on key %s: %s"
                                 % (op_type, key, result["error"]))
            self.summary.add_step("Doc_op %s" % op_type)
        client.close()

        # Cb_stats vb-details validation
        failed = self.durability_helper.verify_vbucket_details_stats(
            self.bucket_util.buckets[0],
            self.cluster_util.get_kv_nodes(),
            vbuckets=self.cluster_util.vbuckets,
            expected_val=self.verification_dict)
        if failed:
            self.log_failure("Cbstat vbucket-details validation failed")
        self.summary.add_step("Cbstats vb-details verification")
        self.validate_test_failure()

    def test_transaction_doc_isolation(self):
        def run_transaction_updates():
            self.log.info("Starting transaction updates in parallel")
            while not stop_thread:
                commit_trans = choice([True, False])
                trans_update_task = self.task.async_load_gen_docs_atomicity(
                    self.cluster, self.bucket_util.buckets,
                    self.gen_load, "update", exp=self.maxttl,
                    batch_size=50,
                    process_concurrency=3,
                    timeout_secs=self.sdk_timeout,
                    update_count=self.update_count,
                    transaction_timeout=self.transaction_timeout,
                    commit=commit_trans,
                    durability=self.durability_level,
                    sync=self.sync, defer=self.defer,
                    retries=0)
                self.task_manager.get_task_result(trans_update_task)

        stop_thread = False
        update_task = None
        self.sdk_timeout = 60

        self.log.info("Upgrading cluster nodes to target version")
        node_to_upgrade = self.fetch_node_to_upgrade()
        while node_to_upgrade is not None:
            self.log.info("Selected node for upgrade: %s"
                          % node_to_upgrade.ip)
            if self.upgrade_with_data_load:
                update_task = Thread(target=run_transaction_updates)
                update_task.start()

            self.upgrade_function[self.upgrade_type](node_to_upgrade,
                                                     self.upgrade_version)
            try:
                self.cluster.update_master(self.cluster.servers[0])
            except Exception:
                self.cluster.update_master(
                    self.cluster.servers[self.nodes_init-1])

            if self.upgrade_with_data_load:
                stop_thread = True
                update_task.join()

            self.cluster_util.print_cluster_stats()
            self.bucket_util.print_bucket_stats()

            self.summary.add_step("Upgrade %s" % node_to_upgrade.ip)

            # Halt further upgrade if test has failed during current upgrade
            if self.test_failure:
                break

            node_to_upgrade = self.fetch_node_to_upgrade()
            for bucket in self.bucket_util.get_all_buckets():
                tombstone_doc_supported = \
                    "tombstonedUserXAttrs" in bucket.bucketCapabilities
                if node_to_upgrade is None and not tombstone_doc_supported:
                    self.log_failure("Tombstone docs not added to %s "
                                     "capabilities" % bucket.name)
                elif node_to_upgrade is not None and tombstone_doc_supported:
                    self.log_failure("Tombstone docs supported for %s before "
                                     "cluster upgrade" % bucket.name)

        self.validate_test_failure()

        create_gen = doc_generator(self.key, self.num_items,
                                   self.num_items*2)
        # Start transaction load after node upgrade
        trans_task = self.task.async_load_gen_docs_atomicity(
            self.cluster, self.bucket_util.buckets,
            create_gen, "create", exp=self.maxttl,
            batch_size=50,
            process_concurrency=8,
            timeout_secs=self.sdk_timeout,
            update_count=self.update_count,
            transaction_timeout=self.transaction_timeout,
            commit=True,
            durability=self.durability_level,
            sync=self.sync, defer=self.defer,
            retries=0)
        self.task_manager.get_task_result(trans_task)
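
test_transaction_doc_isolation signals its background update thread through the stop_thread boolean that the nested function reads from the enclosing scope. A small illustrative sketch of the same start/stop pattern using threading.Event, which makes the signalling explicit and thread-safe; the names here are placeholders, not framework code:

from threading import Event, Thread

stop_event = Event()

def run_background_updates():
    # Keep issuing update batches until the main thread sets the event
    while not stop_event.is_set():
        pass  # submit one transactional update batch here, as in the test above

update_task = Thread(target=run_background_updates)
update_task.start()
# ... upgrade the selected node ...
stop_event.set()
update_task.join()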
Example No. 8
class CrashTest(BaseTestCase):
    def setUp(self):
        super(CrashTest, self).setUp()

        self.doc_ops = self.input.param("doc_ops", None)
        self.process_name = self.input.param("process", None)
        self.service_name = self.input.param("service", "data")
        self.sig_type = self.input.param("sig_type", "SIGKILL").upper()
        self.target_node = self.input.param("target_node", "active")

        self.pre_warmup_stats = {}
        self.timeout = 120
        self.new_docs_to_add = 10000

        if self.doc_ops is not None:
            self.doc_ops = self.doc_ops.split(";")

        nodes_init = self.cluster.servers[1:self.nodes_init] \
            if self.nodes_init != 1 else []
        self.task.rebalance([self.cluster.master], nodes_init, [])
        self.cluster.nodes_in_cluster.extend([self.cluster.master] +
                                             nodes_init)
        if not self.atomicity:
            self.durability_helper = DurabilityHelper(
                self.log,
                self.nodes_init,
                durability=self.durability_level,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to)
        self.bucket_util.create_default_bucket(
            bucket_type=self.bucket_type,
            ram_quota=self.bucket_size,
            replica=self.num_replicas,
            compression_mode="off",
            storage=self.bucket_storage,
            eviction_policy=self.bucket_eviction_policy)
        self.bucket_util.add_rbac_user()

        verification_dict = dict()
        verification_dict["ops_create"] = self.num_items
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["pending_writes"] = 0
        if self.durability_level:
            verification_dict["sync_write_committed_count"] = self.num_items

        # Load initial documents into the buckets
        gen_create = doc_generator(self.key,
                                   0,
                                   self.num_items,
                                   key_size=self.key_size,
                                   doc_size=self.doc_size,
                                   doc_type=self.doc_type,
                                   target_vbucket=self.target_vbucket,
                                   vbuckets=self.cluster_util.vbuckets)
        if self.atomicity:
            task = self.task.async_load_gen_docs_atomicity(
                self.cluster,
                self.bucket_util.buckets,
                gen_create,
                "create",
                exp=0,
                batch_size=10,
                process_concurrency=self.process_concurrency,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                update_count=self.update_count,
                transaction_timeout=self.transaction_timeout,
                commit=True,
                sync=self.sync)
            self.task.jython_task_manager.get_task_result(task)
        else:
            for bucket in self.bucket_util.buckets:
                task = self.task.async_load_gen_docs(
                    self.cluster,
                    bucket,
                    gen_create,
                    "create",
                    self.maxttl,
                    persist_to=self.persist_to,
                    replicate_to=self.replicate_to,
                    durability=self.durability_level,
                    batch_size=10,
                    process_concurrency=8)
                self.task.jython_task_manager.get_task_result(task)

                self.bucket_util._wait_for_stats_all_buckets()
                # Verify cbstats vbucket-details
                stats_failed = \
                    self.durability_helper.verify_vbucket_details_stats(
                        bucket, self.cluster_util.get_kv_nodes(),
                        vbuckets=self.cluster_util.vbuckets,
                        expected_val=verification_dict)

                if stats_failed:
                    self.fail("Cbstats verification failed")

            self.bucket_util.verify_stats_all_buckets(self.num_items)
        self.log.info("==========Finished CrashTest setup========")

    def tearDown(self):
        super(CrashTest, self).tearDown()

    def getTargetNode(self):
        if len(self.cluster.nodes_in_cluster) > 1:
            return self.cluster.nodes_in_cluster[randint(
                0, self.nodes_init - 1)]
        return self.cluster.master

    def getVbucketNumbers(self, shell_conn, bucket_name, replica_type):
        cb_stats = Cbstats(shell_conn)
        return cb_stats.vbucket_list(bucket_name, replica_type)

    def test_stop_process(self):
        """
        1. Starting loading docs into the default bucket
        2. Stop the requested process, which will impact the
           memcached operations
        3. Wait for load bucket task to complete
        4. Validate the docs for durability
        """
        error_to_simulate = self.input.param("simulate_error", None)
        def_bucket = self.bucket_util.buckets[0]
        target_node = self.getTargetNode()
        remote = RemoteMachineShellConnection(target_node)
        error_sim = CouchbaseError(self.log, remote)
        target_vbuckets = self.getVbucketNumbers(remote, def_bucket.name,
                                                 self.target_node)
        if len(target_vbuckets) == 0:
            self.log.error("No target vbucket list generated to load data")
            remote.disconnect()
            return

        # Create doc_generator targeting only the active/replica vbuckets
        # present in the target_node
        gen_load = doc_generator(self.key,
                                 self.num_items,
                                 self.new_docs_to_add,
                                 key_size=self.key_size,
                                 doc_size=self.doc_size,
                                 doc_type=self.doc_type,
                                 target_vbucket=target_vbuckets,
                                 vbuckets=self.cluster_util.vbuckets)

        if self.atomicity:
            task = self.task.async_load_gen_docs_atomicity(
                self.cluster,
                self.bucket_util.buckets,
                gen_load,
                "create",
                exp=0,
                batch_size=10,
                process_concurrency=self.process_concurrency,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                update_count=self.update_count,
                transaction_timeout=self.transaction_timeout,
                commit=True,
                sync=self.sync)
        else:
            task = self.task.async_load_gen_docs(
                self.cluster,
                def_bucket,
                gen_load,
                "create",
                exp=0,
                batch_size=1,
                process_concurrency=8,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                skip_read_on_error=True)

        # Induce the error condition
        error_sim.create(error_to_simulate)

        self.sleep(20, "Wait before reverting the error condition")
        # Revert the simulated error condition and close the ssh session
        error_sim.revert(error_to_simulate)
        remote.disconnect()

        # Wait for doc loading task to complete
        self.task.jython_task_manager.get_task_result(task)
        if not self.atomicity:
            if len(task.fail.keys()) != 0:
                if self.target_node == "active" or self.num_replicas in [2, 3]:
                    self.log_failure("Unwanted failures for keys: %s" %
                                     task.fail.keys())

            validate_passed = \
                self.durability_helper.validate_durability_exception(
                    task.fail,
                    SDKException.DurabilityAmbiguousException)
            if not validate_passed:
                self.log_failure("Unwanted exception seen during validation")

            # Create SDK connection for CRUD retries
            sdk_client = SDKClient([self.cluster.master], def_bucket)
            for doc_key, crud_result in task.fail.items():
                result = sdk_client.crud("create",
                                         doc_key,
                                         crud_result["value"],
                                         replicate_to=self.replicate_to,
                                         persist_to=self.persist_to,
                                         durability=self.durability_level,
                                         timeout=self.sdk_timeout)
                if result["status"] is False:
                    self.log_failure("Retry of doc_key %s failed: %s" %
                                     (doc_key, result["error"]))
            # Close the SDK connection
            sdk_client.close()

        # Update self.num_items
        self.num_items += self.new_docs_to_add

        if not self.atomicity:
            # Validate doc count
            self.bucket_util._wait_for_stats_all_buckets()
            self.bucket_util.verify_stats_all_buckets(self.num_items)

        self.validate_test_failure()

    def test_crash_process(self):
        """
        1. Starting loading docs into the default bucket
        2. Crash the requested process, which will not impact the
           memcached operations
        3. Wait for load bucket task to complete
        4. Validate the docs for durability
        """
        def_bucket = self.bucket_util.buckets[0]
        target_node = self.getTargetNode()
        remote = RemoteMachineShellConnection(target_node)
        target_vbuckets = range(0, self.cluster_util.vbuckets)
        retry_exceptions = list()

        # If Memcached is killed, we should not perform KV ops on
        # particular node. If not we can target all nodes for KV operation.
        if self.process_name == "memcached":
            target_vbuckets = self.getVbucketNumbers(remote, def_bucket.name,
                                                     self.target_node)
            if self.target_node == "active":
                retry_exceptions = [SDKException.TimeoutException]
        if len(target_vbuckets) == 0:
            self.log.error("No target vbucket list generated to load data")
            remote.disconnect()
            return

        # Create doc_generator targeting only the active/replica vbuckets
        # present in the target_node
        gen_load = doc_generator(self.key,
                                 self.num_items,
                                 self.new_docs_to_add,
                                 key_size=self.key_size,
                                 doc_size=self.doc_size,
                                 doc_type=self.doc_type,
                                 target_vbucket=target_vbuckets,
                                 vbuckets=self.cluster_util.vbuckets)
        if self.atomicity:
            task = self.task.async_load_gen_docs_atomicity(
                self.cluster,
                self.bucket_util.buckets,
                gen_load,
                "create",
                exp=0,
                batch_size=10,
                process_concurrency=self.process_concurrency,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                update_count=self.update_count,
                transaction_timeout=self.transaction_timeout,
                commit=True,
                sync=self.sync)
        else:
            task = self.task.async_load_gen_docs(
                self.cluster,
                def_bucket,
                gen_load,
                "create",
                exp=0,
                batch_size=10,
                process_concurrency=8,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                skip_read_on_error=True)

        task_info = dict()
        task_info[task] = self.bucket_util.get_doc_op_info_dict(
            def_bucket,
            "create",
            0,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to,
            durability=self.durability_level,
            timeout=self.sdk_timeout,
            time_unit="seconds",
            retry_exceptions=retry_exceptions)

        self.sleep(10, "Wait for doc_ops to start")
        self.log.info("Killing {0}:{1} on node {2}".format(
            self.process_name, self.service_name, target_node.ip))
        remote.kill_process(self.process_name,
                            self.service_name,
                            signum=signum[self.sig_type])
        remote.disconnect()
        # Wait for tasks completion and validate failures
        if self.atomicity:
            self.task.jython_task_manager.get_task_result(task)
        if not self.atomicity:
            self.bucket_util.verify_doc_op_task_exceptions(
                task_info, self.cluster)
            self.bucket_util.log_doc_ops_task_failures(task_info)

        # Update self.num_items
        self.num_items += self.new_docs_to_add

        # Verification stats
        verification_dict = dict()
        verification_dict["ops_create"] = self.num_items
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["pending_writes"] = 0
        if self.durability_level:
            verification_dict["sync_write_committed_count"] = self.num_items

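        # Ephemeral buckets keep data in memory only, so after a memcached
        # crash the test rebalances before validating doc counts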
        if self.bucket_type == Bucket.Type.EPHEMERAL \
                and self.process_name == "memcached":
            self.sleep(10, "Wait for memcached to recover from the crash")
            result = self.task.rebalance(self.servers[:self.nodes_init], [],
                                         [])
            self.assertTrue(result, "Rebalance failed")

        # Validate doc count
        if not self.atomicity:
            self.bucket_util._wait_for_stats_all_buckets()
            self.bucket_util.verify_stats_all_buckets(self.num_items)

            if self.process_name != "memcached":
                stats_failed = \
                    self.durability_helper.verify_vbucket_details_stats(
                        def_bucket, self.cluster_util.get_kv_nodes(),
                        vbuckets=self.cluster_util.vbuckets,
                        expected_val=verification_dict)
                if stats_failed:
                    self.fail("Cbstats verification failed")
Example No. 9
    def test_fts_index_with_aborts(self):
        """
        1. Create index (2i/view) on default bucket
        2. Load multiple docs such that all sync_writes will be aborted
        3. Verify nothing went into indexing
        4. Load sync_write docs such that they are successful
        5. Validate the mutated docs are taken into indexing
        :return:
        """
        self.key = "test_query_doc"
        self.index_name = "fts_test_index"
        self.sync_write_abort_pattern = self.input.param(
            "sync_write_abort_pattern", "all_aborts")
        self.create_index_during = self.input.param("create_index_during",
                                                    "before_doc_ops")
        self.restServer = self.cluster_util.get_nodes_from_services_map(
            cluster=self.cluster, service_type=CbServer.Services.FTS)
        self.rest = RestConnection(self.restServer)
        crud_batch_size = 1000
        def_bucket = self.cluster.buckets[0]
        kv_nodes = self.cluster_util.get_kv_nodes(self.cluster)
        replica_vbs = dict()
        verification_dict = dict()
        index_item_count = dict()
        expected_num_indexed = dict()
        load_gen = dict()
        load_gen["ADD"] = dict()
        load_gen["SET"] = dict()
        partial_aborts = ["initial_aborts", "aborts_at_end"]

        durability_helper = DurabilityHelper(
            self.log,
            len(self.cluster.nodes_in_cluster),
            durability=self.durability_level,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to)

        if self.create_index_during == "before_doc_ops":
            self.create_fts_indexes(def_bucket.name, self.index_name)

        curr_items = self.bucket_util.get_bucket_current_item_count(
            self.cluster, def_bucket)
        if self.sync_write_abort_pattern in ["all_aborts", "initial_aborts"]:
            self.bucket_util.flush_bucket(self.cluster, def_bucket)
            self.num_items = 0
        else:
            self.num_items = curr_items

        self.log.info("Disabling auto_failover to avoid node failures")
        status = RestConnection(self.cluster.master) \
            .update_autofailover_settings(False, 120, False)
        self.assertTrue(status, msg="Failure during disabling auto-failover")

        # Validate vbucket stats
        verification_dict["ops_create"] = self.num_items
        verification_dict["ops_update"] = 0
        # verification_dict["ops_delete"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["sync_write_committed_count"] = 0

        if self.create_index_during == "before_doc_ops":
            self.validate_indexed_doc_count(self.index_name,
                                            verification_dict["ops_create"])

        self.log.info("Loading docs such that all sync_writes will be aborted")
        for server in kv_nodes:
            ssh_shell = RemoteMachineShellConnection(server)
            cbstats = Cbstats(ssh_shell)
            replica_vbs[server] = cbstats.vbucket_list(def_bucket.name,
                                                       "replica")
            load_gen["ADD"][server] = list()
            load_gen["ADD"][server].append(
                doc_generator(self.key,
                              0,
                              crud_batch_size,
                              target_vbucket=replica_vbs[server],
                              mutation_type="ADD"))
            if self.sync_write_abort_pattern in partial_aborts:
                load_gen["ADD"][server].append(
                    doc_generator(self.key,
                                  10000,
                                  crud_batch_size,
                                  target_vbucket=replica_vbs[server],
                                  mutation_type="ADD"))
                verification_dict["ops_create"] += crud_batch_size
                verification_dict["sync_write_committed_count"] += \
                    crud_batch_size

            task_success = self.bucket_util.load_durable_aborts(
                ssh_shell, load_gen["ADD"][server], def_bucket,
                self.durability_level, "create", self.sync_write_abort_pattern)
            if not task_success:
                self.log_failure("Failure during load_abort task")

            verification_dict["sync_write_aborted_count"] += \
                crud_batch_size
            if self.create_index_during == "before_doc_ops":
                self.validate_indexed_doc_count(
                    self.index_name, verification_dict["ops_create"])

            load_gen["SET"][server] = list()
            load_gen["SET"][server].append(
                doc_generator(self.key,
                              0,
                              crud_batch_size,
                              target_vbucket=replica_vbs[server],
                              mutation_type="SET"))
            if self.sync_write_abort_pattern in partial_aborts:
                load_gen["SET"][server].append(
                    doc_generator(self.key,
                                  10000,
                                  crud_batch_size,
                                  target_vbucket=replica_vbs[server],
                                  mutation_type="SET"))
                verification_dict["ops_update"] += crud_batch_size
                verification_dict["sync_write_committed_count"] += \
                    crud_batch_size

            verification_dict["sync_write_aborted_count"] += \
                crud_batch_size
            task_success = self.bucket_util.load_durable_aborts(
                ssh_shell, load_gen["SET"][server], def_bucket,
                self.durability_level, "update", self.sync_write_abort_pattern)
            if not task_success:
                self.log_failure("Failure during load_abort task")

            ssh_shell.disconnect()

            if self.create_index_during == "before_doc_ops":
                self.validate_indexed_doc_count(
                    self.index_name, verification_dict["ops_create"])
        failed = durability_helper.verify_vbucket_details_stats(
            def_bucket,
            kv_nodes,
            vbuckets=self.cluster.vbuckets,
            expected_val=verification_dict)
        if failed:
            self.log_failure("Cbstat vbucket-details verification failed")
        self.validate_test_failure()

        if self.create_index_during == "after_doc_ops":
            self.create_fts_indexes(def_bucket.name, self.index_name)
            self.validate_indexed_doc_count(self.index_name,
                                            verification_dict["ops_create"])

        self.log.info("Verify aborts are not indexed")
        self.validate_indexed_doc_count(self.index_name,
                                        verification_dict["ops_create"])

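        # Keep only the generator batch whose sync_writes were aborted earlier,
        # so the load below re-issues exactly those keys as successful writes.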
        for server in kv_nodes:
            if self.sync_write_abort_pattern == "initial_aborts":
                load_gen["ADD"][server] = load_gen["ADD"][server][:1]
                load_gen["SET"][server] = load_gen["SET"][server][:1]
            elif self.sync_write_abort_pattern == "aborts_at_end":
                load_gen["ADD"][server] = load_gen["ADD"][server][-1:]
                load_gen["SET"][server] = load_gen["SET"][server][-1:]

        self.log.info("Load sync_write docs such that they are successful")
        for server in kv_nodes:
            for gen_load in load_gen["ADD"][server]:
                task = self.task.async_load_gen_docs(
                    self.cluster,
                    def_bucket,
                    gen_load,
                    "create",
                    0,
                    batch_size=50,
                    process_concurrency=8,
                    replicate_to=self.replicate_to,
                    persist_to=self.persist_to,
                    durability=self.durability_level,
                    timeout_secs=self.sdk_timeout)
                self.task.jython_task_manager.get_task_result(task)
                if len(task.fail.keys()) != 0:
                    self.log_failure("Some failures seen during doc_ops")
                verification_dict["ops_create"] += crud_batch_size
                self.validate_indexed_doc_count(
                    self.index_name, verification_dict["ops_create"])

            for gen_load in load_gen["SET"][server]:
                task = self.task.async_load_gen_docs(
                    self.cluster,
                    def_bucket,
                    gen_load,
                    "update",
                    0,
                    batch_size=50,
                    process_concurrency=8,
                    replicate_to=self.replicate_to,
                    persist_to=self.persist_to,
                    durability=self.durability_level,
                    timeout_secs=self.sdk_timeout)
                self.task.jython_task_manager.get_task_result(task)
                if len(task.fail.keys()) != 0:
                    self.log_failure("Some failures seen during doc_ops")
                verification_dict["ops_update"] += crud_batch_size
                self.validate_indexed_doc_count(
                    self.index_name, verification_dict["ops_create"])

        self.log.info("Validate the mutated docs are taken into indexing")
        self.validate_indexed_doc_count(self.index_name,
                                        verification_dict["ops_create"])
        self.validate_test_failure()
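The index-count validations above go through self.validate_indexed_doc_count, which lives elsewhere in the suite. A minimal stand-alone sketch of the same idea, assuming the stock FTS count endpoint on port 8094 (/api/index/<name>/count) and using requests purely for brevity, polls until the indexed count matches the expected ops_create value:

import time

import requests  # illustration only; the suite itself talks to FTS via RestConnection


def wait_for_indexed_count(fts_host, index_name, expected, auth, timeout=120):
    # Poll the FTS count endpoint until it reports the expected doc count
    # or the timeout expires; endpoint path and port are the FTS defaults.
    url = "http://%s:8094/api/index/%s/count" % (fts_host, index_name)
    deadline = time.time() + timeout
    while time.time() < deadline:
        resp = requests.get(url, auth=auth)
        if resp.status_code == 200 and resp.json().get("count") == expected:
            return True
        time.sleep(2)
    return False

Called as wait_for_indexed_count(self.restServer.ip, self.index_name, verification_dict["ops_create"], (username, password)), it mirrors the polling this test relies on.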
Example #10
class basic_ops(BaseTestCase):
    def setUp(self):
        super(basic_ops, self).setUp()

        self.key = 'test_docs'.rjust(self.key_size, '0')

        nodes_init = self.cluster.servers[1:self.nodes_init] \
            if self.nodes_init != 1 else []
        self.task.rebalance([self.cluster.master], nodes_init, [])
        self.cluster.nodes_in_cluster.extend([self.cluster.master] +
                                             nodes_init)
        self.bucket_util.create_default_bucket(
            replica=self.num_replicas,
            compression_mode=self.compression_mode,
            bucket_type=self.bucket_type)
        self.bucket_util.add_rbac_user()

        self.src_bucket = self.bucket_util.get_all_buckets()
        self.durability_helper = DurabilityHelper(
            self.log,
            len(self.cluster.nodes_in_cluster),
            durability=self.durability_level,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to)
        # Reset active_resident_threshold so no extra data is loaded to push
        # the bucket into DGM
        self.active_resident_threshold = 0
        self.cluster_util.print_cluster_stats()
        self.bucket_util.print_bucket_stats()
        self.log.info("==========Finished Basic_ops base setup========")

    def tearDown(self):
        super(basic_ops, self).tearDown()

    def do_basic_ops(self):
        KEY_NAME = 'key1'
        KEY_NAME2 = 'key2'
        self.log.info('Starting basic ops')

        rest = RestConnection(self.cluster.master)
        default_bucket = self.bucket_util.get_all_buckets()[0]
        smart_client = VBucketAwareMemcached(rest, default_bucket)
        sdk_client = smart_client.get_client()
        # mcd = client.memcached(KEY_NAME)

        # MB-17231 - incr with full eviction
        rc = sdk_client.incr(KEY_NAME, delta=1)
        self.log.info('rc for incr: {0}'.format(rc))

        # MB-17289 del with meta
        rc = sdk_client.set(KEY_NAME, 0, 0, json.dumps({'value': 'value2'}))
        self.log.info('set is: {0}'.format(rc))
        # cas = rc[1]

        # wait for it to persist
        persisted = 0
        while persisted == 0:
            opaque, rep_time, persist_time, persisted, cas = sdk_client.observe(
                KEY_NAME)

        try:
            rc = sdk_client.evict_key(KEY_NAME)
        except MemcachedError as exp:
            self.fail("Exception with evict meta - {0}".format(exp))

        CAS = 0xabcd
        # del_with_meta needs a direct memcached connection to the vbucket
        # owning the key; fetch one via the vbucket-aware client
        mcd = smart_client.memcached(KEY_NAME2)
        try:
            # key, exp, flags, seqno, cas
            rc = mcd.del_with_meta(KEY_NAME2, 0, 0, 2, CAS)
        except MemcachedError as exp:
            self.fail("Exception with del_with meta - {0}".format(exp))

    # Reproduce test case for MB-28078
    def do_setWithMeta_twice(self):
        mc = MemcachedClient(self.cluster.master.ip, 11210)
        mc.sasl_auth_plain(self.cluster.master.rest_username,
                           self.cluster.master.rest_password)
        mc.bucket_select('default')

        try:
            mc.setWithMeta('1', '{"Hello":"World"}', 3600, 0, 1,
                           0x1512a3186faa0000)
        except MemcachedError as error:
            self.log.info("<MemcachedError #%d ``%s''>" %
                          (error.status, error.message))
            self.fail("Error on First setWithMeta()")

        stats = mc.stats()
        self.log.info('curr_items: {0} and curr_temp_items:{1}'.format(
            stats['curr_items'], stats['curr_temp_items']))
        self.log.info("Sleeping for 5 and checking stats again")
        time.sleep(5)
        stats = mc.stats()
        self.log.info('curr_items: {0} and curr_temp_items:{1}'.format(
            stats['curr_items'], stats['curr_temp_items']))

        try:
            mc.setWithMeta('1', '{"Hello":"World"}', 3600, 0, 1,
                           0x1512a3186faa0000)
        except MemcachedError as error:
            stats = mc.stats()
            self.log.info(
                'After 2nd setWithMeta(), curr_items: {} and curr_temp_items:{}'
                .format(stats['curr_items'], stats['curr_temp_items']))
            if int(stats['curr_temp_items']) == 1:
                self.fail(
                    "Error on second setWithMeta(), expected curr_temp_items to be 0"
                )
            else:
                self.log.info("<MemcachedError #%d ``%s''>" %
                              (error.status, error.message))

    def generate_docs_bigdata(self,
                              docs_per_day,
                              start=0,
                              document_size=1024000):
        json_generator = JsonGenerator()
        return json_generator.generate_docs_bigdata(start=start,
                                                    end=docs_per_day,
                                                    value_size=document_size)

    def test_doc_size(self):
        """
        Basic tests for document CRUD operations using JSON docs
        """
        def check_durability_failures():
            self.log.error(task.sdk_acked_curd_failed.keys())
            self.log.error(task.sdk_exception_crud_succeed.keys())
            self.assertTrue(
                len(task.sdk_acked_curd_failed) == 0,
                "Durability failed for docs: %s" %
                task.sdk_acked_curd_failed.keys())
            self.assertTrue(
                len(task.sdk_exception_crud_succeed) == 0,
                "CRUD succeeded even after SDK exception for docs: %s" %
                task.sdk_exception_crud_succeed.keys())

        doc_op = self.input.param("doc_op", None)
        def_bucket = self.bucket_util.buckets[0]
        ignore_exceptions = list()
        retry_exceptions = list()

        # Stat validation reference variables
        verification_dict = dict()
        ref_val = dict()
        ref_val["ops_create"] = 0
        ref_val["ops_update"] = 0
        ref_val["ops_delete"] = 0
        ref_val["rollback_item_count"] = 0
        ref_val["sync_write_aborted_count"] = 0
        ref_val["sync_write_committed_count"] = 0

        one_less_node = self.nodes_init == self.num_replicas

        if self.durability_level:
            pass
            #ignore_exceptions.append(
            #    "com.couchbase.client.core.error.RequestTimeoutException")

        if self.target_vbucket and type(self.target_vbucket) is not list:
            self.target_vbucket = [self.target_vbucket]

        self.log.info("Creating doc_generator..")
        # Load basic docs into bucket
        doc_create = doc_generator(self.key,
                                   0,
                                   self.num_items,
                                   doc_size=self.doc_size,
                                   doc_type=self.doc_type,
                                   target_vbucket=self.target_vbucket,
                                   vbuckets=self.vbuckets)
        self.log.info("Loading {0} docs into the bucket: {1}".format(
            self.num_items, def_bucket))
        task = self.task.async_load_gen_docs(
            self.cluster,
            def_bucket,
            doc_create,
            "create",
            0,
            batch_size=self.batch_size,
            process_concurrency=self.process_concurrency,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to,
            durability=self.durability_level,
            timeout_secs=self.sdk_timeout,
            ryow=self.ryow,
            check_persistence=self.check_persistence)
        self.task.jython_task_manager.get_task_result(task)

        if self.ryow:
            check_durability_failures()

        # Retry doc_exception code
        self.log.info("Validating failed doc's (if any) exceptions")
        doc_op_info_dict = dict()
        doc_op_info_dict[task] = self.bucket_util.get_doc_op_info_dict(
            def_bucket,
            "create",
            exp=0,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to,
            durability=self.durability_level,
            timeout=self.sdk_timeout,
            time_unit="seconds",
            ignore_exceptions=ignore_exceptions,
            retry_exceptions=retry_exceptions)
        self.bucket_util.verify_doc_op_task_exceptions(doc_op_info_dict,
                                                       self.cluster)

        if len(doc_op_info_dict[task]["unwanted"]["fail"].keys()) != 0:
            self.fail("Failures in retry doc CRUDs: {0}".format(
                doc_op_info_dict[task]["unwanted"]["fail"]))

        self.log.info("Wait for ep_all_items_remaining to become '0'")
        self.bucket_util._wait_for_stats_all_buckets()

        # Update ref_val
        ref_val["ops_create"] = self.num_items + len(task.fail.keys())
        ref_val["sync_write_committed_count"] = self.num_items
        # Validate vbucket stats
        verification_dict["ops_create"] = ref_val["ops_create"]
        verification_dict["rollback_item_count"] = \
            ref_val["rollback_item_count"]
        if self.durability_level:
            verification_dict["sync_write_aborted_count"] = \
                ref_val["sync_write_aborted_count"]
            verification_dict["sync_write_committed_count"] = \
                ref_val["sync_write_committed_count"]

        failed = self.durability_helper.verify_vbucket_details_stats(
            def_bucket,
            self.cluster_util.get_kv_nodes(),
            vbuckets=self.vbuckets,
            expected_val=verification_dict,
            one_less_node=one_less_node)
        if failed:
            self.fail("Cbstat vbucket-details verification failed")

        # Verify initial doc load count
        self.log.info("Validating doc_count in buckets")
        self.bucket_util.verify_stats_all_buckets(self.num_items)

        self.log.info("Creating doc_generator for doc_op")
        num_item_start_for_crud = int(self.num_items / 2)
        doc_update = doc_generator(self.key,
                                   0,
                                   num_item_start_for_crud,
                                   doc_size=self.doc_size,
                                   doc_type=self.doc_type,
                                   target_vbucket=self.target_vbucket,
                                   vbuckets=self.vbuckets)

        expected_num_items = self.num_items
        num_of_mutations = 1

        if doc_op == "update":
            self.log.info("Performing 'update' mutation over the docs")
            task = self.task.async_load_gen_docs(
                self.cluster,
                def_bucket,
                doc_update,
                "update",
                0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                ryow=self.ryow,
                check_persistence=self.check_persistence)
            self.task.jython_task_manager.get_task_result(task)
            ref_val["ops_update"] = (doc_update.end - doc_update.start +
                                     len(task.fail.keys()))
            if self.durability_level:
                ref_val["sync_write_committed_count"] += \
                    (doc_update.end - doc_update.start)
            if self.ryow:
                check_durability_failures()

            # Read all the values to validate update operation
            task = self.task.async_load_gen_docs(
                self.cluster,
                def_bucket,
                doc_update,
                "read",
                0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                timeout_secs=self.sdk_timeout)
            self.task.jython_task_manager.get_task_result(task)

            op_failed_tbl = TableView(self.log.error)
            op_failed_tbl.set_headers(["Update failed key", "CAS", "Value"])
            for key, value in task.success.items():
                if json.loads(str(value["value"]))["mutated"] != 1:
                    op_failed_tbl.add_row([key, value["cas"], value["value"]])

            op_failed_tbl.display("Update failed for keys:")
            if len(op_failed_tbl.rows) != 0:
                self.fail("Update failed for few keys")
        elif doc_op == "delete":
            self.log.info("Performing 'delete' mutation over the docs")
            task = self.task.async_load_gen_docs(
                self.cluster,
                def_bucket,
                doc_update,
                "delete",
                0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                ryow=self.ryow,
                check_persistence=self.check_persistence)
            self.task.jython_task_manager.get_task_result(task)
            expected_num_items = self.num_items \
                                 - (self.num_items - num_item_start_for_crud)
            ref_val["ops_delete"] = (doc_update.end - doc_update.start +
                                     len(task.fail.keys()))
            if self.durability_level:
                ref_val["sync_write_committed_count"] += \
                    (doc_update.end - doc_update.start)
            if self.ryow:
                check_durability_failures()

            # Read all the values to validate update operation
            task = self.task.async_load_gen_docs(self.cluster,
                                                 def_bucket,
                                                 doc_update,
                                                 "read",
                                                 0,
                                                 batch_size=10,
                                                 process_concurrency=8,
                                                 timeout_secs=self.sdk_timeout)
            self.task.jython_task_manager.get_task_result(task)

            op_failed_tbl = TableView(self.log.error)
            op_failed_tbl.set_headers(["Delete failed key", "CAS", "Value"])
            for key, value in task.success.items():
                op_failed_tbl.add_row([key, value["cas"], value["value"]])

            op_failed_tbl.display("Delete failed for keys:")
            if len(op_failed_tbl.rows) != 0:
                self.fail("Delete failed for few keys")
        else:
            self.log.warning("Unsupported doc_operation")

        self.log.info("Wait for ep_all_items_remaining to become '0'")
        self.bucket_util._wait_for_stats_all_buckets()

        # Validate vbucket stats
        verification_dict["ops_create"] = ref_val["ops_create"]
        verification_dict["ops_update"] = ref_val["ops_update"]
        verification_dict["ops_delete"] = ref_val["ops_delete"]

        verification_dict["rollback_item_count"] = \
            ref_val["rollback_item_count"]
        if self.durability_level:
            verification_dict["sync_write_aborted_count"] = \
                ref_val["sync_write_aborted_count"]
            verification_dict["sync_write_committed_count"] = \
                ref_val["sync_write_committed_count"]

        failed = self.durability_helper.verify_vbucket_details_stats(
            def_bucket,
            self.cluster_util.get_kv_nodes(),
            vbuckets=self.vbuckets,
            expected_val=verification_dict,
            one_less_node=one_less_node)
        if failed:
            self.fail("Cbstat vbucket-details verification failed")

        self.log.info("Validating doc_count")
        self.bucket_util.verify_stats_all_buckets(expected_num_items)

    def test_large_doc_size(self):
        # With a 256MB bucket the test starts failing once ~236MB is filled:
        # at a 2MB document size that is ~221 docs, so we load 250 docs.
        # Generate docs with size >= 1MB; see MB-29333

        self.doc_size *= 1024000
        gens_load = self.generate_docs_bigdata(docs_per_day=self.num_items,
                                               document_size=self.doc_size)
        for bucket in self.bucket_util.buckets:
            task = self.task.async_load_gen_docs(
                self.cluster,
                bucket,
                gens_load,
                "create",
                0,
                batch_size=10,
                process_concurrency=8,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout)
            self.task.jython_task_manager.get_task_result(task)

        # Check that all the documents (250) are loaded within the default timeout
        self.bucket_util.verify_stats_all_buckets(self.num_items)

    def test_large_doc_20MB(self):
        # Test reproducer for MB-29258:
        # load a doc larger than 20MB with compression enabled and check
        # that it fails; repeat with compression_mode active, passive and off
        val_error = DurableExceptions.ValueTooLargeException
        gens_load = self.generate_docs_bigdata(docs_per_day=1,
                                               document_size=(self.doc_size *
                                                              1024000))
        for bucket in self.bucket_util.buckets:
            task = self.task.async_load_gen_docs(
                self.cluster,
                bucket,
                gens_load,
                "create",
                0,
                batch_size=10,
                process_concurrency=8,
                replicate_to=self.replicate_to,
                persist_to=self.persist_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout)
            self.task.jython_task_manager.get_task_result(task)
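            # doc_size is interpreted in MB here; values above the 20MB
            # value-size limit are expected to be rejected by the server.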
            if self.doc_size > 20:
                if len(task.fail.keys()) == 0:
                    self.log_failure("No failures during large doc insert")
                for doc_id, doc_result in task.fail.items():
                    if val_error not in str(doc_result["error"]):
                        self.log_failure("Invalid exception for key %s: %s" %
                                         (doc_id, doc_result))
            else:
                if len(task.success.keys()) == 0:
                    self.log_failure("Failures during large doc insert")

        for bucket in self.bucket_util.buckets:
            if self.doc_size > 20:
                # The insert fails with "Data Too Big" when the document size > 20MB
                self.bucket_util.verify_stats_all_buckets(0)
            else:
                self.bucket_util.verify_stats_all_buckets(1)
                gens_update = self.generate_docs_bigdata(
                    docs_per_day=1, document_size=(21 * 1024000))
                task = self.task.async_load_gen_docs(
                    self.cluster,
                    bucket,
                    gens_update,
                    "create",
                    0,
                    batch_size=10,
                    process_concurrency=8,
                    replicate_to=self.replicate_to,
                    persist_to=self.persist_to,
                    durability=self.durability_level,
                    timeout_secs=self.sdk_timeout)
                self.task.jython_task_manager.get_task_result(task)
                if len(task.success.keys()) != 0:
                    self.log_failure("Large docs inserted for keys: %s" %
                                     task.success.keys())
                if len(task.fail.keys()) == 0:
                    self.log_failure("No failures during large doc insert")
                for doc_id, doc_result in task.fail.items():
                    if val_error not in str(doc_result["error"]):
                        self.log_failure("Invalid exception for key %s: %s" %
                                         (doc_id, doc_result))
                self.bucket_util.verify_stats_all_buckets(1)
        self.validate_test_failure()

    def test_diag_eval_curl(self):
        # Check whether diag/eval is accessible only from localhost
        self.disable_diag_eval_on_non_local_host = \
            self.input.param("disable_diag_eval_non_local", False)
        port = self.cluster.master.port

        # Verify diag/eval works when accessed via localhost
        cmd = []
        cmd_base = 'curl http://{0}:{1}@localhost:{2}/diag/eval ' \
                   .format(self.cluster.master.rest_username,
                           self.cluster.master.rest_password, port)
        command = cmd_base + '-X POST -d \'os:cmd("env")\''
        cmd.append(command)
        command = cmd_base + '-X POST -d \'case file:read_file("/etc/passwd") of {ok, B} -> io:format("~p~n", [binary_to_term(B)]) end.\''
        cmd.append(command)

        shell = RemoteMachineShellConnection(self.cluster.master)
        for command in cmd:
            output, error = shell.execute_command(command)
            self.assertNotEquals("API is accessible from localhost only",
                                 output[0])

        # Enable allow_nonlocal_eval unless we are testing the disabled case
        if not self.disable_diag_eval_on_non_local_host:
            command = cmd_base + '-X POST -d \'ns_config:set(allow_nonlocal_eval, true).\''
            _, _ = shell.execute_command(command)

        # diag/eval via the node IP should be rejected only when
        # allow_nonlocal_eval stays disabled
        cmd = []
        cmd_base = 'curl http://{0}:{1}@{2}:{3}/diag/eval ' \
            .format(self.cluster.master.rest_username,
                    self.cluster.master.rest_password,
                    self.cluster.master.ip, port)
        command = cmd_base + '-X POST -d \'os:cmd("env")\''
        cmd.append(command)
        command = cmd_base + '-X POST -d \'case file:read_file("/etc/passwd") of {ok, B} -> io:format("~p~n", [binary_to_term(B)]) end.\''
        cmd.append(command)

        for command in cmd:
            output, error = shell.execute_command(command)
            if self.disable_diag_eval_on_non_local_host:
                self.assertEquals("API is accessible from localhost only",
                                  output[0])
            else:
                self.assertNotEquals("API is accessible from localhost only",
                                     output[0])

    def verify_stat(self, items, value="active"):
        mc = MemcachedClient(self.cluster.master.ip, 11210)
        mc.sasl_auth_plain(self.cluster.master.rest_username,
                           self.cluster.master.rest_password)
        mc.bucket_select('default')
        stats = mc.stats()
        self.assertEquals(stats['ep_compression_mode'], value)
        self.assertEquals(int(stats['ep_item_compressor_num_compressed']),
                          items)
        self.assertNotEquals(int(stats['vb_active_itm_memory']),
                             int(stats['vb_active_itm_memory_uncompressed']))

    def test_compression_active_and_off(self):
        """
        test reproducer for MB-29272,
        Load some documents with compression mode set to active
        get the cbstats
        change compression mode to off and wait for minimum 250ms
        Load some more documents and check the compression is not done
        epengine.basic_ops.basic_ops.test_compression_active_and_off,items=10000,compression_mode=active

        :return:
        """
        # Load some documents with compression mode as active
        gen_create = doc_generator("eviction1_",
                                   start=0,
                                   end=self.num_items,
                                   doc_size=self.doc_size)
        gen_create2 = doc_generator("eviction2_",
                                    start=0,
                                    end=self.num_items,
                                    doc_size=self.doc_size)
        def_bucket = self.bucket_util.get_all_buckets()[0]
        task = self.task.async_load_gen_docs(self.cluster,
                                             def_bucket,
                                             gen_create,
                                             "create",
                                             0,
                                             batch_size=10,
                                             process_concurrency=8,
                                             replicate_to=self.replicate_to,
                                             persist_to=self.persist_to,
                                             durability=self.durability_level,
                                             timeout_secs=self.sdk_timeout)
        self.task.jython_task_manager.get_task_result(task)
        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util.verify_stats_all_buckets(self.num_items)

        remote = RemoteMachineShellConnection(self.cluster.master)
        for bucket in self.bucket_util.buckets:
            # change compression mode to off
            output, _ = remote.execute_couchbase_cli(
                cli_command='bucket-edit',
                cluster_host="localhost:8091",
                user=self.cluster.master.rest_username,
                password=self.cluster.master.rest_password,
                options='--bucket=%s --compression-mode off' % bucket.name)
            self.assertTrue(' '.join(output).find('SUCCESS') != -1,
                            'Failed to set compression mode to off')

            # sleep for 10 sec (minimum required is 250ms)
            time.sleep(10)

        # Load data and check stats to see compression
        # is not done for newly added data
        task = self.task.async_load_gen_docs(self.cluster,
                                             def_bucket,
                                             gen_create2,
                                             "create",
                                             0,
                                             batch_size=10,
                                             process_concurrency=8,
                                             replicate_to=self.replicate_to,
                                             persist_to=self.persist_to,
                                             durability=self.durability_level,
                                             timeout_secs=self.sdk_timeout)
        self.task.jython_task_manager.get_task_result(task)
        self.bucket_util._wait_for_stats_all_buckets()
        self.bucket_util.verify_stats_all_buckets(self.num_items * 2)

    def do_get_random_key(self):
        # MB-31548: get_random_key sometimes hangs
        mc = MemcachedClient(self.cluster.master.ip, 11210)
        mc.sasl_auth_plain(self.cluster.master.rest_username,
                           self.cluster.master.rest_password)
        mc.bucket_select('default')

        count = 0
        while count < 1000000:
            count += 1
            try:
                mc.get_random_key()
            except MemcachedError as error:
                self.fail("<MemcachedError #%d ``%s''>" %
                          (error.status, error.message))
            if count % 1000 == 0:
                self.log.info('Iteration count: {}'.format(count))