Example #1
    def setUp(self):
        super(OpsChangeCasTests, self).setUp()
        self.key = "test_cas"
        self.expire_time = self.input.param("expire_time", 35)
        self.item_flag = self.input.param("item_flag", 0)
        self.load_gen = doc_generator(self.key, 0, self.num_items,
                                      doc_size=self.doc_size)
        self.node_data = dict()
        for node in self.cluster_util.get_kv_nodes():
            shell = RemoteMachineShellConnection(node)
            cb_stat = Cbstats(shell)
            self.node_data[node.ip] = dict()
            self.node_data[node.ip]["shell"] = shell
            # Reuse the Cbstats object created above instead of
            # constructing a second one for the same shell
            self.node_data[node.ip]["cb_stat"] = cb_stat
            self.node_data[node.ip]["active"] = cb_stat.vbucket_list(
                self.bucket,
                "active")
            self.node_data[node.ip]["replica"] = cb_stat.vbucket_list(
                self.bucket,
                "replica")
        if self.sdk_client_pool:
            self.client = self.sdk_client_pool.get_client_for_bucket(
                self.bucket)
        else:
            self.client = SDKClient([self.cluster.master], self.bucket)
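
A matching tearDown is needed so the shells and SDK client opened above do not leak between tests. A minimal sketch, assuming the client pool exposes a release_client() counterpart to get_client_for_bucket() (that method name is an assumption):

    def tearDown(self):
        # Close the cbstat shells opened in setUp
        for node_ip in self.node_data.keys():
            self.node_data[node_ip]["shell"].disconnect()
        # Return or close the SDK client
        if self.sdk_client_pool:
            self.sdk_client_pool.release_client(self.client)  # assumed API
        else:
            self.client.close()
        super(OpsChangeCasTests, self).tearDown()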
Example #2
    def collect_vbucket_num_stats(self, servers, buckets):
        """
            Method to extract the vbucket-count stats given by the
            cbstats tool

            Parameters:
              buckets: bucket information
              servers: server information

            Returns:
              (active_bucketMap, replica_bucketMap), each of the form:
              {bucket_name: {server_ip: vbucket_count}}
        """
        active_bucketMap = {}
        replica_bucketMap = {}
        for bucket in buckets:
            active_map_data = {}
            replica_map_data = {}
            for server in servers:
                #client = MemcachedClientHelper.direct_client(server, bucket)
                #stats = client.stats('')
                cbstat = Cbstats(server)
                stats = cbstat.vbucket_list(bucket.name)
                active_map_data[server.ip] = len(stats)
                stats = cbstat.vbucket_list(bucket.name,
                                            vbucket_type="replica")
                replica_map_data[server.ip] = len(stats)
                # for key in stats.keys():
                #     if key == 'vb_active_num':
                #         active_map_data[server.ip] = int(stats[key])
                #     if key == 'vb_replica_num':
                #         replica_map_data[server.ip] = int(stats[key])
            active_bucketMap[bucket.name] = active_map_data
            replica_bucketMap[bucket.name] = replica_map_data
        return active_bucketMap, replica_bucketMap
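
A usage sketch for the helper above, checking that every bucket reports the full active vbucket set (1024 is the default vbucket count and is an assumption about the cluster configuration):

    active_map, replica_map = self.collect_vbucket_num_stats(servers, buckets)
    for bucket_name, per_node in active_map.items():
        if sum(per_node.values()) != 1024:
            self.fail("Active vbuckets missing for %s" % bucket_name)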
Example #3
    def online_swap(self, node_to_upgrade, version,
                    install_on_spare_node=True):
        vb_details = dict()
        vb_verification = dict()
        vb_types = ["active", "replica"]

        # Fetch active services on node_to_upgrade
        rest = self.__get_rest_node(node_to_upgrade)
        services = rest.get_nodes_services()
        services_on_target_node = services[(node_to_upgrade.ip + ":"
                                            + node_to_upgrade.port)]

        # Record vbuckets in swap_node
        if CbServer.Services.KV in services_on_target_node:
            cbstats = Cbstats(node_to_upgrade)
            for vb_type in vb_types:
                vb_details[vb_type] = \
                    cbstats.vbucket_list(self.bucket.name, vb_type)

        if install_on_spare_node:
            # Install target version on spare node
            self.install_version_on_node([self.spare_node], version)

        # Perform swap rebalance for node_to_upgrade <-> spare_node
        rebalance_passed = self.task.rebalance(
            self.cluster_util.get_nodes(self.cluster.master),
            to_add=[self.spare_node],
            to_remove=[node_to_upgrade],
            check_vbucket_shuffling=False,
            services=[",".join(services_on_target_node)])
        if not rebalance_passed:
            self.log_failure("Swap rebalance failed during upgrade of {0}"
                             .format(node_to_upgrade))

        # VBuckets shuffling verification
        if CbServer.Services.KV in services_on_target_node:
            # Fetch vbucket stats after swap rebalance for verification
            cbstats = Cbstats(self.spare_node)
            for vb_type in vb_types:
                vb_verification[vb_type] = \
                    cbstats.vbucket_list(self.bucket.name, vb_type)

            # Check whether the vbucket layout changed
            for vb_type in vb_types:
                # list.sort() returns None, so compare sorted copies
                if sorted(vb_details[vb_type]) \
                        != sorted(vb_verification[vb_type]):
                    self.log_failure("%s vbuckets shuffled post swap_rebalance"
                                     % vb_type)
                    self.log.error("%s vbuckets before vs after: %s != %s"
                                   % (vb_type,
                                      vb_details[vb_type],
                                      vb_verification[vb_type]))

        # Update master node
        self.cluster.master = self.spare_node
        self.cluster.nodes_in_cluster.append(self.spare_node)

        # Update spare_node to rebalanced-out node
        self.spare_node = node_to_upgrade
        self.cluster.nodes_in_cluster.remove(node_to_upgrade)
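
Note on the vbucket comparison in online_swap(): list.sort() sorts in place and returns None, so the original "a.sort() != b.sort()" check compared None against None and could never fail. A minimal illustration of why the check above uses sorted():

    a, b = [3, 1], [2, 0]
    assert (a.sort() != b.sort()) is False  # both calls return None
    assert sorted([3, 1]) == [1, 3]         # sorted() returns a new list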
Example #4
    def get_vbucket_type_mapping(self, bucket_name):
        for node in self.vbs_in_node.keys():
            cb_stat = Cbstats(self.vbs_in_node[node]["shell"])
            self.vbs_in_node[node]["active"] = \
                cb_stat.vbucket_list(bucket_name, "active")
            self.vbs_in_node[node]["replica"] = \
                cb_stat.vbucket_list(bucket_name, "replica")
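
A small follow-up sketch inverting the per-node mapping built above into a vbucket -> node lookup (assumes get_vbucket_type_mapping() was already called for the bucket):

    vb_to_node = dict()
    for node, data in self.vbs_in_node.items():
        for vb_num in data["active"]:
            vb_to_node[vb_num] = node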
Example #5
    def setUp(self):
        super(OutOfOrderReturns, self).setUp()

        self.ooo_order = 0
        self.test_lock = Lock()
        self.doc_ops = self.input.param("doc_ops", "update;update").split(";")

        # Initialize cluster using given nodes
        nodes_init = self.cluster.servers[1:self.nodes_init] \
            if self.nodes_init != 1 else []
        self.task.rebalance([self.cluster.master], nodes_init, [])
        self.cluster.nodes_in_cluster.extend([self.cluster.master] +
                                             nodes_init)

        # Disable auto-failover to avoid failover of nodes
        status = RestConnection(self.cluster.master) \
            .update_autofailover_settings(False, 120, False)
        self.assertTrue(status, msg="Failure during disabling auto-failover")

        # Create default bucket and add rbac user
        self.bucket_util.create_default_bucket(
            bucket_type=self.bucket_type,
            storage=self.bucket_storage,
            ram_quota=self.bucket_size,
            replica=self.num_replicas,
            compression_mode=self.compression_mode,
            eviction_policy=self.bucket_eviction_policy)

        self.bucket = self.bucket_util.buckets[0]
        # Create sdk_clients for pool
        if self.sdk_client_pool:
            self.log.info("Creating SDK client pool")
            self.sdk_client_pool.create_clients(
                self.bucket,
                self.cluster.nodes_in_cluster,
                req_clients=self.sdk_pool_capacity,
                compression_settings=self.sdk_compression)

        # Create shell connection to each kv_node for cbstat object
        self.kv_nodes = self.cluster_util.get_kv_nodes()
        self.node_data = dict()
        for node in self.kv_nodes:
            shell = RemoteMachineShellConnection(node)
            cb_stat = Cbstats(shell)
            self.node_data[node] = dict()
            self.node_data[node]["shell"] = shell
            self.node_data[node]["cb_stat"] = cb_stat
            self.node_data[node]["active_vbs"] = \
                cb_stat.vbucket_list(self.bucket.name, vbucket_type="active")
            self.node_data[node]["replica_vbs"] = \
                cb_stat.vbucket_list(self.bucket.name, vbucket_type="replica")

        # Print cluster & bucket stats
        self.cluster_util.print_cluster_stats()
        self.bucket_util.print_bucket_stats()
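
With node_data populated, a test can locate the node hosting a given key's active vbucket. A minimal sketch, assuming the standard Couchbase CRC32-based key-to-vbucket mapping and 1024 vbuckets (both assumptions):

    import zlib

    def get_active_node_for_key(self, key, num_vbuckets=1024):
        # Assumed mapping: high 16 bits of CRC32 masked to the vb count
        crc = zlib.crc32(key.encode()) & 0xffffffff
        vb_num = (crc >> 16) & (num_vbuckets - 1)
        for node in self.kv_nodes:
            if vb_num in self.node_data[node]["active_vbs"]:
                return node
        return None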
Example #6
    def rebalance_out_with_warming_up(self):
        master_restart = self.input.param("master_restart", False)
        if master_restart:
            warmup_node = self.cluster.master
        else:
            warmup_node = self.cluster.servers[
                len(self.cluster.servers) - self.nodes_out - 1]
        servs_out = self.cluster.servers[
            len(self.cluster.servers) - self.nodes_out:]

        if self.test_abort_snapshot:
            self.log.info("Creating sync_write abort scenario for replica vbs")
            for server in self.cluster_util.get_kv_nodes(self.cluster):
                ssh_shell = RemoteMachineShellConnection(server)
                cbstats = Cbstats(ssh_shell)
                replica_vbs = cbstats.vbucket_list(
                    self.cluster.buckets[0].name, "replica")
                load_gen = doc_generator(self.key, 0, 5000,
                                         target_vbucket=replica_vbs)
                success = self.bucket_util.load_durable_aborts(
                    ssh_shell, [load_gen],
                    self.cluster.buckets[0],
                    self.durability_level,
                    "update", "all_aborts")
                if not success:
                    self.log_failure("Simulating aborts failed")
                ssh_shell.disconnect()

            self.validate_test_failure()

        shell = RemoteMachineShellConnection(warmup_node)
        shell.stop_couchbase()
        self.sleep(20)
        shell.start_couchbase()
        shell.disconnect()

        # Workaround for Eph case (MB-44682 - Not a bug)
        if self.bucket_type == Bucket.Type.EPHEMERAL:
            self.sleep(15, "Wait for couchbase server to start")

        rebalance = self.task.async_rebalance(
            self.cluster.servers, [], servs_out)
        self.task.jython_task_manager.get_task_result(rebalance)
        self.cluster.nodes_in_cluster = list(
            set(self.cluster.nodes_in_cluster) - set(servs_out))
        # Asserting success here would make the retry branch below
        # unreachable; the first attempt is allowed to fail while the
        # node is still warming up
        if rebalance.result is False:
            self.log.info("Rebalance failed as expected with the node "
                          "still warming up")
            self.assertTrue(self.bucket_util._wait_warmup_completed(
                self.cluster_util.get_kv_nodes(self.cluster),
                self.cluster.buckets[0],
                wait_time=self.wait_timeout * 10))

            self.log.info("Second attempt to rebalance")
            rebalance = self.task.async_rebalance(
                self.cluster.servers, [], servs_out)
            self.task.jython_task_manager.get_task_result(rebalance)
            self.assertTrue(rebalance.result, "Rebalance attempt failed again")
            self.cluster.nodes_in_cluster = list(
                set(self.cluster.nodes_in_cluster) - set(servs_out))
        if not self.atomicity:
            self.bucket_util.verify_cluster_stats(self.cluster, self.num_items,
                                                  timeout=self.wait_timeout)
            self.bucket_util.verify_unacked_bytes_all_buckets(self.cluster)
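
The fixed sleeps around the restart (and the extra 15 s wait for ephemeral buckets) can be flaky on slow machines. A hedged polling alternative that waits for the REST port to answer (port 8091 and the timeout are assumptions):

    import socket
    import time

    def wait_for_port(host, port=8091, timeout=120):
        end_time = time.time() + timeout
        while time.time() < end_time:
            try:
                socket.create_connection((host, port), 5).close()
                return True
            except socket.error:
                time.sleep(2)
        return False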
Example #7
    def load_docs_in_cb_bucket_before_and_after_cbas_connect(self):
        self.setup_for_test()

        # Load more docs in Couchbase bucket.
        self.perform_doc_ops_in_all_cb_buckets("create", self.num_items,
                                               self.num_items * 2)
        self.bucket_util.verify_stats_all_buckets(self.num_items * 2)

        if self.test_abort_snapshot:
            self.log.info("Creating sync_write aborts after dataset connect")
            for server in self.cluster_util.get_kv_nodes():
                ssh_shell = RemoteMachineShellConnection(server)
                cbstats = Cbstats(ssh_shell)
                replica_vbs = cbstats.vbucket_list(
                    self.bucket_util.buckets[0].name, "replica")
                load_gen = doc_generator("test_abort_key",
                                         self.num_items,
                                         self.num_items,
                                         target_vbucket=replica_vbs)
                success = self.bucket_util.load_durable_aborts(
                    ssh_shell, [load_gen], self.bucket_util.buckets[0],
                    self.durability_level, "update", "all_aborts")
                if not success:
                    self.log_failure("Simulating aborts failed")
                ssh_shell.disconnect()

            self.validate_test_failure()

        # Validate no. of items in CBAS dataset
        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, self.num_items * 2):
            self.fail("No. of items in CBAS dataset does not match "
                      "that in the CB bucket")
Example #8
    def get_vbucket_info_from_failover_nodes(self):
        """
        Fetch active/replica vbucket list from the
        nodes which are going to be failed over
        """
        bucket = self.bucket_util.buckets[0]
        # Reset the values
        self.active_vb_in_failover_nodes = list()
        self.replica_vb_in_failover_nodes = list()

        # Fetch new vbucket list
        for node in self.server_to_fail:
            shell_conn = RemoteMachineShellConnection(node)
            cbstat = Cbstats(shell_conn)
            self.active_vb_in_failover_nodes += cbstat.vbucket_list(
                bucket.name, "active")
            self.replica_vb_in_failover_nodes += cbstat.vbucket_list(
                bucket.name, "replica")
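
A usage sketch: the collected lists are typically fed to a doc generator so the load only touches vbuckets owned by the nodes about to be failed over (doc_generator and self.key as used in the other examples here):

    self.get_vbucket_info_from_failover_nodes()
    load_gen = doc_generator(
        self.key, 0, 1000,
        target_vbucket=self.active_vb_in_failover_nodes)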
Example #9
    def setUp(self):
        super(OutOfOrderReturns, self).setUp()

        # Create default bucket
        self.create_bucket(self.cluster)

        self.ooo_order = 0
        self.test_lock = Lock()
        self.doc_ops = self.input.param("doc_ops", "update;update").split(";")

        # Disable auto-failover to avoid failover of nodes
        status = RestConnection(self.cluster.master) \
            .update_autofailover_settings(False, 120, False)
        self.assertTrue(status, msg="Failure during disabling auto-failover")

        self.cluster.nodes_in_cluster.extend([self.cluster.master])
        self.bucket = self.cluster.buckets[0]

        # Create sdk_clients for pool
        if self.sdk_client_pool:
            self.log.info("Creating SDK client pool")
            self.sdk_client_pool.create_clients(
                self.bucket,
                self.cluster.nodes_in_cluster,
                req_clients=self.sdk_pool_capacity,
                compression_settings=self.sdk_compression)

        # Create shell connection to each kv_node for cbstat object
        self.kv_nodes = self.cluster_util.get_kv_nodes(self.cluster)
        self.node_data = dict()
        for node in self.kv_nodes:
            shell = RemoteMachineShellConnection(node)
            cb_stat = Cbstats(shell)
            self.node_data[node] = dict()
            self.node_data[node]["shell"] = shell
            self.node_data[node]["cb_stat"] = cb_stat
            self.node_data[node]["active_vbs"] = \
                cb_stat.vbucket_list(self.bucket.name, vbucket_type="active")
            self.node_data[node]["replica_vbs"] = \
                cb_stat.vbucket_list(self.bucket.name, vbucket_type="replica")

        # Print cluster & bucket stats
        self.cluster_util.print_cluster_stats(self.cluster)
        self.bucket_util.print_bucket_stats(self.cluster)
Example #10
    def test_failover_expired_items_in_vB(self):
        self.maxttl = 120
        self.doc_ops = "expiry"
        self.expiry_perc = self.input.param("expiry_perc", 100)

        shell_conn = RemoteMachineShellConnection(
            self.cluster.nodes_in_cluster[-1])
        cbstats = Cbstats(shell_conn)
        self.target_vbucket = cbstats.vbucket_list(
            self.bucket_util.buckets[0].name)

        self.generate_docs(target_vbucket=self.target_vbucket)

        _ = self.loadgen_docs(self.retry_exceptions,
                              self.ignore_exceptions,
                              _sync=True)
        self.bucket_util._wait_for_stats_all_buckets()

        # exp_pager_stime
        self.bucket_util._expiry_pager(self.exp_pager_stime)
        self.sleep(self.exp_pager_stime,
                   "Wait until exp_pager_stime for kv_purger to kick off")
        self.sleep(self.exp_pager_stime * 10,
                   "Wait for KV purger to scan expired docs and "
                   "add tombstones")

        self.task.async_failover(self.cluster.nodes_in_cluster,
                                 self.cluster.nodes_in_cluster[-1],
                                 graceful=True)

        self.nodes = self.rest.node_statuses()
        self.task.rebalance(self.cluster.nodes_in_cluster,
                            to_add=[],
                            to_remove=[self.cluster.nodes_in_cluster[-1]])

        # Metadata Purge Interval
        self.meta_purge_interval = 60
        self.bucket_util.cbepctl_set_metadata_purge_interval(
            value=self.meta_purge_interval, buckets=self.buckets)
        self.sleep(self.meta_purge_interval * 2,
                   "Wait for metadata purge interval to drop "
                   "tombstones from storage")

        self.log.info("Starting compaction for each bucket")
        self.run_compaction()

        # All docs and tomb-stone should be dropped from the storage
        ts = self.get_tombstone_count_key(self.cluster.nodes_in_cluster)
        self.log.info("Tombstones after full compaction: {}".format(ts))
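
Rather than sleeping for a fixed multiple of the purge interval, the tombstone check can poll. A hedged sketch built around the helper already used above (assumes get_tombstone_count_key() returns an integer count):

    import time

    def wait_for_tombstone_purge(self, nodes, expected=0, timeout=300):
        end_time = time.time() + timeout
        while time.time() < end_time:
            if self.get_tombstone_count_key(nodes) <= expected:
                return True
            time.sleep(10)
        return False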
Example #11
    def rebalance_out_after_ops(self):
        self.gen_delete = self.get_doc_generator(self.items / 2,
                                                 self.items)
        self.gen_create = self.get_doc_generator(self.num_items,
                                                 self.num_items + self.items / 2)
        # Define which doc ops will be performed during rebalancing;
        # multiple op types are allowed, but they run one at a time
        self.check_temporary_failure_exception = False
        self.loadgen_docs(task_verification=True)

        if self.test_abort_snapshot:
            self.log.info("Creating sync_write abort scenario for replica vbs")
            for server in self.cluster_util.get_kv_nodes(self.cluster):
                ssh_shell = RemoteMachineShellConnection(server)
                cbstats = Cbstats(ssh_shell)
                replica_vbs = cbstats.vbucket_list(
                    self.cluster.buckets[0].name, "replica")
                load_gen = doc_generator(self.key, 0, 5000,
                                         target_vbucket=replica_vbs)
                success = self.bucket_util.load_durable_aborts(
                    ssh_shell, [load_gen],
                    self.cluster.buckets[0],
                    self.durability_level,
                    "update", "all_aborts")
                if not success:
                    self.log_failure("Simulating aborts failed")
                ssh_shell.disconnect()

            self.validate_test_failure()

        servs_out = [self.cluster.servers[self.nodes_init - i - 1]
                     for i in range(self.nodes_out)]
        if not self.atomicity:
            self.bucket_util._wait_for_stats_all_buckets(self.cluster,
                                                         self.cluster.buckets)
            self.bucket_util.validate_docs_per_collections_all_buckets(
                self.cluster,
                timeout=self.wait_timeout)
        prev_failover_stats = self.bucket_util.get_failovers_logs(
            self.cluster.servers[:self.nodes_init], self.cluster.buckets)
        prev_vbucket_stats = self.bucket_util.get_vbucket_seqnos(
            self.cluster.servers[:self.nodes_init], self.cluster.buckets)
#         record_data_set = self.bucket_util.get_data_set_all(
#             self.cluster.servers[:self.nodes_init], self.cluster.buckets)
        self.bucket_util.compare_vbucketseq_failoverlogs(
            prev_vbucket_stats, prev_failover_stats)
        self.add_remove_servers_and_rebalance([], servs_out)
        if not self.atomicity:
            self.bucket_util.validate_docs_per_collections_all_buckets(
                self.cluster)
            self.bucket_util.verify_cluster_stats(
                self.cluster, self.num_items,
                check_ep_items_remaining=True,
                timeout=self.wait_timeout)
        new_failover_stats = self.bucket_util.compare_failovers_logs(
            self.cluster, prev_failover_stats,
            self.cluster.servers[:self.nodes_init - self.nodes_out],
            self.cluster.buckets)
        new_vbucket_stats = self.bucket_util.compare_vbucket_seqnos(
            self.cluster, prev_vbucket_stats,
            self.cluster.servers[:self.nodes_init - self.nodes_out],
            self.cluster.buckets,
            perNode=False)
        self.sleep(60)
#         self.bucket_util.data_analysis_all(
#             record_data_set,
#             self.cluster.servers[:self.nodes_init - self.nodes_out],
#             self.cluster.buckets)
        self.bucket_util.compare_vbucketseq_failoverlogs(
            new_vbucket_stats, new_failover_stats)
        self.bucket_util.verify_unacked_bytes_all_buckets(self.cluster)
        nodes = self.cluster_util.get_nodes_in_cluster(self.cluster)
        self.bucket_util.vb_distribution_analysis(
            self.cluster,
            servers=nodes, buckets=self.cluster.buckets, std=1.0,
            total_vbuckets=self.cluster.vbuckets, num_replicas=self.num_replicas)
Example #12
    def test_fts_index_with_aborts(self):
        """
        1. Create an FTS index on the default bucket
        2. Load multiple docs such that all sync_writes will be aborted
        3. Verify nothing went into indexing
        4. Load sync_write docs such that they are successful
        5. Validate the mutated docs are taken into indexing
        :return:
        """
        self.key = "test_query_doc"
        self.index_name = "fts_test_index"
        self.sync_write_abort_pattern = self.input.param(
            "sync_write_abort_pattern", "all_aborts")
        self.create_index_during = self.input.param("create_index_during",
                                                    "before_doc_ops")
        self.restServer = self.cluster_util.get_nodes_from_services_map(
            cluster=self.cluster, service_type=CbServer.Services.FTS)
        self.rest = RestConnection(self.restServer)
        crud_batch_size = 1000
        def_bucket = self.cluster.buckets[0]
        kv_nodes = self.cluster_util.get_kv_nodes(self.cluster)
        replica_vbs = dict()
        verification_dict = dict()
        index_item_count = dict()
        expected_num_indexed = dict()
        load_gen = dict()
        load_gen["ADD"] = dict()
        load_gen["SET"] = dict()
        partial_aborts = ["initial_aborts", "aborts_at_end"]

        durability_helper = DurabilityHelper(
            self.log,
            len(self.cluster.nodes_in_cluster),
            durability=self.durability_level,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to)

        if self.create_index_during == "before_doc_ops":
            self.create_fts_indexes(def_bucket.name, self.index_name)

        curr_items = self.bucket_util.get_bucket_current_item_count(
            self.cluster, def_bucket)
        if self.sync_write_abort_pattern in ["all_aborts", "initial_aborts"]:
            self.bucket_util.flush_bucket(self.cluster, def_bucket)
            self.num_items = 0
        else:
            self.num_items = curr_items

        self.log.info("Disabling auto_failover to avoid node failures")
        status = RestConnection(self.cluster.master) \
            .update_autofailover_settings(False, 120, False)
        self.assertTrue(status, msg="Failure during disabling auto-failover")

        # Validate vbucket stats
        verification_dict["ops_create"] = self.num_items
        verification_dict["ops_update"] = 0
        # verification_dict["ops_delete"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["sync_write_committed_count"] = 0

        if self.create_index_during == "before_doc_ops":
            self.validate_indexed_doc_count(self.index_name,
                                            verification_dict["ops_create"])

        self.log.info("Loading docs such that all sync_writes will be aborted")
        for server in kv_nodes:
            ssh_shell = RemoteMachineShellConnection(server)
            cbstats = Cbstats(ssh_shell)
            replica_vbs[server] = cbstats.vbucket_list(def_bucket.name,
                                                       "replica")
            load_gen["ADD"][server] = list()
            load_gen["ADD"][server].append(
                doc_generator(self.key,
                              0,
                              crud_batch_size,
                              target_vbucket=replica_vbs[server],
                              mutation_type="ADD"))
            if self.sync_write_abort_pattern in partial_aborts:
                load_gen["ADD"][server].append(
                    doc_generator(self.key,
                                  10000,
                                  crud_batch_size,
                                  target_vbucket=replica_vbs[server],
                                  mutation_type="ADD"))
                verification_dict["ops_create"] += crud_batch_size
                verification_dict["sync_write_committed_count"] += \
                    crud_batch_size

            task_success = self.bucket_util.load_durable_aborts(
                ssh_shell, load_gen["ADD"][server], def_bucket,
                self.durability_level, "create", self.sync_write_abort_pattern)
            if not task_success:
                self.log_failure("Failure during load_abort task")

            verification_dict["sync_write_aborted_count"] += \
                crud_batch_size
            if self.create_index_during == "before_doc_ops":
                self.validate_indexed_doc_count(
                    self.index_name, verification_dict["ops_create"])

            load_gen["SET"][server] = list()
            load_gen["SET"][server].append(
                doc_generator(self.key,
                              0,
                              crud_batch_size,
                              target_vbucket=replica_vbs[server],
                              mutation_type="SET"))
            if self.sync_write_abort_pattern in partial_aborts:
                load_gen["SET"][server].append(
                    doc_generator(self.key,
                                  10000,
                                  crud_batch_size,
                                  target_vbucket=replica_vbs[server],
                                  mutation_type="SET"))
                verification_dict["ops_update"] += crud_batch_size
                verification_dict["sync_write_committed_count"] += \
                    crud_batch_size

            verification_dict["sync_write_aborted_count"] += \
                crud_batch_size
            task_success = self.bucket_util.load_durable_aborts(
                ssh_shell, load_gen["SET"][server], def_bucket,
                self.durability_level, "update", self.sync_write_abort_pattern)
            if not task_success:
                self.log_failure("Failure during load_abort task")

            ssh_shell.disconnect()

            if self.create_index_during == "before_doc_ops":
                self.validate_indexed_doc_count(
                    self.index_name, verification_dict["ops_create"])
        failed = durability_helper.verify_vbucket_details_stats(
            def_bucket,
            kv_nodes,
            vbuckets=self.cluster.vbuckets,
            expected_val=verification_dict)
        if failed:
            self.log_failure("Cbstat vbucket-details verification failed")
        self.validate_test_failure()

        if self.create_index_during == "after_doc_ops":
            self.create_fts_indexes(def_bucket.name, self.index_name)
            self.validate_indexed_doc_count(self.index_name,
                                            verification_dict["ops_create"])

        self.log.info("Verify aborts are not indexed")
        self.validate_indexed_doc_count(self.index_name,
                                        verification_dict["ops_create"])

        for server in kv_nodes:
            if self.sync_write_abort_pattern == "initial_aborts":
                load_gen["ADD"][server] = load_gen["ADD"][server][:1]
                load_gen["SET"][server] = load_gen["SET"][server][:1]
            elif self.sync_write_abort_pattern == "aborts_at_end":
                load_gen["ADD"][server] = load_gen["ADD"][server][-1:]
                load_gen["SET"][server] = load_gen["SET"][server][-1:]

        self.log.info("Load sync_write docs such that they are successful")
        for server in kv_nodes:
            for gen_load in load_gen["ADD"][server]:
                task = self.task.async_load_gen_docs(
                    self.cluster,
                    def_bucket,
                    gen_load,
                    "create",
                    0,
                    batch_size=50,
                    process_concurrency=8,
                    replicate_to=self.replicate_to,
                    persist_to=self.persist_to,
                    durability=self.durability_level,
                    timeout_secs=self.sdk_timeout)
                self.task.jython_task_manager.get_task_result(task)
                if len(task.fail.keys()) != 0:
                    self.log_failure("Some failures seen during doc_ops")
                verification_dict["ops_create"] += crud_batch_size
                self.validate_indexed_doc_count(
                    self.index_name, verification_dict["ops_create"])

            for gen_load in load_gen["SET"][server]:
                task = self.task.async_load_gen_docs(
                    self.cluster,
                    def_bucket,
                    gen_load,
                    "update",
                    0,
                    batch_size=50,
                    process_concurrency=8,
                    replicate_to=self.replicate_to,
                    persist_to=self.persist_to,
                    durability=self.durability_level,
                    timeout_secs=self.sdk_timeout)
                self.task.jython_task_manager.get_task_result(task)
                if len(task.fail.keys()) != 0:
                    self.log_failure("Some failures seen during doc_ops")
                verification_dict["ops_update"] += crud_batch_size
                self.validate_indexed_doc_count(
                    self.index_name, verification_dict["ops_create"])

        self.log.info("Validate the mutated docs are taken into indexing")
        self.validate_indexed_doc_count(self.index_name,
                                        verification_dict["ops_create"])
        self.validate_test_failure()
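
validate_indexed_doc_count() above is repo-specific; the underlying check can be approximated against the FTS REST count endpoint. A hedged sketch (the /api/index/<name>/count path, port 8094, and the unauthenticated call are assumptions; a secured cluster needs basic auth):

    import json
    import time
    import urllib2

    def poll_fts_index_count(host, index_name, expected, timeout=120):
        url = "http://%s:8094/api/index/%s/count" % (host, index_name)
        end_time = time.time() + timeout
        while time.time() < end_time:
            count = json.load(urllib2.urlopen(url)).get("count", -1)
            if count == expected:
                return True
            time.sleep(5)
        return False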
Example #13
    def test_durability_abort(self):
        """
        Test to validate durability abort is triggered properly with proper
        rollback on active vbucket
        :return:
        """
        load_task = dict()

        # Override d_level, error_simulation type based on d_level
        self.__get_d_level_and_error_to_simulate()

        kv_nodes = self.cluster_util.get_kv_nodes(self.cluster)
        for server in kv_nodes:
            ssh_shell = RemoteMachineShellConnection(server)
            cbstats = Cbstats(server)
            cb_err = CouchbaseError(self.log, ssh_shell)
            target_vb_type = "replica"
            if self.durability_level \
                    == Bucket.DurabilityLevel.MAJORITY_AND_PERSIST_TO_ACTIVE:
                target_vb_type = "active"
            target_vbs = cbstats.vbucket_list(self.bucket.name, target_vb_type)
            doc_load_spec = dict()
            doc_load_spec["doc_crud"] = dict()
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.CREATE_PERCENTAGE_PER_COLLECTION] = 2
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.UPDATE_PERCENTAGE_PER_COLLECTION] = 2
            doc_load_spec["doc_crud"][
                MetaCrudParams.DocCrud.DELETE_PERCENTAGE_PER_COLLECTION] = 2

            doc_load_spec["doc_crud"][MetaCrudParams.DocCrud.COMMON_DOC_KEY] \
                = "test_collections"
            doc_load_spec[MetaCrudParams.TARGET_VBUCKETS] = target_vbs

            doc_load_spec[MetaCrudParams.DURABILITY_LEVEL] \
                = self.durability_level
            doc_load_spec[MetaCrudParams.RETRY_EXCEPTIONS] = [
                SDKException.DurabilityAmbiguousException
            ]
            doc_load_spec[MetaCrudParams.SDK_TIMEOUT] = 2
            doc_load_spec[MetaCrudParams.SKIP_READ_ON_ERROR] = True
            doc_load_spec[MetaCrudParams.SUPPRESS_ERROR_TABLE] = True

            cb_err.create(self.simulate_error, self.cluster.buckets[0].name)
            load_task[server] = \
                self.bucket_util.run_scenario_from_spec(
                    self.task,
                    self.cluster,
                    self.cluster.buckets,
                    doc_load_spec,
                    batch_size=1,
                    validate_task=False)
            cb_err.revert(self.simulate_error, self.cluster.buckets[0].name)
            ssh_shell.disconnect()
        self.validate_test_failure()

        failed = self.durability_helper.verify_vbucket_details_stats(
            self.bucket,
            kv_nodes,
            vbuckets=self.cluster.vbuckets,
            expected_val=self.verification_dict)
        if failed:
            self.log_failure("Cbstat vbucket-details verification failed "
                             "after aborts")
        self.validate_test_failure()

        # Retry aborted keys with healthy cluster
        self.log.info("Performing CRUDs on healthy cluster")
        for server in kv_nodes:
            self.bucket_util.validate_doc_loading_results(load_task[server])
            if load_task[server].result is False:
                self.log_failure("Doc retry task failed on %s" % server.ip)

            # Update cbstat vb-details verification counters
            for bucket, s_dict in load_task[server].loader_spec.items():
                for s_name, c_dict in s_dict["scopes"].items():
                    for c_name, _ in c_dict["collections"].items():
                        c_crud_data = load_task[server].loader_spec[bucket][
                            "scopes"][s_name]["collections"][c_name]
                        for op_type in c_crud_data.keys():
                            total_mutation = \
                                c_crud_data[op_type]["doc_gen"].end \
                                - c_crud_data[op_type]["doc_gen"].start
                            if op_type in DocLoading.Bucket.DOC_OPS:
                                self.verification_dict["ops_%s" % op_type] \
                                    += total_mutation
                                self.verification_dict[
                                    "sync_write_committed_count"] \
                                    += total_mutation
            failed = self.durability_helper.verify_vbucket_details_stats(
                self.bucket,
                self.cluster_util.get_kv_nodes(self.cluster),
                vbuckets=self.cluster.vbuckets,
                expected_val=self.verification_dict)
            if failed:
                self.log_failure("Cbstat vbucket-details verification "
                                 "failed after ops on server: %s" % server.ip)
        self.validate_test_failure()
Example #14
    def collect_vbucket_stats(self,
                              buckets,
                              servers,
                              collect_vbucket=True,
                              collect_vbucket_seqno=True,
                              collect_vbucket_details=True,
                              perNode=True):
        """
            Method to extract the vbuckets stats given by cbstats tool

            Parameters:
              buckets: bucket information
              servers: server information
              collect_vbucket: take vbucket type stats
              collect_vbucket_seqno: take vbucket-seqno type stats
              collect_vbucket_details: take vbucket-details type stats
              perNode: if True collects data per node else takes a union
                       across nodes

            Returns:
              The output can be in two formats

              if we are doing per node data collection:
              {bucket: {node: [vbucket_seqno {key:value} U
                               vbucket_details {key:value} U
                               vbucket {key:value}]}}

              if we are not doing per node data collection:
              {bucket: [vbucket_seqno {key:value} U
                        vbucket_details {key:value} U
                        vbucket {key:value}]}
        """
        bucketMap = dict()
        for bucket in buckets:
            if bucket.bucketType == Bucket.Type.MEMCACHED:
                continue
            dataMap = dict()
            for server in servers:
                map_data = dict()
                cbstat = Cbstats(server)

                if collect_vbucket:
                    result = dict()
                    for vb_type in ["active", "replica"]:
                        vb_list = cbstat.vbucket_list(bucket.name, vb_type)
                        for vb_num in vb_list:
                            result['vb_%s' % vb_num] = dict()
                            result['vb_%s' % vb_num]["state"] = vb_type
                    map_data.update(result)
                    # vbucket = client.stats('vbucket')
                    # self.createMapVbucket(vbucket, map_data)
                if collect_vbucket_seqno:
                    result = cbstat.vbucket_seqno(bucket.name)
                    # Copy the keys since the dict is mutated in the loop
                    for key in list(result.keys()):
                        result['vb_' + key] = result.pop(key)
                    map_data.update(result)
                    # vbucket_seqno = client.stats('vbucket-seqno')
                    # self.createMapVbucket(vbucket_seqno, map_data)
                if collect_vbucket_details:
                    result = cbstat.vbucket_details(bucket.name)
                    # Copy the keys since the dict is mutated in the loop
                    for key in list(result.keys()):
                        result['vb_' + key] = result.pop(key)
                    map_data.update(result)
                    # vbucket_details = client.stats('vbucket-details')
                    # self.createMapVbucket(vbucket_details, map_data)
                if perNode:
                    dataMap[server.ip] = map_data
                else:
                    dataMap.update(map_data)
            bucketMap[bucket.name] = dataMap
        return bucketMap
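
A usage sketch contrasting the two return shapes described in the docstring above:

    per_node = self.collect_vbucket_stats(buckets, servers, perNode=True)
    # per_node[bucket.name][server.ip]["vb_0"]["state"] -> "active"/"replica"
    merged = self.collect_vbucket_stats(buckets, servers, perNode=False)
    # merged[bucket.name]["vb_0"]["state"] -> values merged across nodes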
Example #15
    def test_create_remove_scope_with_node_crash(self):
        """
        1. Select a error scenario to simulate in random
        2. Create error scenario either before or after scope create/delete
        3. Initiate scope creation/deletion under the bucket
        4. Validate the outcome of scope creation/deletion
        """
        def create_scope(client_type, bucket_obj, scope):
            if client_type == "sdk":
                client.create_scope(scope)
            elif client_type == "rest":
                self.bucket_util.create_scope(self.cluster.master, bucket_obj,
                                              {"name": scope})
            else:
                self.log_failure("Invalid client_type provided")

        def remove_scope(client_type, bucket_obj, scope):
            if client_type == "sdk":
                client.drop_scope(scope)
            elif client_type == "rest":
                self.bucket_util.drop_scope(self.cluster.master,
                                            bucket_obj,
                                            scope)
            else:
                self.log_failure("Invalid client_type provided")

        kv_nodes = self.cluster_util.get_kv_nodes()
        if len(kv_nodes) == 1:
            self.fail("Need at least two KV nodes to run this test")

        client = None
        action = self.input.param("action", "create")
        crash_during = self.input.param("crash_during", "pre_action")
        data_load_option = self.input.param("data_load_option", None)
        crash_type = self.input.param("simulate_error",
                                      CouchbaseError.KILL_MEMCACHED)

        # Always use a random scope name to create/remove
        # since CREATE/DROP not supported for default scope
        self.scope_name = BucketUtils.get_random_name()

        # Select a KV node other than master node from the cluster
        node_to_crash = kv_nodes[sample(range(1, len(kv_nodes)), 1)[0]]

        # Create a required client object
        if self.client_type == "sdk":
            client = SDKClient([self.cluster.master], self.bucket)

        if action == "remove":
            # Create a scope to be removed
            use_client = sample(["sdk", "rest"], 1)[0]
            create_scope(use_client, self.bucket, self.scope_name)

        # Create an error scenario
        shell = RemoteMachineShellConnection(node_to_crash)
        cb_error = CouchbaseError(self.log, shell)
        cbstat_obj = Cbstats(shell)
        active_vbs = cbstat_obj.vbucket_list(self.bucket.name,
                                             vbucket_type="active")
        target_vbuckets = list(
            set(range(0, 1024)).difference(set(active_vbs)))
        doc_gen = doc_generator(self.key, 0, 1000,
                                target_vbucket=target_vbuckets)

        if crash_during == "pre_action":
            cb_error.create(crash_type)

        if action == "create":
            create_scope(self.client_type, self.bucket, self.scope_name)
        elif action == "remove":
            remove_scope(self.client_type, self.bucket, self.scope_name)

        if crash_during == "post_action":
            cb_error.create(crash_type)

        if data_load_option == "mutate_default_collection":
            task = self.task.async_load_gen_docs(
                self.cluster, self.bucket, doc_gen, "update",
                exp=self.maxttl,
                batch_size=200, process_concurrency=8,
                compression=self.sdk_compression,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout)
            self.task_manager.get_task_result(task)

        self.sleep(60, "Wait before reverting the error scenario")
        cb_error.revert(crash_type)

        # Close SSH and SDK connections
        shell.disconnect()
        if self.client_type == "sdk":
            client.close()

        self.bucket_util.validate_docs_per_collections_all_buckets()
        self.validate_test_failure()
Example #16
    def test_rollback_after_disk_full(self):
        self.doc_ops = "create"
        self.create_start = self.init_items_per_collection
        self.create_end = self.init_items_per_collection * 2
        start = self.num_items
        items = self.num_items
        mem_only_items = self.input.param("rollback_items", 100000)
        self.gen_read = copy.deepcopy(self.gen_create)

        # Fill Disk on nodeB leaving 100MB
        self.fill_disk(self.cluster.nodes_in_cluster[-1], free=100)

        # Stopping persistence on NodeA
        shell = RemoteMachineShellConnection(self.cluster.master)
        cbstats = Cbstats(self.cluster.master)
        self.target_vbucket = cbstats.vbucket_list(
            self.cluster.buckets[0].name)
        mem_client = MemcachedClientHelper.direct_client(
            self.cluster.master, self.cluster.buckets[0])
        mem_client.stop_persistence()
        self.gen_create = self.genrate_docs_basic(start, mem_only_items,
                                                  self.target_vbucket)

        self.loadgen_docs(_sync=True, retry_exceptions=self.retry_exceptions)
        start = self.gen_create.key_counter

        ep_queue_size_map = {self.cluster.nodes_in_cluster[0]: mem_only_items}
        #ep_data_write_failed = {self.cluster.nodes_in_cluster[-1]: 0}

        for bucket in self.cluster.buckets:
            self.bucket_util._wait_for_stat(bucket, ep_queue_size_map)
            #self.bucket_util._wait_for_stat(
            #    bucket,
            #    ep_data_write_failed,
            #    cbstat_cmd="all",
            #    stat_name="ep_data_write_failed",
            #    stat_cond=">",
            #    timeout=300)

        # Kill memcached on NodeA to trigger rollback on other Nodes
        # replica vBuckets
        self.sleep(120)
        shell.kill_memcached()
        self.sleep(10,
                   "sleep after MemCached kill on node {}".format(shell.ip))

        self.free_disk(self.cluster.nodes_in_cluster[-1])
        self.assertTrue(
            self.bucket_util._wait_warmup_completed(
                self.cluster.nodes_in_cluster,
                self.cluster.buckets[0],
                wait_time=self.wait_timeout * 10))
        self.sleep(10, "Not Required, but waiting for 10s after warm up")
        self.bucket_util.verify_stats_all_buckets(self.cluster,
                                                  items,
                                                  timeout=300)

        data_validation = self.task.async_validate_docs(
            self.cluster,
            self.cluster.buckets[0],
            self.gen_read,
            "create",
            0,
            batch_size=self.batch_size,
            process_concurrency=self.process_concurrency,
            sdk_client_pool=self.sdk_client_pool,
            timeout_secs=self.sdk_timeout)
        self.task.jython_task_manager.get_task_result(data_validation)

        shell.disconnect()
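
fill_disk()/free_disk() used above are repo helpers; a hedged sketch of the kind of dd-based filler fill_disk() might run over a shell connection (the data path, df parsing, and file name are assumptions):

    def fill_disk(self, node, free=100):
        shell = RemoteMachineShellConnection(node)
        # Leave roughly `free` MB available on the data partition
        cmd = ("avail=$(df -m /opt/couchbase | awk 'NR==2 {print $4}'); "
               "dd if=/dev/zero of=/opt/couchbase/fill.bin bs=1M "
               "count=$((avail - %d))" % free)
        shell.execute_command(cmd)
        shell.disconnect()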
Example #17
    def common_test_body(self, failover_reason, rebalance_type=None):
        """
            Main Test body which contains the flow of the failover basic steps
            1. Starts Operations if programmed into the test case(before/after)
            2. Start View and Index Building operations
            3. Failover K out of N nodes (failover can be HARD/GRACEFUL)
            4.1 Rebalance the cluster after failover of K nodes
            4.2 Run Add-Back operation with recoveryType = (full/delta)
                with rebalance
            5. Verify all expected operations completed by checking
               stats, replication, views, data correctness
        """
        # Pick the reference node for communication
        # We pick a node in the cluster which will NOT be failed over
        self.filter_list = []
        if self.failoverMaster:
            self.master = self.cluster.servers[1]
        else:
            self.master = self.cluster.master
        self.log.info(
            " Picking node {0} as reference node for test case".format(
                self.master.ip))
        self.print_test_params(failover_reason)
        self.rest = RestConnection(self.master)
        self.nodes = self.rest.node_statuses()
        # Set the data path for the cluster
        self.data_path = self.rest.get_data_path()

        # Variable to decide the durability outcome
        durability_will_fail = False
        # Variable to track the number of nodes failed
        num_nodes_failed = 1

        # Find nodes that will under go failover
        if self.failoverMaster:
            self.chosen = self.cluster_util.pick_nodes(
                self.master, howmany=1, target_node=self.servers[0])
        else:
            self.chosen = self.cluster_util.pick_nodes(
                self.master, howmany=self.num_failed_nodes)

        # Perform operations - Create/Update/Delete
        # self.withMutationOps = True => Run Operations in parallel to failover
        # self.withMutationOps = False => Run Operations Before failover
        self.load_initial_data()
        if not self.withMutationOps:
            self.run_mutation_operations()

        if self.test_abort_snapshot:
            self.log.info("Creating abort scenarios for vbs")
            for server in self.cluster_util.get_kv_nodes():
                ssh_shell = RemoteMachineShellConnection(server)
                cbstats = Cbstats(ssh_shell)
                replica_vbs = cbstats.vbucket_list(
                    self.bucket_util.buckets[0].name, "replica")
                load_gen = doc_generator(self.key,
                                         0,
                                         5000,
                                         target_vbucket=replica_vbs)
                success = self.bucket_util.load_durable_aborts(
                    ssh_shell, [load_gen], self.bucket_util.buckets[0],
                    self.durability_level, "update", "all_aborts")
                if not success:
                    self.log_failure("Simulating aborts failed")
                ssh_shell.disconnect()

            self.validate_test_failure()

        # Perform View Creation Tasks and
        # check for completion if required before failover
        if self.withViewsOps:
            self.run_view_creation_operations(self.servers)
            if not self.createIndexesDuringFailover:
                self.query_and_monitor_view_tasks(self.servers)

        # Take snap-shot of data set used for validation
        record_static_data_set = {}
        if not self.withMutationOps:
            record_static_data_set = self.bucket_util.get_data_set_all(
                self.cluster.servers, self.bucket_util.buckets, path=None)

        prev_vbucket_stats = self.bucket_util.get_vbucket_seqnos(
            self.servers[:self.nodes_init], self.bucket_util.buckets)
        prev_failover_stats = self.bucket_util.get_failovers_logs(
            self.servers[:self.nodes_init], self.bucket_util.buckets)

        # Perform Operations related to failover
        if self.withMutationOps or self.withViewsOps or self.compact:
            self.run_failover_operations_with_ops(self.chosen, failover_reason)
        else:
            self.run_failover_operations(self.chosen, failover_reason)

        # Decide whether the durability is going to fail or not
        if self.num_failed_nodes >= 1 and self.num_replicas > 1:
            durability_will_fail = True

        # Construct target vbucket list from the nodes
        # which are going to be failed over
        vbucket_list = list()
        for target_node in self.chosen:
            for server in self.servers:
                if server.ip == target_node.ip:
                    # Comment out the break once vbucket_list method is fixed
                    break
                    shell_conn = RemoteMachineShellConnection(server)
                    cb_stats = Cbstats(shell_conn)
                    vbuckets = cb_stats.vbucket_list(
                        self.bucket_util.buckets[0].name,
                        self.target_vbucket_type)
                    shell_conn.disconnect()
                    vbucket_list += vbuckets

        # Code to generate doc_loaders that will work on vbucket_type
        # based on targeted nodes. This will perform CRUD only on
        # vbuckets which will be affected by the failover
        self.gen_create = doc_generator(self.key,
                                        self.num_items,
                                        self.num_items * 1.5,
                                        target_vbucket=vbucket_list)
        self.gen_update = doc_generator(self.key,
                                        self.num_items / 2,
                                        self.num_items,
                                        target_vbucket=vbucket_list)
        self.gen_delete = doc_generator(self.key,
                                        self.num_items / 4,
                                        self.num_items / 2 - 1,
                                        target_vbucket=vbucket_list)
        self.afterfailover_gen_create = doc_generator(
            self.key,
            self.num_items * 1.6,
            self.num_items * 2,
            target_vbucket=vbucket_list)
        self.afterfailover_gen_update = doc_generator(
            self.key, 1, self.num_items / 4, target_vbucket=vbucket_list)
        self.afterfailover_gen_delete = doc_generator(
            self.key,
            self.num_items * 0.5,
            self.num_items * 0.75,
            target_vbucket=vbucket_list)

        # Perform Add Back Operation with Rebalance
        # or only Rebalance with verifications
        if not self.gracefulFailoverFail and self.runRebalanceAfterFailover:
            if self.failover_onebyone:
                # Reset it back to False
                durability_will_fail = False
                for node_chosen in self.chosen:
                    if num_nodes_failed > 1:
                        durability_will_fail = True

                    if self.add_back_flag:
                        # In add-back case, durability should never fail, since
                        # the num_nodes in the cluster will remain the same
                        self.run_add_back_operation_and_verify(
                            [node_chosen],
                            prev_vbucket_stats,
                            record_static_data_set,
                            prev_failover_stats,
                            rebalance_type=rebalance_type)
                    else:
                        self.run_rebalance_after_failover_and_verify(
                            [node_chosen],
                            prev_vbucket_stats,
                            record_static_data_set,
                            prev_failover_stats,
                            durability_will_fail=durability_will_fail)
                    num_nodes_failed += 1
            else:
                if self.add_back_flag:
                    self.run_add_back_operation_and_verify(
                        self.chosen,
                        prev_vbucket_stats,
                        record_static_data_set,
                        prev_failover_stats,
                        durability_will_fail=durability_will_fail,
                        rebalance_type=rebalance_type)
                else:
                    self.run_rebalance_after_failover_and_verify(
                        self.chosen,
                        prev_vbucket_stats,
                        record_static_data_set,
                        prev_failover_stats,
                        durability_will_fail=durability_will_fail)
        else:
            return

        # Will verify_unacked_bytes only if the durability is not going to fail
        if self.during_ops is None and not durability_will_fail:
            self.bucket_util.verify_unacked_bytes_all_buckets(
                filter_list=self.filter_list)
Example #18
    def test_magma_rollback_n_times(self):
        items = self.num_items
        mem_only_items = self.input.param("rollback_items", 100000)
        if self.nodes_init < 2 or self.num_replicas < 1:
            self.fail("Not enough nodes/replicas in the cluster/bucket "
                      "to test rollback")
        self.num_rollbacks = self.input.param("num_rollbacks", 10)
        shell = RemoteMachineShellConnection(self.cluster_util.cluster.master)
        cbstats = Cbstats(shell)
        self.target_vbucket = cbstats.vbucket_list(
            self.bucket_util.buckets[0].name)
        start = self.num_items
        self.gen_read = copy.deepcopy(self.gen_create)
        for _ in xrange(1, self.num_rollbacks + 1):
            # Stopping persistence on NodeA
            mem_client = MemcachedClientHelper.direct_client(
                self.input.servers[0], self.bucket_util.buckets[0])
            mem_client.stop_persistence()

            self.gen_create = doc_generator(
                self.key,
                start,
                mem_only_items,
                doc_size=self.doc_size,
                doc_type=self.doc_type,
                target_vbucket=self.target_vbucket,
                vbuckets=self.cluster_util.vbuckets,
                randomize_doc_size=self.randomize_doc_size,
                randomize_value=self.randomize_value)

            self.loadgen_docs(_sync=True)
            start = self.gen_create.key_counter

            ep_queue_size_map = {
                self.cluster.nodes_in_cluster[0]: mem_only_items
            }
            vb_replica_queue_size_map = {self.cluster.nodes_in_cluster[0]: 0}

            for node in self.cluster.nodes_in_cluster[1:]:
                ep_queue_size_map.update({node: 0})
                vb_replica_queue_size_map.update({node: 0})

            for bucket in self.bucket_util.buckets:
                self.bucket_util._wait_for_stat(bucket, ep_queue_size_map)
                self.bucket_util._wait_for_stat(
                    bucket,
                    vb_replica_queue_size_map,
                    stat_name="vb_replica_queue_size")

            # Kill memcached on NodeA to trigger rollback on other Nodes
            # replica vBuckets
            for bucket in self.bucket_util.buckets:
                self.log.debug(cbstats.failover_stats(bucket.name))
            shell.kill_memcached()

            self.assertTrue(
                self.bucket_util._wait_warmup_completed(
                    [self.cluster_util.cluster.master],
                    self.bucket_util.buckets[0],
                    wait_time=self.wait_timeout * 10))
            self.sleep(10, "Not strictly required; settling time after warmup")

            self.bucket_util.verify_stats_all_buckets(items, timeout=300)
            for bucket in self.bucket_util.buckets:
                self.log.debug(cbstats.failover_stats(bucket.name))

        data_validation = self.task.async_validate_docs(
            self.cluster,
            self.bucket_util.buckets[0],
            self.gen_read,
            "create",
            0,
            batch_size=self.batch_size,
            process_concurrency=self.process_concurrency,
            pause_secs=5,
            timeout_secs=self.sdk_timeout)
        self.task.jython_task_manager.get_task_result(data_validation)

        shell.disconnect()
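
Note: the stop-persistence, memory-only load, kill-memcached sequence above is
the rollback trigger used throughout these examples. A minimal sketch of just
that sequence (import paths assume TAF's module layout; the load step is
elided):

from memcached.helper.data_helper import MemcachedClientHelper
from remote.remote_util import RemoteMachineShellConnection

def trigger_rollback(node, bucket):
    # Stop persistence so subsequent mutations stay memory-only
    mem_client = MemcachedClientHelper.direct_client(node, bucket)
    mem_client.stop_persistence()
    # ... load memory-only docs here (e.g. via doc_generator) ...
    # Killing memcached discards the un-persisted items; after the node
    # warms up from disk, replicas elsewhere must roll back to match
    shell = RemoteMachineShellConnection(node)
    shell.kill_memcached()
    shell.disconnect()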
Example #19
File: dgm.py Project: sreebhargava143/TAF
    def test_MB_40531(self):
        """
        Test to validate,
        1. Active resident ratio on the nodes never goes
           down below the replica_rr value
        2. 'evictable' (vb_replica_itm_mem - vb_replica_meta_data_mem) value
           never goes below wm_threshold of total bucket memory (ep_max_size)
        :return:
        """
        def check_replica_eviction():
            tbl = TableView(self.log.info)
            tbl.set_headers([
                "Node", "Memory", "WM_Threshold", "Itm_mem", "Meta_mem",
                "Evictable_mem", "A_rr", "R_rr"
            ])
            while self.test_failure is None and run_eviction_check:
                tbl.rows = []
                for kv_node in node_data.keys():
                    all_stats = \
                        node_data[kv_node]["cbstat"].all_stats(bucket.name)
                    bucket_mem = int(all_stats["ep_max_size"])
                    wm_threshold = \
                        (float(all_stats["ep_mem_high_wat_percent"])
                         - float(all_stats["ep_mem_low_wat_percent"]))*100
                    evictable_mem = \
                        int(all_stats["vb_replica_itm_memory"]) \
                        - int(all_stats["vb_replica_meta_data_memory"])
                    active_rr = int(all_stats["vb_active_perc_mem_resident"])
                    replica_rr = int(all_stats["vb_replica_perc_mem_resident"])

                    tbl.add_row([
                        kv_node.ip,
                        str(bucket_mem),
                        str(wm_threshold), all_stats["vb_replica_itm_memory"],
                        all_stats["vb_replica_meta_data_memory"],
                        str(evictable_mem),
                        str(active_rr),
                        str(replica_rr)
                    ])

                    # wm_threshold is the watermark gap in percent, so cap
                    # evictable memory at that percentage of ep_max_size
                    if active_rr != 100 \
                            and evictable_mem > (bucket_mem * wm_threshold
                                                 / 100):
                        tbl.display("Node memory stats")
                        self.log_failure("%s - Active keys evicted before "
                                         "meeting the threshold: %s" %
                                         (kv_node.ip, all_stats))

                    if replica_rr > active_rr:
                        tbl.display("Node memory stats")
                        self.log_failure(
                            "%s: (active_rr) %s < %s (replica_rr)" %
                            (kv_node.ip, active_rr, replica_rr))

        bucket = self.bucket_util.buckets[0]
        node_data = dict()
        kv_nodes = self.cluster_util.get_kv_nodes()
        for node in kv_nodes:
            cbstat = Cbstats(RemoteMachineShellConnection(node))
            node_data[node] = dict()
            node_data[node]["cbstat"] = cbstat
            node_data[node]["active"] = cbstat.vbucket_list(
                bucket.name, "active")
            node_data[node]["replica"] = cbstat.vbucket_list(
                bucket.name, "replica")

        target_dgm = 30
        run_eviction_check = True
        bucket_helper = BucketHelper(self.cluster.master)

        eviction_check_thread = Thread(target=check_replica_eviction)
        eviction_check_thread.start()

        op_index = 0
        op_batch_size = 8000
        create_batch_size = 10000

        # Perform ADD/SET/READ until targeted DGM value is reached
        curr_dgm = bucket_helper.fetch_bucket_stats(
            bucket.name)["op"]["samples"]["vb_active_resident_items_ratio"][-1]
        self.log.info("Wait for DGM to reach %s%%. Current DGM: %s%%" %
                      (target_dgm, curr_dgm))
        while int(curr_dgm) > target_dgm and self.test_failure is None:
            create_gen = doc_generator(self.key,
                                       self.num_items,
                                       self.num_items + create_batch_size,
                                       key_size=self.key_size,
                                       doc_size=self.doc_size,
                                       mutation_type="ADD")
            update_gen = doc_generator(self.key,
                                       op_index,
                                       op_index + op_batch_size,
                                       key_size=self.key_size,
                                       doc_size=self.doc_size,
                                       mutation_type="SET")
            read_gen = doc_generator(self.key,
                                     op_index,
                                     op_index + op_batch_size,
                                     key_size=self.key_size,
                                     doc_size=0)

            create_task = self.task.async_load_gen_docs(
                self.cluster,
                bucket,
                create_gen,
                "create",
                0,
                persist_to=self.persist_to,
                replicate_to=self.replicate_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                print_ops_rate=False,
                batch_size=200,
                process_concurrency=1)
            update_task = self.task.async_load_gen_docs(
                self.cluster,
                bucket,
                update_gen,
                "update",
                0,
                persist_to=self.persist_to,
                replicate_to=self.replicate_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                print_ops_rate=False,
                batch_size=200,
                process_concurrency=1)
            read_task = self.task.async_load_gen_docs(
                self.cluster,
                bucket,
                read_gen,
                "read",
                timeout_secs=self.sdk_timeout,
                print_ops_rate=False,
                batch_size=200,
                process_concurrency=1)

            self.task_manager.get_task_result(create_task)
            self.task_manager.get_task_result(update_task)
            self.task_manager.get_task_result(read_task)

            # Update indexes for next iteration
            op_index += op_batch_size
            self.num_items += create_batch_size

            curr_dgm = bucket_helper.fetch_bucket_stats(
                bucket.name
            )["op"]["samples"]["vb_active_resident_items_ratio"][-1]
            self.log.info("Current DGM: %s%%" % curr_dgm)

        # Stop eviction check thread
        run_eviction_check = False
        eviction_check_thread.join()

        # Close shell connections
        for node in kv_nodes:
            node_data[node]["cbstat"].shellConn.disconnect()

        self.validate_test_failure()
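
Note: the pass/fail condition inside check_replica_eviction() reduces to a
little arithmetic on four cbstat values. A standalone sketch of that math
(stat names as above; the threshold reading follows the docstring, so treat
it as an interpretation rather than the project's canonical check):

def replica_evictable_exceeds_threshold(all_stats):
    bucket_mem = int(all_stats["ep_max_size"])
    # High/low watermarks are fractions; their gap expressed in percent
    wm_threshold = (float(all_stats["ep_mem_high_wat_percent"])
                    - float(all_stats["ep_mem_low_wat_percent"])) * 100
    evictable_mem = (int(all_stats["vb_replica_itm_memory"])
                     - int(all_stats["vb_replica_meta_data_memory"]))
    # Evictable replica memory should stay below wm_threshold% of ep_max_size
    return evictable_mem > bucket_mem * wm_threshold / 100

# 100MB bucket with 85%/75% watermarks -> 10MB cap on evictable memory
stats = {"ep_max_size": 100 * 1024 * 1024,
         "ep_mem_high_wat_percent": 0.85,
         "ep_mem_low_wat_percent": 0.75,
         "vb_replica_itm_memory": 12 * 1024 * 1024,
         "vb_replica_meta_data_memory": 1 * 1024 * 1024}
assert replica_evictable_exceeds_threshold(stats)  # 11MB > 10MB cap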
Example #20
    def test_ttl_less_than_durability_timeout(self):
        """
        MB-43238
        1. Regular write with a short TTL (doc_ttl, 5s here) for some key
        2. Disable expiry pager (to prevent raciness)
        3. Wait TTL period
        4. Disable persistence on the node with the replica vBucket for that key
        5. SyncWrite PersistMajority to active vBucket for that key (should hang)
        6. Access key on other thread to trigger expiry
        7. Observe DCP connection being torn down without fix
        """
        def perform_sync_write():
            client.crud(DocLoading.Bucket.DocOps.CREATE,
                        key, {},
                        durability=Bucket.DurabilityLevel.PERSIST_TO_MAJORITY,
                        timeout=60)

        doc_ttl = 5
        target_node = None
        key = "test_ttl_doc"
        vb_for_key = self.bucket_util.get_vbucket_num_for_key(key)
        bucket = self.cluster.buckets[0]

        # Find target node for replica VB
        for target_node in self.cluster.nodes_in_cluster:
            cb_stats = Cbstats(target_node)
            if vb_for_key in cb_stats.vbucket_list(bucket.name, "replica"):
                break

        self.log.info("Target node: %s, Key: %s" % (target_node.ip, key))
        self.log.info("Disabling expiry_pager")
        shell = RemoteMachineShellConnection(target_node)
        cb_ep_ctl = Cbepctl(shell)
        cb_ep_ctl.set(bucket.name, "flush_param", "exp_pager_stime", 0)

        # Create SDK client
        client = SDKClient([self.cluster.master], bucket)

        self.log.info("Non-sync write with TTL=%s" % doc_ttl)
        client.crud(DocLoading.Bucket.DocOps.CREATE, key, {}, exp=doc_ttl)

        self.sleep(doc_ttl, "Wait for document to expire")
        self.bucket_util._wait_for_stats_all_buckets(self.cluster,
                                                     self.cluster.buckets)

        self.log.info("Stopping persistence on replica VB node using cbepctl")
        cb_ep_ctl.persistence(bucket.name, "stop")

        # Start doc_load with lesser ttl
        doc_create_thread = Thread(target=perform_sync_write)
        doc_create_thread.start()
        self.sleep(2, "Wait for sync_write thread to start")

        self.log.info("Read key from another thread to trigger expiry")
        failure = None
        result = client.crud(DocLoading.Bucket.DocOps.READ, key)
        if SDKException.DocumentNotFoundException not in str(result["error"]):
            failure = "Invalid exception: %s" % result["error"]

        self.log.info("Resuming persistence on target node")
        cb_ep_ctl.persistence(bucket.name, "start")

        # Wait for doc_create_thread to complete
        doc_create_thread.join()

        # Close SDK client and shell connections
        client.close()
        shell.disconnect()

        if failure:
            self.fail(failure)

        for node in self.cluster.nodes_in_cluster:
            cb_stats = Cbstats(node).all_stats(bucket.name)
            self.log.info("Node: %s, ep_expired_access: %s" %
                          (node.ip, cb_stats["ep_expired_access"]))
            self.assertEqual(int(cb_stats["ep_expired_access"]), 0,
                             "%s: ep_expired_access != 0" % node.ip)
Example #21
    def test_maxttl_with_timeout(self):
        """
        1. Stop Memcached on target_nodes based on replicas configured.
        2. Initiate doc_ops with higher sdk_timeout
        3. Sleep for time within the configured sdk_timeout
        4. Resume Memcached on target_nodes to make sure doc_ops go through
        5. Make sure maxTTL is calculated as soon as the active vbucket
           receives the mutation
        :return:
        """
        shell_conn = dict()
        target_vbuckets = list()
        target_nodes = self.getTargetNodes()
        def_bucket = self.cluster.buckets[0]
        self.maxttl = self.input.param("doc_ttl", self.maxttl)

        # Open required SDK connections before error_simulation
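        # target_vbuckets is still an empty list here; it is populated in
        # the loop below, before the deferred task (start_task=False)
        # begins generating docs against those vbuckets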
        gen_create = doc_generator(self.key,
                                   0,
                                   self.num_items,
                                   doc_size=self.doc_size,
                                   doc_type=self.doc_type,
                                   target_vbucket=target_vbuckets,
                                   vbuckets=self.cluster.vbuckets)
        doc_op_task = self.task.async_load_gen_docs(
            self.cluster,
            def_bucket,
            gen_create,
            "create",
            self.maxttl,
            batch_size=10,
            process_concurrency=8,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to,
            durability=self.durability_level,
            timeout_secs=self.sdk_timeout,
            compression=self.sdk_compression,
            start_task=False,
            sdk_client_pool=self.sdk_client_pool)

        # Open shell_conn and create Memcached error for testing MaxTTL
        self.log.info("1. Stopping Memcached on target_nodes")
        for node in target_nodes:
            shell_conn[node.ip] = RemoteMachineShellConnection(node)
            cbstats = Cbstats(shell_conn[node.ip])
            target_vbuckets += cbstats.vbucket_list(def_bucket.name, "replica")
            cb_error = CouchbaseError(self.log, shell_conn[node.ip])
            cb_error.create(CouchbaseError.STOP_MEMCACHED, def_bucket.name)

        self.log.info("2. Initiating the doc_ops with doc TTL")
        self.task_manager.add_new_task(doc_op_task)

        self.sleep(self.maxttl, "3. Sleep for max_ttl time")

        # Revert Memcached error and close the shell_conn
        self.log.info("4. Resuming Memcached on target_nodes")
        for node in target_nodes:
            cb_error = CouchbaseError(self.log, shell_conn[node.ip])
            cb_error.revert(CouchbaseError.STOP_MEMCACHED, def_bucket.name)
            shell_conn[node.ip].disconnect()

        self.log.info("5. Waiting for doc_ops to complete")
        self.task.jython_task_manager.get_task_result(doc_op_task)

        self.bucket_util._expiry_pager(self.cluster, val=1)
        self.sleep(10, "6. Waiting for items to be purged")

        # Read all expired docs to validate all keys present
        doc_op_task = self.task.async_load_gen_docs(
            self.cluster,
            def_bucket,
            gen_create,
            "read",
            batch_size=10,
            process_concurrency=8,
            timeout_secs=self.sdk_timeout,
            sdk_client_pool=self.sdk_client_pool)
        self.task.jython_task_manager.get_task_result(doc_op_task)

        self.log.info("7. Validating docs expired after TTL, "
                      "even before sync_write succeeds")
        if len(doc_op_task.success.keys()) == self.num_items:
            self.fail("No docs deleted after MaxTTL time: %s" %
                      doc_op_task.success.keys())

        self.sleep(10, "8. Waiting for all docs to be purged")
        # Read all expired docs to validate all keys present
        doc_op_task = self.task.async_load_gen_docs(
            self.cluster,
            def_bucket,
            gen_create,
            "read",
            batch_size=10,
            process_concurrency=8,
            timeout_secs=self.sdk_timeout,
            sdk_client_pool=self.sdk_client_pool)
        self.task.jython_task_manager.get_task_result(doc_op_task)

        self.log.info("9. Validating docs expired after TTL")
        if len(doc_op_task.fail.keys()) != self.num_items:
            self.fail("Items not deleted after MaxTTL time: %s" %
                      doc_op_task.success.keys())

        # Validate cas for purged items
        keys_with_cas = list()
        for key, result in doc_op_task.fail.items():
            if result['cas'] != 0:
                keys_with_cas.append(key)
        if len(keys_with_cas) != 0:
            self.fail("Following failed keys have CAS: %s" % keys_with_cas)

        # Recreate all docs without any node issues
        doc_op_task = self.task.async_load_gen_docs(
            self.cluster,
            def_bucket,
            gen_create,
            "create",
            0,
            batch_size=10,
            process_concurrency=8,
            durability=self.durability_level,
            timeout_secs=self.sdk_timeout,
            compression=self.sdk_compression,
            sdk_client_pool=self.sdk_client_pool)
        self.task.jython_task_manager.get_task_result(doc_op_task)

        self.log.info("10. Validating docs exists after creation")
        if len(doc_op_task.fail.keys()) != 0:
            self.fail("Doc recreate failed for keys: %s" %
                      doc_op_task.fail.keys())

        # Final doc_count validation
        self.bucket_util._wait_for_stats_all_buckets(self.cluster,
                                                     self.cluster.buckets)
        self.bucket_util.verify_stats_all_buckets(self.cluster, self.num_items)
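
Note: the STOP_MEMCACHED create/revert pair above is the generic
error-simulation pattern in these tests. A minimal sketch, assuming
CouchbaseError is importable from TAF's error_simulation package:

from remote.remote_util import RemoteMachineShellConnection
from error_simulation.cb_error import CouchbaseError

def pause_memcached(log, node, bucket_name, sleep_fn, seconds):
    shell = RemoteMachineShellConnection(node)
    cb_error = CouchbaseError(log, shell)
    cb_error.create(CouchbaseError.STOP_MEMCACHED, bucket_name)
    try:
        sleep_fn(seconds)  # keep memcached paused while ops time out/retry
    finally:
        # Always revert so the pending doc_ops can complete
        cb_error.revert(CouchbaseError.STOP_MEMCACHED, bucket_name)
        shell.disconnect()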
Example #22
 def getVbucketNumbers(shell_conn, bucket_name, replica_type):
     cb_stats = Cbstats(shell_conn)
     return cb_stats.vbucket_list(bucket_name, replica_type)
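
A hypothetical usage, assuming an open shell connection to a KV node
(kv_node and the bucket name "default" are placeholders):

shell = RemoteMachineShellConnection(kv_node)
active_vbs = getVbucketNumbers(shell, "default", "active")
replica_vbs = getVbucketNumbers(shell, "default", "replica")
shell.disconnect()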
Example #23
    def test_index_with_aborts(self):
        """
        1. Create index (2i/view) on default bucket
        2. Load multiple docs such that all sync_writes will be aborted
        3. Verify nothing went into indexing
        4. Load sync_write docs such that they are successful
        5. Validate the mutated docs are taken into indexing
        :return:
        """

        crud_batch_size = 50
        def_bucket = self.cluster.buckets[0]
        kv_nodes = self.cluster_util.get_kv_nodes(self.cluster)
        replica_vbs = dict()
        verification_dict = dict()
        index_item_count = dict()
        expected_num_indexed = dict()
        load_gen = dict()
        load_gen["ADD"] = dict()
        load_gen["SET"] = dict()
        partial_aborts = ["initial_aborts", "aborts_at_end"]

        durability_helper = DurabilityHelper(
            self.log,
            len(self.cluster.nodes_in_cluster),
            durability=self.durability_level,
            replicate_to=self.replicate_to,
            persist_to=self.persist_to)

        if self.create_index_during == "before_doc_ops":
            self.create_gsi_indexes(def_bucket)

        curr_items = self.bucket_util.get_bucket_current_item_count(
            self.cluster, def_bucket)
        if self.sync_write_abort_pattern in ["all_aborts", "initial_aborts"]:
            self.bucket_util.flush_bucket(self.cluster, def_bucket)
            self.num_items = 0
        else:
            self.num_items = curr_items

        self.log.info("Disabling auto_failover to avoid node failures")
        status = RestConnection(self.cluster.master) \
            .update_autofailover_settings(False, 120)
        self.assertTrue(status, msg="Failure during disabling auto-failover")

        # Validate vbucket stats
        verification_dict["ops_create"] = self.num_items
        verification_dict["ops_update"] = 0
        # verification_dict["ops_delete"] = 0
        verification_dict["rollback_item_count"] = 0
        verification_dict["sync_write_aborted_count"] = 0
        verification_dict["sync_write_committed_count"] = 0

        index_item_count["#primary"] = self.num_items
        index_item_count["durable_add_aborts"] = 0
        index_item_count["durable_set_aborts"] = 0
        expected_num_indexed["#primary"] = curr_items
        expected_num_indexed["durable_add_aborts"] = 0
        expected_num_indexed["durable_set_aborts"] = 0

        if self.create_index_during == "before_doc_ops":
            self.validate_indexed_doc_count(def_bucket, index_item_count)

        self.log.info("Loading docs such that all sync_writes will be aborted")
        for server in kv_nodes:
            ssh_shell = RemoteMachineShellConnection(server)
            cbstats = Cbstats(server)
            replica_vbs[server] = cbstats.vbucket_list(def_bucket.name,
                                                       "replica")
            load_gen["ADD"][server] = list()
            load_gen["ADD"][server].append(
                doc_generator(self.key,
                              0,
                              crud_batch_size,
                              target_vbucket=replica_vbs[server],
                              mutation_type="ADD"))
            if self.sync_write_abort_pattern in partial_aborts:
                load_gen["ADD"][server].append(
                    doc_generator(self.key,
                                  10000,
                                  crud_batch_size,
                                  target_vbucket=replica_vbs[server],
                                  mutation_type="ADD"))
                verification_dict["ops_create"] += crud_batch_size
                verification_dict["sync_write_committed_count"] += \
                    crud_batch_size
                index_item_count["#primary"] += crud_batch_size
                index_item_count["durable_add_aborts"] += crud_batch_size
                expected_num_indexed["#primary"] += crud_batch_size
                expected_num_indexed["durable_add_aborts"] += crud_batch_size

            task_success = self.bucket_util.load_durable_aborts(
                ssh_shell, load_gen["ADD"][server], self.cluster, def_bucket,
                self.durability_level, DocLoading.Bucket.DocOps.CREATE,
                self.sync_write_abort_pattern)
            if not task_success:
                self.log_failure("Failure during load_abort task")

            verification_dict["sync_write_aborted_count"] += \
                crud_batch_size
            if self.create_index_during == "before_doc_ops":
                self.validate_indexed_doc_count(def_bucket, index_item_count)

            load_gen["SET"][server] = list()
            load_gen["SET"][server].append(
                doc_generator(self.key,
                              0,
                              crud_batch_size,
                              target_vbucket=replica_vbs[server],
                              mutation_type="SET"))
            if self.sync_write_abort_pattern in partial_aborts:
                load_gen["SET"][server].append(
                    doc_generator(self.key,
                                  10000,
                                  crud_batch_size,
                                  target_vbucket=replica_vbs[server],
                                  mutation_type="SET"))
                verification_dict["ops_update"] += crud_batch_size
                verification_dict["sync_write_committed_count"] += \
                    crud_batch_size
                index_item_count["durable_add_aborts"] -= crud_batch_size
                index_item_count["durable_set_aborts"] += crud_batch_size
                expected_num_indexed["#primary"] += crud_batch_size
                expected_num_indexed["durable_add_aborts"] += crud_batch_size
                expected_num_indexed["durable_set_aborts"] += crud_batch_size

            verification_dict["sync_write_aborted_count"] += \
                crud_batch_size
            task_success = self.bucket_util.load_durable_aborts(
                ssh_shell, load_gen["SET"][server], self.cluster, def_bucket,
                self.durability_level, DocLoading.Bucket.DocOps.UPDATE,
                self.sync_write_abort_pattern)
            if not task_success:
                self.log_failure("Failure during load_abort task")

            ssh_shell.disconnect()

            if self.create_index_during == "before_doc_ops":
                self.validate_indexed_doc_count(def_bucket, index_item_count)
        failed = durability_helper.verify_vbucket_details_stats(
            def_bucket,
            kv_nodes,
            vbuckets=self.cluster.vbuckets,
            expected_val=verification_dict)
        if failed:
            self.log_failure("Cbstat vbucket-details verification failed")
        self.validate_test_failure()

        if self.create_index_during == "after_doc_ops":
            self.create_gsi_indexes(def_bucket)
            self.validate_indexed_doc_count(def_bucket, index_item_count)

        self.log.info("Verify aborts are not indexed")
        self.validate_indexed_count_from_stats(def_bucket,
                                               expected_num_indexed,
                                               index_item_count)

        if not self.use_gsi_for_primary:
            self.log.info("Wait of any indexing_activity to complete")
            index_monitor_task = self.cluster_util.async_monitor_active_task(
                self.cluster.master,
                "indexer",
                "_design/ddl_#primary",
                num_iteration=20,
                wait_task=True)[0]
            self.task_manager.get_task_result(index_monitor_task)
            self.assertTrue(index_monitor_task.result,
                            "Indexer task still running on server")

        for server in kv_nodes:
            if self.sync_write_abort_pattern == "initial_aborts":
                load_gen["ADD"][server] = load_gen["ADD"][server][:1]
                load_gen["SET"][server] = load_gen["SET"][server][:1]
            elif self.sync_write_abort_pattern == "aborts_at_end":
                load_gen["ADD"][server] = load_gen["ADD"][server][-1:]
                load_gen["SET"][server] = load_gen["SET"][server][-1:]

        self.log.info("Load sync_write docs such that they are successful")
        for server in kv_nodes:
            for gen_load in load_gen["ADD"][server]:
                task = self.task.async_load_gen_docs(
                    self.cluster,
                    def_bucket,
                    gen_load,
                    "create",
                    0,
                    batch_size=50,
                    process_concurrency=8,
                    replicate_to=self.replicate_to,
                    persist_to=self.persist_to,
                    durability=self.durability_level,
                    timeout_secs=self.sdk_timeout)
                self.task.jython_task_manager.get_task_result(task)

                if len(task.fail.keys()) != 0:
                    self.log_failure("Some failures seen during doc_ops")

                index_item_count["#primary"] += crud_batch_size
                index_item_count["durable_add_aborts"] += crud_batch_size
                expected_num_indexed["#primary"] += crud_batch_size
                expected_num_indexed["durable_add_aborts"] += crud_batch_size
                self.validate_indexed_doc_count(def_bucket, index_item_count)

            for gen_load in load_gen["SET"][server]:
                task = self.task.async_load_gen_docs(
                    self.cluster,
                    def_bucket,
                    gen_load,
                    "update",
                    0,
                    batch_size=50,
                    process_concurrency=8,
                    replicate_to=self.replicate_to,
                    persist_to=self.persist_to,
                    durability=self.durability_level,
                    timeout_secs=self.sdk_timeout)
                self.task.jython_task_manager.get_task_result(task)

                if len(task.fail.keys()) != 0:
                    self.log_failure("Some failures seen during doc_ops")

                index_item_count["durable_add_aborts"] -= crud_batch_size
                index_item_count["durable_set_aborts"] += crud_batch_size
                expected_num_indexed["#primary"] += crud_batch_size
                expected_num_indexed["durable_add_aborts"] += crud_batch_size
                expected_num_indexed["durable_set_aborts"] += crud_batch_size
                self.validate_indexed_doc_count(def_bucket, index_item_count)

        self.log.info("Validate the mutated docs are taken into indexing")
        self.validate_indexed_count_from_stats(def_bucket,
                                               expected_num_indexed,
                                               index_item_count)
        self.validate_test_failure()
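
Note: aiming doc loads at a node's replica vBuckets, as above, is what keeps
the sync_writes from reaching majority so they abort. A minimal sketch of the
targeting step (helper names as in the example; import paths assume TAF's
layout):

from cb_tools.cbstats import Cbstats
from couchbase_helper.documentgenerator import doc_generator

def replica_targeted_gen(server, bucket_name, key_prefix, num_docs):
    # Keys are filtered so every generated doc lands in one of this
    # node's replica vBuckets
    replica_vbs = Cbstats(server).vbucket_list(bucket_name, "replica")
    return doc_generator(key_prefix, 0, num_docs,
                         target_vbucket=replica_vbs,
                         mutation_type="ADD")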
Example #24
    def test_flush_bucket_during_rollback(self):
        '''
        Test focus: Stop persistence on all nodes one by one,
                    trigger rollback on the other nodes,
                    and flush the data during the rollback.
                    The above steps are repeated num_rollback
                    (variable defined in test) times

        STEPS:
         -- Ensure creation of at least a single state file
         -- The steps below are repeated on all nodes, stopping persistence
            on one node at a time
         -- Stop persistence on node x
         -- Start load on node x for a given duration (self.duration * 60 seconds)
         -- The above step ensures creation of new state files (# equal to self.duration)
         -- Kill memcached on node x
         -- Trigger rollback on the other/replica nodes
         -- Restart persistence on node x
         -- Repeat all the above steps num_rollback times
        '''
        self.assertTrue(self.rest.update_autofailover_settings(False, 600),
                        "AutoFailover disabling failed")
        items = copy.deepcopy(self.init_items_per_collection)
        mem_only_items = self.input.param("rollback_items", 10000)

        ops_len = len(self.doc_ops.split(":"))

        if self.nodes_init < 2 or self.num_replicas < 1:
            self.fail("Not enough nodes/replicas in the cluster/bucket "
                      "to test rollback")

        self.duration = self.input.param("duration", 2)
        self.num_rollbacks = self.input.param("num_rollbacks", 3)

        #######################################################################
        '''
        STEP - 1, Ensures creation of at least one snapshot

        To ensure at least one snapshot should get created before rollback
        starts, we need to sleep for 60 seconds as per magma design which
        create state file every 60s

        '''
        self.sleep(60, "Ensures creation of at least one snapshot")
        #######################################################################
        '''
        STEP - 2,  Stop persistence on node - x
        '''

        for i in range(1, self.num_rollbacks+1):
            self.log.info("Roll back Iteration == {}".format(i))
            start = items
            for x, node in enumerate(self.cluster.nodes_in_cluster):
                shell = RemoteMachineShellConnection(node)
                cbstats = Cbstats(shell)
                self.target_vbucket = cbstats.vbucket_list(
                    self.cluster.buckets[0].name)
                mem_item_count = 0
                # Stopping persistence on Node-x
                self.log.debug("Iteration == {}, Stopping persistence on Node-{}, ip ={}"
                               .format(i, x+1, node))
                Cbepctl(shell).persistence(self.cluster.buckets[0].name, "stop")

                ###############################################################
                '''
                STEP - 3
                  -- Load documents on node x for self.duration * 60 seconds
                  -- This step ensures new state files (number equal to self.duration)
                '''
                self.compute_docs(start, mem_only_items)
                self.gen_create = None
                self.gen_update = None
                self.gen_delete = None
                self.gen_expiry = None
                time_end = time.time() + 60 * self.duration
                itr = 0
                while time.time() < time_end:
                    itr += 1
                    time_start = time.time()
                    mem_item_count += mem_only_items * ops_len
                    self.generate_docs(doc_ops=self.doc_ops,
                                       target_vbucket=self.target_vbucket)
                    self.loadgen_docs(_sync=True,
                                      retry_exceptions=self.retry_exceptions)
                    if self.gen_create is not None:
                        self.create_start = self.gen_create.key_counter
                    if self.gen_update is not None:
                        self.update_start = self.gen_update.key_counter
                    if self.gen_delete is not None:
                        self.delete_start = self.gen_delete.key_counter
                    if self.gen_expiry is not None:
                        self.expiry_start = self.gen_expiry.key_counter

                    if time.time() < time_start + 60:
                        self.log.info(
                            "Rollback Iteration == {}, itr == {}, "
                            "Active-Node == {}, Node == {}"
                            .format(i, itr, x+1, node))
                        self.sleep(time_start + 60 - time.time(),
                                   "Sleep to ensure creation of state files "
                                   "for rollback")
                        self.log.info("State files == {}".format(
                            self.get_state_files(self.buckets[0])))
                ep_queue_size_map = {node: mem_item_count}
                if self.durability_level:
                    self.log.info("Updating the num_items-on-disk check to "
                                  "double due to durability")
                    ep_queue_size_map = {node: mem_item_count * 2}
                vb_replica_queue_size_map = {node: 0}

                for nod in self.cluster.nodes_in_cluster:
                    if nod != node:
                        ep_queue_size_map.update({nod: 0})
                        vb_replica_queue_size_map.update({nod: 0})

                for bucket in self.cluster.buckets:
                    self.bucket_util._wait_for_stat(bucket, ep_queue_size_map,
                                                    timeout=1200)
                    self.bucket_util._wait_for_stat(bucket, vb_replica_queue_size_map,
                                                    cbstat_cmd="all",
                                                    stat_name="vb_replica_queue_size",
                                                    timeout=1200)
                # replica vBuckets
                for bucket in self.cluster.buckets:
                    self.log.debug(cbstats.failover_stats(bucket.name))

                ###############################################################
                '''
                STEP - 4
                  -- Kill memcached on node x and trigger rollback on the
                     other nodes
                  -- After 20 seconds, flush the bucket
                '''

                shell.kill_memcached()
                self.sleep(20, "sleep after killing memcached")
                self.bucket_util.flush_bucket(self.cluster, self.cluster.buckets[0])
                ###############################################################
                '''
                STEP - 5
                 -- Restart persistence on node x
                '''
                self.assertTrue(self.bucket_util._wait_warmup_completed(
                    [self.cluster.master],
                    self.cluster.buckets[0],
                    wait_time=self.wait_timeout * 10))

                self.log.debug("Iteration=={}, Re-Starting persistence on Node -- {}".format(i, node))
                Cbepctl(shell).persistence(self.cluster.buckets[0].name, "start")

                self.sleep(5, "Sleep after re-starting persistence, Iteration{}".format(i))
                shell.disconnect()
                ###################################################################
                '''
                STEP - 6
                  -- Load Docs on all the nodes
                  -- Loading of doc for 60 seconds
                  -- Ensures creation of new state file
                '''
                self.create_start = 0
                self.create_end = self.init_items_per_collection
                self.generate_docs(doc_ops="create", target_vbucket=None)
                self.loadgen_docs(self.retry_exceptions,
                                  self.ignore_exceptions, _sync=True,
                                  doc_ops="create")
                self.bucket_util._wait_for_stats_all_buckets(self.cluster,
                                                             self.cluster.buckets,
                                                             timeout=1200)
Example #25
    def rebalance_out_with_queries(self):
        num_views = self.input.param("num_views", 5)
        is_dev_ddoc = self.input.param("is_dev_ddoc", False)
        ddoc_name = "ddoc1"
        prefix = ("", "dev_")[is_dev_ddoc]

        query = dict()
        query["connectionTimeout"] = 60000
        query["full_set"] = "true"

        views = list()
        tasks = list()

        if self.test_abort_snapshot:
            self.log.info("Creating sync_write abort scenario for replica vbs")
            for server in self.cluster_util.get_kv_nodes(self.cluster):
                ssh_shell = RemoteMachineShellConnection(server)
                cbstats = Cbstats(ssh_shell)
                replica_vbs = cbstats.vbucket_list(
                    self.cluster.buckets[0].name, "replica")
                load_gen = doc_generator(self.key, 0, 5000,
                                         target_vbucket=replica_vbs)
                success = self.bucket_util.load_durable_aborts(
                    ssh_shell, [load_gen],
                    self.cluster.buckets[0],
                    self.durability_level,
                    "update", "all_aborts")
                if not success:
                    self.log_failure("Simulating aborts failed")
                ssh_shell.disconnect()

            self.validate_test_failure()

        for bucket in self.cluster.buckets:
            temp = self.bucket_util.make_default_views(
                self.default_view,
                self.default_view_name,
                num_views, is_dev_ddoc)
            temp_tasks = self.bucket_util.async_create_views(
                self.cluster.master, ddoc_name, temp, bucket)
            views += temp
            tasks += temp_tasks
        timeout = None
        if self.active_resident_threshold == 0:
            timeout = max(self.wait_timeout * 4,
                          len(self.cluster.buckets) * self.wait_timeout
                          * self.num_items / 50000)

        for task in tasks:
            self.task.jython_task_manager.get_task_result(task)

        for bucket in self.cluster.buckets:
            for view in views:
                # run queries to create indexes
                self.bucket_util.query_view(
                    self.cluster.master, prefix + ddoc_name, view.name, query)

        active_tasks = self.cluster_util.async_monitor_active_task(
            self.cluster.servers, "indexer", "_design/" + prefix + ddoc_name,
            wait_task=False)
        for active_task in active_tasks:
            self.task_manager.get_task_result(active_task)
            self.assertTrue(active_task.result)

        expected_rows = self.num_items
        if self.max_verify:
            expected_rows = self.max_verify
            query["limit"] = expected_rows
        query["stale"] = "false"

        for bucket in self.cluster.buckets:
            self.bucket_util.perform_verify_queries(
                self.cluster.master, num_views, prefix, ddoc_name,
                self.default_view_name, query, expected_rows,
                bucket=bucket, wait_time=timeout)

        servs_out = self.cluster.servers[-self.nodes_out:]
        rebalance = self.task.async_rebalance(
            [self.cluster.master], [], servs_out)
        self.sleep(self.wait_timeout / 5)
        # Verify that view query results match expectations during rebalance
        for bucket in self.cluster.buckets:
            self.bucket_util.perform_verify_queries(
                self.cluster.master, num_views, prefix, ddoc_name,
                self.default_view_name, query, expected_rows,
                bucket=bucket, wait_time=timeout)
        # verify view queries results after rebalancing
        self.task.jython_task_manager.get_task_result(rebalance)
        self.assertTrue(rebalance.result, "Rebalance Failed")
        self.cluster.nodes_in_cluster = \
            list(set(self.cluster.nodes_in_cluster) - set(servs_out))
        for bucket in self.cluster.buckets:
            self.bucket_util.perform_verify_queries(
                self.cluster.master, num_views, prefix, ddoc_name,
                self.default_view_name, query, expected_rows,
                bucket=bucket, wait_time=timeout)
        if not self.atomicity:
            self.bucket_util.verify_cluster_stats(self.cluster, self.num_items,
                                                  timeout=self.wait_timeout)
            self.bucket_util.verify_unacked_bytes_all_buckets(self.cluster)
Example #26
    def test_rollback_during_compaction(self):
        '''
        Stop persistence on the master node, load memory-only items,
        then kill memcached to trigger rollback on the replica nodes
        while bucket compaction runs before/after the rollback.
        '''
        self.assertTrue(self.rest.update_autofailover_settings(False, 600),
                        "AutoFailover disabling failed")
        items = copy.deepcopy(self.num_items)
        mem_only_items = self.input.param("rollback_items", 10000)

        ops_len = len(self.doc_ops.split(":"))

        if self.nodes_init < 2 or self.num_replicas < 1:
            self.fail("Not enough nodes/replicas in the cluster/bucket "
                      "to test rollback")

        self.num_rollbacks = self.input.param("num_rollbacks", 1)

        #######################################################################
        '''
        STEP - 1, Stop persistence on node x
        '''

        for i in range(1, self.num_rollbacks+1):
            self.log.info("Roll back Iteration == {}".format(i))
            start = items
            shell = RemoteMachineShellConnection(self.cluster.master)
            cbstats = Cbstats(self.cluster.master)
            self.target_vbucket = cbstats.vbucket_list(
                self.cluster.buckets[0].name)
            mem_item_count = 0
            # Stopping persistence on Node-x
            self.log.debug("Stopping persistence on Node-{}"
                           .format(self.cluster.master))
            Cbepctl(shell).persistence(self.cluster.buckets[0].name, "stop")

            ###############################################################
            '''
            STEP - 2
              -- Load documents on node x (memory-only, since persistence
                 is stopped)
            '''
            self.compute_docs(start, mem_only_items)
            self.gen_create = None
            self.gen_update = None
            self.gen_delete = None
            self.gen_expiry = None
            mem_item_count += mem_only_items * ops_len
            self.generate_docs(doc_ops=self.doc_ops,
                               target_vbucket=self.target_vbucket)
            self.loadgen_docs(_sync=True,
                              retry_exceptions=self.retry_exceptions,
                              ignore_exceptions=self.ignore_exceptions)
            if self.gen_create is not None:
                self.create_start = self.gen_create.key_counter
            if self.gen_update is not None:
                self.update_start = self.gen_update.key_counter
            if self.gen_delete is not None:
                self.delete_start = self.gen_delete.key_counter
            if self.gen_expiry is not None:
                self.expiry_start = self.gen_expiry.key_counter

            ep_queue_size_map = {self.cluster.nodes_in_cluster[0]:
                                 mem_item_count}
            if self.durability_level:
                self.log.info("Updating the num_items-on-disk check to "
                              "double due to durability")
                ep_queue_size_map = {self.cluster.nodes_in_cluster[0]:
                                     mem_item_count * 2}
            # Defined outside the durability check to avoid a NameError
            # when no durability_level is set
            vb_replica_queue_size_map = {self.cluster.nodes_in_cluster[0]: 0}

            for node in self.cluster.nodes_in_cluster[1:]:
                ep_queue_size_map.update({node: 0})
                vb_replica_queue_size_map.update({node: 0})

                #for bucket in self.cluster.buckets:
                #    self.bucket_util._wait_for_stat(bucket, ep_queue_size_map,
                #                                    timeout=1200)
                #    self.bucket_util._wait_for_stat(bucket, vb_replica_queue_size_map,
                #                                    cbstat_cmd="all",
                #                                    stat_name="vb_replica_queue_size",
                #                                    timeout=1200)
                # replica vBuckets
                #for bucket in self.cluster.buckets:
                #    self.log.debug(cbstats.failover_stats(bucket.name))

            ###############################################################
            '''
            STEP - 3
                -- Kill memcached on the master node and trigger rollback
                   on the other nodes
            '''
            if self.compact_before:
                compaction_tasks = []
                for bucket in self.cluster.buckets:
                    compaction_tasks.append(
                        self.task.async_compact_bucket(self.cluster.master,
                                                       bucket))
            shell.kill_memcached()

            if self.compact_after:
                self.bucket_util._run_compaction(self.cluster,
                                                 number_of_times=1)
            if self.compact_before:
                for task in compaction_tasks:
                    self.task_manager.get_task_result(task)

            self.assertTrue(self.bucket_util._wait_warmup_completed(
                [self.cluster.master],
                self.cluster.buckets[0],
                wait_time=self.wait_timeout * 10))

            ###############################################################
            '''
            STEP - 4
                -- Restart persistence on the master node
            '''

            self.log.debug("RollBack Iteration=={}, Re-Starting persistence on Node -- {}".format(i, self.cluster.master))
            Cbepctl(shell).persistence(self.cluster.buckets[0].name, "start")

            self.sleep(5, "Sleep after re-starting persistence, Iteration{}".format(i))
            for nod in self.cluster.nodes_in_cluster:
                ep_queue_size_map.update({nod: 0})
                vb_replica_queue_size_map.update({nod: 0})
            for bucket in self.cluster.buckets:
                self.bucket_util._wait_for_stat(bucket, ep_queue_size_map,
                                                timeout=600)
                self.bucket_util._wait_for_stat(
                    bucket,
                    vb_replica_queue_size_map,
                    cbstat_cmd="all",
                    stat_name="vb_replica_queue_size",
                    timeout=600)
            shell.disconnect()
            ###################################################################
            '''
            STEP - 5
              -- Load docs on all the nodes
              -- Ensures creation of a new state file
            '''
            self.create_start = items
            self.create_end = items + 100000
            self.generate_docs(doc_ops="create", target_vbucket=None)
            self.loadgen_docs(self.retry_exceptions,
                              self.ignore_exceptions, _sync=True,
                              doc_ops="create")
            self.bucket_util._wait_for_stats_all_buckets(
                self.cluster, self.cluster.buckets, timeout=1200)
            items = items + 100000
            self.log.debug("Iteration == {}, Total num_items {}".format(i, items))
Example #27
    def setup_for_test(self, skip_data_loading=False):
        if not skip_data_loading:
            # Load Couchbase bucket first
            self.perform_doc_ops_in_all_cb_buckets(
                "create", 0, self.num_items, durability=self.durability_level)
            self.bucket_util.verify_stats_all_buckets(self.num_items)

        if self.test_abort_snapshot:
            self.log.info("Creating sync_write aborts before dataset creation")
            for server in self.cluster_util.get_kv_nodes():
                ssh_shell = RemoteMachineShellConnection(server)
                cbstats = Cbstats(ssh_shell)
                replica_vbs = cbstats.vbucket_list(
                    self.bucket_util.buckets[0].name, "replica")
                load_gen = doc_generator("test_abort_key",
                                         0,
                                         self.num_items,
                                         target_vbucket=replica_vbs)
                success = self.bucket_util.load_durable_aborts(
                    ssh_shell, [load_gen], self.bucket_util.buckets[0],
                    self.durability_level, "update", "all_aborts")
                if not success:
                    self.log_failure("Simulating aborts failed")
                ssh_shell.disconnect()

            self.validate_test_failure()

        # Create dataset on the CBAS bucket
        self.cbas_util.create_dataset_on_bucket(
            cbas_bucket_name=self.cb_bucket_name,
            cbas_dataset_name=self.cbas_dataset_name)

        if self.test_abort_snapshot:
            self.log.info("Creating sync_write aborts after dataset creation")
            for server in self.cluster_util.get_kv_nodes():
                ssh_shell = RemoteMachineShellConnection(server)
                cbstats = Cbstats(ssh_shell)
                replica_vbs = cbstats.vbucket_list(
                    self.bucket_util.buckets[0].name, "replica")
                load_gen = doc_generator("test_abort_key",
                                         0,
                                         self.num_items,
                                         target_vbucket=replica_vbs)
                success = self.bucket_util.load_durable_aborts(
                    ssh_shell, [load_gen], self.bucket_util.buckets[0],
                    self.durability_level, "update", "all_aborts")
                if not success:
                    self.log_failure("Simulating aborts failed")
                ssh_shell.disconnect()

            self.validate_test_failure()

        # Create indexes on the CBAS bucket
        self.create_secondary_indexes = \
            self.input.param("create_secondary_indexes", True)
        if self.create_secondary_indexes:
            self.index_fields = "profession:string,number:bigint"
            create_idx_statement = "create index {0} on {1}({2});".format(
                self.index_name, self.cbas_dataset_name, self.index_fields)
            status, metrics, errors, results, _ = \
                self.cbas_util.execute_statement_on_cbas_util(
                    create_idx_statement)

            self.assertTrue(status == "success", "Create Index query failed")

            self.assertTrue(
                self.cbas_util.verify_index_created(
                    self.index_name, self.index_fields.split(","),
                    self.cbas_dataset_name)[0])

        # Connect to Bucket
        self.cbas_util.connect_to_bucket(
            cbas_bucket_name=self.cbas_bucket_name,
            cb_bucket_password=self.cb_bucket_password)

        if self.test_abort_snapshot:
            self.log.info("Creating sync_write aborts after dataset connect")
            for server in self.cluster_util.get_kv_nodes():
                ssh_shell = RemoteMachineShellConnection(server)
                cbstats = Cbstats(ssh_shell)
                replica_vbs = cbstats.vbucket_list(
                    self.bucket_util.buckets[0].name, "replica")
                load_gen = doc_generator("test_abort_key",
                                         0,
                                         self.num_items,
                                         target_vbucket=replica_vbs)
                success = self.bucket_util.load_durable_aborts(
                    ssh_shell, [load_gen], self.bucket_util.buckets[0],
                    self.durability_level, "update", "all_aborts")
                if not success:
                    self.log_failure("Simulating aborts failed")
                ssh_shell.disconnect()

            self.validate_test_failure()

        if not skip_data_loading:
            # Validate no. of items in CBAS dataset
            if not self.cbas_util.validate_cbas_dataset_items_count(
                    self.cbas_dataset_name, self.num_items):
                self.fail("No. of items in CBAS dataset do not match "
                          "that in the CB bucket")
Example #28
    def common_test_body(self, failover_reason, rebalance_type=None):
        """
            Main Test body which contains the flow of the failover basic steps
            1. Starts Operations if programmed into the test case(before/after)
            2. Start View and Index Building operations
            3. Failover K out of N nodes (failover can be HARD/GRACEFUL)
            4.1 Rebalance the cluster is failover of K nodeStatuses
            4.2 Run Add-Back operation with recoveryType = (full/delta)
                with rebalance
            5. Verify all expected operations completed by checking
               stats, replicaiton, views, data correctness
        """
        # Pick the reference node for communication
        # We pick a node in the cluster which will NOT be failed over
        self.filter_list = []
        if self.failoverMaster:
            self.master = self.cluster.servers[1]
        else:
            self.master = self.cluster.master
        self.log.info("Picking node {0} as reference node for test case"
                      .format(self.master.ip))
        self.print_test_params(failover_reason)
        self.rest = RestConnection(self.master)
        self.nodes = self.rest.node_statuses()
        # Set the data path for the cluster
        self.data_path = self.rest.get_data_path()

        # Variable to decide the durability outcome
        durability_will_fail = False
        # Variable to track the number of nodes failed
        num_nodes_failed = 1

        # Check whether all nodes in the cluster run version >= 3.0.0
        versions = self.rest.get_nodes_versions()
        self.version_greater_than_2_5 = True
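        # Lexicographic string comparison: version strings such as "2.5.1"
        # sort below "3", marking the cluster as pre-3.0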
        for version in versions:
            if "3" > version:
                self.version_greater_than_2_5 = False

        # Do not run this test if the graceful category is being used
        if not self.version_greater_than_2_5 \
                and (self.graceful or self.recoveryType is not None):
            self.log.error(
                "Can't apply graceful failover to nodes with version < 3.*")
            self.log.error("Please check configuration params: SKIPPING TEST")
            return

        # Find nodes that will under go failover
        if self.failoverMaster:
            self.chosen = RebalanceHelper.pick_nodes(
                self.master, howmany=1, target_node=self.servers[0])
        else:
            self.chosen = RebalanceHelper.pick_nodes(
                self.master, howmany=self.num_failed_nodes)

        # Perform operations - Create/Update/Delete
        # self.withMutationOps = True => Run Operations in parallel to failover
        # self.withMutationOps = False => Run Operations Before failover
        self.load_initial_data()
        if not self.withMutationOps:
            self.run_mutation_operations()
        # Perform View Creation Tasks and
        # check for completion if required before failover
        if self.withViewsOps:
            self.run_view_creation_operations(self.servers)
            if not self.createIndexesDuringFailover:
                self.query_and_monitor_view_tasks(self.servers)

        # Take a snapshot of the data set used for validation
        record_static_data_set = {}
        prev_vbucket_stats = {}
        prev_failover_stats = {}
        if not self.withMutationOps:
            record_static_data_set = self.bucket_util.get_data_set_all(
                self.cluster.servers, self.buckets, path=None)

        # Capture vbucket and failover stats if test version >= 2.5.*
        if self.version_greater_than_2_5 and self.upr_check:
            prev_vbucket_stats = self.bucket_util.get_vbucket_seqnos(
                self.servers, self.buckets)
            prev_failover_stats = self.bucket_util.get_failovers_logs(
                self.servers, self.buckets)

        # Perform Operations related to failover
        if self.withMutationOps or self.withViewsOps or self.compact:
            self.run_failover_operations_with_ops(self.chosen, failover_reason)
        else:
            self.run_failover_operations(self.chosen, failover_reason)

        target_bucket = self.bucket_util.buckets[0]

        # Update new_replica value, if provided in the conf
        if self.new_replica:
            self.num_replicas = self.new_replica
            bucket_helper = BucketHelper(self.master)
            bucket_helper.change_bucket_props(target_bucket.name,
                                              replicaNumber=self.num_replicas)

        # Decide whether the durability is going to fail or not
        if self.num_failed_nodes >= 1 and self.num_replicas > 1:
            durability_will_fail = True

        # Construct target vbucket list from the nodes
        # which are going to be failed over
        vbucket_list = list()
        for target_node in self.chosen:
            shell_conn = RemoteMachineShellConnection(target_node)
            cb_stats = Cbstats(shell_conn)
            vbuckets = cb_stats.vbucket_list(target_bucket.name,
                                             self.target_vbucket_type)
            shell_conn.disconnect()
            vbucket_list += vbuckets

        # Generate doc_loaders scoped to the vbuckets owned by the targeted
        # nodes, so CRUDs are performed only on the vbuckets that will be
        # affected by the failover
        self.gen_create = doc_generator(self.key,
                                        self.num_items,
                                        self.num_items * 1.5,
                                        target_vbucket=vbucket_list)
        self.gen_update = doc_generator(self.key,
                                        self.num_items / 2,
                                        self.num_items,
                                        target_vbucket=vbucket_list)
        self.gen_delete = doc_generator(self.key,
                                        self.num_items / 4,
                                        self.num_items / 2 - 1,
                                        target_vbucket=vbucket_list)
        self.afterfailover_gen_create = doc_generator(
            self.key,
            self.num_items * 1.6,
            self.num_items * 2,
            target_vbucket=vbucket_list)
        self.afterfailover_gen_update = doc_generator(
            self.key, 1, self.num_items / 4, target_vbucket=vbucket_list)
        self.afterfailover_gen_delete = doc_generator(
            self.key,
            self.num_items * 0.5,
            self.num_items * 0.75,
            target_vbucket=vbucket_list)

        # Perform Add Back Operation with Rebalance
        # or only Rebalance with verifications
        if not self.gracefulFailoverFail and self.runRebalanceAfterFailover:
            if self.failover_onebyone:
                # Reset it back to False
                durability_will_fail = False
                for node_chosen in self.chosen:
                    if num_nodes_failed > 1:
                        durability_will_fail = True

                    if self.add_back_flag:
                        # In add-back case, durability should never fail, since
                        # the num_nodes in the cluster will remain the same
                        self.run_add_back_operation_and_verify(
                            [node_chosen],
                            prev_vbucket_stats,
                            record_static_data_set,
                            prev_failover_stats,
                            rebalance_type=rebalance_type)
                    else:
                        self.run_rebalance_after_failover_and_verify(
                            [node_chosen],
                            prev_vbucket_stats,
                            record_static_data_set,
                            prev_failover_stats,
                            durability_will_fail=durability_will_fail)
                    num_nodes_failed += 1
            else:
                if self.add_back_flag:
                    self.run_add_back_operation_and_verify(
                        self.chosen,
                        prev_vbucket_stats,
                        record_static_data_set,
                        prev_failover_stats,
                        durability_will_fail=durability_will_fail,
                        rebalance_type=rebalance_type)
                else:
                    self.run_rebalance_after_failover_and_verify(
                        self.chosen,
                        prev_vbucket_stats,
                        record_static_data_set,
                        prev_failover_stats,
                        durability_will_fail=durability_will_fail)
        else:
            return

        # Verify unacked_bytes only if durability is not expected to fail
        if self.during_ops is None and not durability_will_fail:
            self.bucket_util.verify_unacked_bytes_all_buckets(
                filter_list=self.filter_list)
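
The failover body above narrows its doc loaders to the vbuckets hosted on
the nodes being failed over. Below is a minimal standalone sketch of that
pattern; it assumes the TAF-style RemoteMachineShellConnection, Cbstats and
doc_generator helpers used throughout these examples, and the function name
itself is illustrative:

def build_failover_doc_gen(chosen_nodes, bucket_name, key_prefix, num_items,
                           vbucket_type="active"):
    # Collect the vbuckets owned (as active or replica copies) by the
    # nodes that are about to be failed over
    vbucket_list = list()
    for node in chosen_nodes:
        shell = RemoteMachineShellConnection(node)
        try:
            vbucket_list += Cbstats(shell).vbucket_list(bucket_name,
                                                        vbucket_type)
        finally:
            shell.disconnect()
    # Doc loader restricted to keys hashing into those vbuckets, so
    # every CRUD it performs is affected by the failover
    return doc_generator(key_prefix, 0, num_items,
                         target_vbucket=vbucket_list)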
Example #29
    def test_create_remove_collection_with_node_crash(self):
        """
        1. Select a error scenario to simulate in random
        2. Create error scenario either before or after collection action
        3. Initiate collection creation/deletion under the bucket
        4. Validate the outcome of collection creation/deletion
        """
        def create_collection(client_type, bucket_obj, scope, collection):
            if client_type == "sdk":
                client.create_collection(collection, scope)
                self.bucket_util.create_collection_object(bucket_obj, scope,
                                                          {"name": collection})
            elif client_type == "rest":
                self.bucket_util.create_collection(self.cluster.master,
                                                   bucket_obj,
                                                   scope,
                                                   {"name": collection})
            else:
                self.log_failure("Invalid client_type provided")

        def remove_collection(client_type, bucket_obj, scope, collection):
            if client_type == "sdk":
                client.drop_collection(scope, collection)
                self.bucket_util.mark_collection_as_dropped(bucket_obj, scope,
                                                            collection)
            elif client_type == "rest":
                self.bucket_util.drop_collection(self.cluster.master,
                                                 bucket_obj, scope, collection)
            else:
                self.log_failure("Invalid client_type provided")

        kv_nodes = self.cluster_util.get_kv_nodes()
        if len(kv_nodes) == 1:
            self.fail("Need atleast two KV nodes to run this test")

        client = None
        task = None
        action = self.input.param("action", "create")
        crash_during = self.input.param("crash_during", "pre_action")
        data_load_option = self.input.param("data_load_option", None)
        crash_type = self.input.param("simulate_error",
                                      CouchbaseError.KILL_MEMCACHED)

        if self.scope_name != CbServer.default_scope:
            self.scope_name = \
                BucketUtils.get_random_name(
                    max_length=CbServer.max_scope_name_len)
            self.bucket_util.create_scope(self.cluster.master, self.bucket,
                                          {"name": self.scope_name})
        if self.collection_name != CbServer.default_collection:
            self.collection_name = \
                BucketUtils.get_random_name(
                    max_length=CbServer.max_collection_name_len)

        # Select a KV node other than master node from the cluster
        node_to_crash = kv_nodes[sample(range(1, len(kv_nodes)), 1)[0]]

        client = self.sdk_client_pool.get_client_for_bucket(self.bucket)
        use_client = sample(["sdk", "rest"], 1)[0]

        if action == "remove" \
                and self.collection_name != CbServer.default_collection:
            # Create a collection to be removed
            create_collection(use_client, self.bucket,
                              self.scope_name, self.collection_name)

        # Create an error scenario
        self.log.info("Selected scenario for test '%s'" % crash_type)
        shell = RemoteMachineShellConnection(node_to_crash)
        cb_error = CouchbaseError(self.log, shell)
        cbstat_obj = Cbstats(shell)
        active_vbs = cbstat_obj.vbucket_list(self.bucket.name,
                                             vbucket_type="active")
        target_vbuckets = list(
            set(range(0, 1024)).difference(set(active_vbs)))
        doc_gen = doc_generator(self.key, 0, 1000,
                                target_vbucket=target_vbuckets)

        if crash_during == "pre_action":
            cb_error.create(crash_type)

        if data_load_option == "mutate_default_collection":
            task = self.task.async_load_gen_docs(
                self.cluster, self.bucket, doc_gen,
                DocLoading.Bucket.DocOps.UPDATE,
                exp=self.maxttl,
                batch_size=200, process_concurrency=8,
                compression=self.sdk_compression,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout)

        if action == "create":
            create_collection(self.client_type, self.bucket,
                              self.scope_name, self.collection_name)
        elif action == "remove":
            remove_collection(self.client_type, self.bucket,
                              self.scope_name, self.collection_name)

        if crash_during == "post_action":
            cb_error.create(crash_type)

        if data_load_option == "mutate_default_collection":
            self.task_manager.get_task_result(task)

        self.sleep(60, "Wait before reverting the error scenario")
        cb_error.revert(crash_type)

        # Close SSH and SDK connections
        shell.disconnect()
        if self.atomicity is False:
            self.bucket_util.validate_docs_per_collections_all_buckets(
                self.cluster)
        self.validate_test_failure()
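
The create/act/revert sequence above leaves the node in the error state if
anything raises between cb_error.create() and cb_error.revert(). A minimal
sketch of a context-manager wrapper that pairs them unconditionally, assuming
the same CouchbaseError helper used above (simulated_error is an illustrative
name):

from contextlib import contextmanager

@contextmanager
def simulated_error(log, shell, error_type):
    # Ensure CouchbaseError.revert() runs even if the test body raises,
    # so the node never stays in the simulated error state
    cb_err = CouchbaseError(log, shell)
    cb_err.create(error_type)
    try:
        yield cb_err
    finally:
        cb_err.revert(error_type)

# Usage inside a test:
#   with simulated_error(self.log, shell, CouchbaseError.KILL_MEMCACHED):
#       create_collection(use_client, self.bucket, scope_name, collection_name)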
Example #30
    def online_swap(self, node_to_upgrade, version):
        vb_details = dict()
        vb_verification = dict()
        vb_types = ["active", "replica"]

        # Fetch active services on node_to_upgrade
        rest = self.__get_rest_node(node_to_upgrade)
        services = rest.get_nodes_services()
        services_on_target_node = services[(node_to_upgrade.ip + ":" +
                                            node_to_upgrade.port)]

        # Record vbuckets in swap_node
        if "kv" in services_on_target_node:
            shell = RemoteMachineShellConnection(node_to_upgrade)
            cbstats = Cbstats(shell)
            for vb_type in vb_types:
                vb_details[vb_type] = \
                    cbstats.vbucket_list(self.bucket.name, vb_type)
            shell.disconnect()

        # Install target version on spare node
        self.install_version_on_node([self.spare_node], version)

        # Add the spare node and swap-rebalance out the node being upgraded
        rest.add_node(self.creds.rest_username,
                      self.creds.rest_password,
                      self.spare_node.ip,
                      self.spare_node.port,
                      services=services_on_target_node)
        eject_otp_node = self.__get_otp_node(rest, node_to_upgrade)
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                       ejectedNodes=[eject_otp_node.id])
        self.sleep(5, "Wait for rebalance to start")
        rebalance_passed = rest.monitorRebalance()
        if not rebalance_passed:
            self.log_failure(
                "Swap rebalance failed during upgrade of {0}".format(
                    node_to_upgrade))
            return

        # VBuckets shuffling verification
        if "kv" in services_on_target_node:
            # Fetch vbucket stats after swap rebalance for verification
            shell = RemoteMachineShellConnection(self.spare_node)
            cbstats = Cbstats(shell)
            for vb_type in vb_types:
                vb_verification[vb_type] = \
                    cbstats.vbucket_list(self.bucket.name, vb_type)
            shell.disconnect()

            # Check vbuckets are shuffled or not
            for vb_type in vb_types:
                if sorted(vb_details[vb_type]) \
                        != sorted(vb_verification[vb_type]):
                    self.log_failure(
                        "%s vbuckets shuffled post swap_rebalance" % vb_type)
                    self.log.error("%s vbuckets before vs after: %s != %s" %
                                   (vb_type, vb_details[vb_type],
                                    vb_verification[vb_type]))

        # Update spare_node to rebalanced-out node
        self.spare_node = node_to_upgrade
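
The vbucket-shuffle check above must compare the before/after lists
order-insensitively. Note that list.sort() sorts in place and returns None,
so sorted() is needed for the inequality to ever fire; a minimal
self-contained sketch (vbuckets_shuffled is an illustrative name):

def vbuckets_shuffled(before, after):
    # sorted() returns a new list; list.sort() returns None, which would
    # make "a.sort() != b.sort()" compare None with None and never fire
    return sorted(before) != sorted(after)

assert not vbuckets_shuffled([1, 5, 3], [3, 1, 5])
assert vbuckets_shuffled([1, 5, 3], [3, 1, 6])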