Example #1
    def create(self, action=None, bucket_name="default"):
        self.log.info("Simulating '{0}' in {1}".format(action,
                                                       self.shell_conn.ip))
        if action == CouchbaseError.STOP_MEMCACHED:
            _, error = self.__interrupt_process("memcached", "stop")
            self.__handle_shell_error(error)
        elif action == CouchbaseError.KILL_MEMCACHED:
            _, error = self.__interrupt_process("memcached", "kill")
            self.__handle_shell_error(error)
        elif action == CouchbaseError.STOP_BEAMSMP:
            _, error = self.__interrupt_process("beam.smp", "stop")
            self.__handle_shell_error(error)
        elif action == CouchbaseError.KILL_BEAMSMP:
            _, error = self.__interrupt_process("beam.smp", "kill")
            self.__handle_shell_error(error)
        elif action == CouchbaseError.STOP_PROMETHEUS:
            _, error = self.__interrupt_process("prometheus", "stop")
            self.__handle_shell_error(error)
        elif action == CouchbaseError.KILL_PROMETHEUS:
            _, error = self.__interrupt_process("prometheus", "kill")
            self.__handle_shell_error(error)
        elif action == CouchbaseError.STOP_SERVER:
            self.shell_conn.stop_server()
        elif action == CouchbaseError.STOP_PERSISTENCE:
            cbepctl_obj = Cbepctl(self.shell_conn)
            cbepctl_obj.persistence(bucket_name, "stop")
        else:
            self.log.error("Unsupported action: '{0}'".format(action))
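The `__interrupt_process` helper is not shown on this page. Judging from the "stop"/"kill" arguments here and the "resume" argument in Example #4, a plausible shape for it is the hypothetical sketch below; the real implementation lives in the same class and may differ:

    def __interrupt_process(self, process_name, action):
        # Map the symbolic action to a POSIX signal: SIGSTOP freezes the
        # process, SIGCONT resumes it, SIGKILL terminates it outright
        signal_name = {"stop": "SIGSTOP",
                       "resume": "SIGCONT",
                       "kill": "SIGKILL"}[action]
        # execute_command returns an (output, error) pair
        return self.shell_conn.execute_command(
            "pkill -%s %s" % (signal_name, process_name))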
Example #2
    def test_rollback_and_persistence_race_condition(self):
        cluster = self.cluster
        gen_load = doc_generator(self.key, 0, self.num_items)
        for bucket in self.bucket_util.buckets:
            task = self.task.async_load_gen_docs(
                self.cluster, bucket, gen_load, "create", 0,
                batch_size=10, process_concurrency=8,
                replicate_to=self.replicate_to, persist_to=self.persist_to,
                timeout_secs=self.sdk_timeout, retries=self.sdk_retries)
            self.task.jython_task_manager.get_task_result(task)

        # Stop persistence
        for server in cluster.servers[:self.nodes_init]:
            # Create cbepctl command object
            node_shell_conn = RemoteMachineShellConnection(server)
            cbepctl_obj = Cbepctl(node_shell_conn)

            for bucket in self.bucket_util.buckets:
                cbepctl_obj.persistence(bucket.name, "stop")

            # Disconnect the shell_connection
            node_shell_conn.disconnect()

        self.sleep(10, "Wait after stop_persistence")

        # Second round of load over the same key range (doc_size=64);
        # with persistence stopped, these mutations stay memory-only
        gen_load = doc_generator(self.key, 0, self.num_items, doc_size=64)
        for bucket in self.bucket_util.buckets:
            task = self.task.async_load_gen_docs(
                self.cluster, bucket, gen_load, "create", 0,
                batch_size=10, process_concurrency=8,
                replicate_to=self.replicate_to, persist_to=self.persist_to,
                timeout_secs=self.sdk_timeout, retries=self.sdk_retries)
            self.task.jython_task_manager.get_task_result(task)

        shell = RemoteMachineShellConnection(cluster.servers[0])
        shell.kill_memcached()

        self.sleep(10, "Wait after kill memcached")

        node1_shell_conn = RemoteMachineShellConnection(cluster.servers[0])
        node2_shell_conn = RemoteMachineShellConnection(cluster.servers[1])
        node1_cb_stat_obj = Cbstats(node1_shell_conn)
        node2_cb_stat_obj = Cbstats(node2_shell_conn)

        # Note: 'bucket' here is the last bucket from the loop above
        node1_items = node1_cb_stat_obj.all_stats(bucket, "curr_items_tot")
        node2_items = node2_cb_stat_obj.all_stats(bucket, "curr_items_tot")

        # Disconnect the opened connections
        node1_shell_conn.disconnect()
        node2_shell_conn.disconnect()

        self.assertEqual(node1_items, node2_items,
                         'Node items not equal. Node 1:{0}, node 2:{1}'
                         .format(node1_items, node2_items))
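The fixed `self.sleep(10)` waits above can be flaky on slow machines. A sketch of a bounded poll that waits for the two nodes' item counts to converge instead (the helper itself is hypothetical; the stat calls mirror the example's `all_stats(bucket, "curr_items_tot")` usage):

    import time

    def wait_for_item_count_match(stat_a, stat_b, bucket,
                                  timeout=60, interval=2):
        # Poll curr_items_tot on both nodes until they agree or we time out
        end_time = time.time() + timeout
        while time.time() < end_time:
            items_a = stat_a.all_stats(bucket, "curr_items_tot")
            items_b = stat_b.all_stats(bucket, "curr_items_tot")
            if items_a == items_b:
                return True
            time.sleep(interval)
        return False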
Example #3
    def test_rollback_to_zero(self):
        items = self.num_items
        mem_only_items = self.input.param("rollback_items", 10000)
        if self.nodes_init < 2 or self.num_replicas < 1:
            self.fail("Not enough nodes/replicas to test rollback")

        # Fetch vbucket stats for validation
        self.get_vb_details_cbstats_for_all_nodes("pre_rollback")

        start = self.num_items
        shell = self.node_shells[self.cluster.master]["shell"]
        cbstats = self.node_shells[self.cluster.master]["cbstat"]
        self.target_vbucket = cbstats.vbucket_list(self.bucket.name)

        # Stopping persistence on NodeA
        cbepctl = Cbepctl(shell)
        cbepctl.persistence(self.bucket.name, "stop")

        for i in xrange(1, self.num_rollbacks + 1):
            self.gen_create = doc_generator(
                self.key,
                start,
                mem_only_items,
                doc_size=self.doc_size,
                doc_type=self.doc_type,
                target_vbucket=self.target_vbucket,
                vbuckets=self.cluster_util.vbuckets,
                randomize_doc_size=self.randomize_doc_size,
                randomize_value=self.randomize_value)
            self.load_docs()
            if self.rollback_with_multiple_mutation:
                self.doc_ops = "update"
                self.load_docs()
            start = self.gen_create.key_counter
            stat_map = {self.cluster.nodes_in_cluster[0]: mem_only_items * i}
            for node in self.cluster.nodes_in_cluster[1:]:
                stat_map.update({node: 0})

            for bucket in self.bucket_util.buckets:
                self.bucket_util._wait_for_stat(bucket, stat_map)
            self.sleep(60)
            self.get_vb_details_cbstats_for_all_nodes("post_rollback")
            self.validate_seq_no_post_rollback("pre_rollback", "post_rollback")

        shell.kill_memcached()
        self.assertTrue(
            self.bucket_util._wait_warmup_completed(
                [self.cluster_util.cluster.master],
                self.bucket,
                wait_time=self.wait_timeout * 10))
        self.bucket_util.verify_stats_all_buckets(items)
        shell.disconnect()
        self.validate_test_failure()
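Note how this test differs from test_rollback_n_times (Example #6): persistence is stopped once, before the loop, and never restarted, so each iteration only piles more unpersisted mutations onto NodeA (the stat_map expects mem_only_items * i there and 0 elsewhere). When memcached is finally killed, NodeA warms up from disk with none of those mutations, and verify_stats_all_buckets(items) confirms the bucket rolled all the way back to the original item count.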
Example #4
    def revert(self, action=None, bucket_name="default"):
        self.log.info("Reverting '{0}' in {1}".format(action,
                                                      self.shell_conn.ip))
        if action == CouchbaseError.STOP_MEMCACHED:
            _, error = self.__interrupt_process("memcached", "resume")
            self.__handle_shell_error(error)
        elif action == CouchbaseError.STOP_BEAMSMP:
            _, error = self.__interrupt_process("beam.smp", "resume")
            self.__handle_shell_error(error)
        elif action in (CouchbaseError.KILL_BEAMSMP,
                        CouchbaseError.STOP_SERVER):
            self.shell_conn.start_server()
        elif action == CouchbaseError.STOP_PERSISTENCE:
            cbepctl_obj = Cbepctl(self.shell_conn)
            cbepctl_obj.persistence(bucket_name, "start")
        else:
            self.log.error(
                "Unsupported action to revert: '{0}'".format(action))
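Examples #1 and #4 are two halves of one error-simulation helper. A minimal sketch of how a test typically drives them, assuming (as the self.log/self.shell_conn usage suggests) that the class is constructed from a logger and a node's shell connection:

    # Hypothetical driver snippet; CouchbaseError here is the helper class
    # whose create()/revert() methods are shown above
    error_sim = CouchbaseError(self.log, shell_conn)
    error_sim.create(CouchbaseError.STOP_PERSISTENCE, bucket_name=bucket.name)
    # ... load documents while persistence is down ...
    error_sim.revert(CouchbaseError.STOP_PERSISTENCE, bucket_name=bucket.name)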
Example #5
    def replicate_correct_data_after_rollback(self):
        '''
        @attention:
          This test is known to be flaky in Docker runs;
          it passes consistently on VMs.
        '''

        bucket = self.bucket_util.buckets[0]
        cluster = self.cluster

        gen_load = doc_generator(self.key, 0, self.num_items)
        for bucket in self.bucket_util.buckets:
            task = self.task.async_load_gen_docs(
                self.cluster, bucket, gen_load, "create", 0,
                batch_size=10, process_concurrency=8,
                replicate_to=self.replicate_to, persist_to=self.persist_to,
                timeout_secs=self.sdk_timeout, retries=self.sdk_retries)
            self.task.jython_task_manager.get_task_result(task)

        # Record values for keys whose active vBucket lives on node 1
        modified_kvs_active_on_node1 = dict()
        vbucket_client = VBucketAwareMemcached(
            RestConnection(cluster.master), bucket.name)
        client = MemcachedClientHelper.direct_client(cluster.servers[0],
                                                     bucket.name)
        for i in range(self.num_items/100):
            keyname = 'keyname-' + str(i)
            vbId = self.bucket_util.get_vbucket_num_for_key(keyname,
                                                            self.vbuckets)
            if vbucket_client.vBucketMap[vbId].split(':')[0] == cluster.servers[0].ip:
                rc = client.get(keyname)
                modified_kvs_active_on_node1[keyname] = rc[2]

        # Stop persistence
        for server in cluster.servers[:self.nodes_init]:
            # Create cbepctl command object
            node_shell_conn = RemoteMachineShellConnection(server)
            cbepctl_obj = Cbepctl(node_shell_conn)

            for bucket in self.bucket_util.buckets:
                cbepctl_obj.persistence(bucket.name, "stop")

            # Disconnect the shell_connection
            node_shell_conn.disconnect()

        # Modify a small subset (num_items/100) of the keys
        gen_load = doc_generator(self.key, 0, self.num_items/100)
        rc = self.cluster.load_gen_docs(
            cluster.servers[0], bucket.name, gen_load,
            bucket.kvs[1], "create", exp=0, flag=0, batch_size=10,
            compression=self.sdk_compression)

        # Kill memcached; because persistence is disabled, the restarted
        # process will have lost the second set of mutations
        shell = RemoteMachineShellConnection(cluster.servers[0])
        shell.kill_memcached()
        self.sleep(10, "Sleep after kill memcached")

        # Start persistence on the second node
        # Create cbepctl command object
        node_shell_conn = RemoteMachineShellConnection(cluster.servers[1])
        cbepctl_obj = Cbepctl(node_shell_conn)

        for bucket in self.bucket_util.buckets:
            cbepctl_obj.persistence(bucket.name, "start")

        # Disconnect the shell_connection
        node_shell_conn.disconnect()

        self.sleep(10, "Sleep after start persistence")

        # failover to the second node
        rc = self.cluster.failover(cluster.servers, cluster.servers[1:2],
                                   graceful=True)
        self.sleep(30, "Sleep after node failover triggered")

        # Values should be what they were prior to the second update
        client = MemcachedClientHelper.direct_client(
            cluster.servers[0], bucket.name)
        for k, v in modified_kvs_active_on_node1.iteritems():
            rc = client.get(k)
            self.assertEqual(v, rc[2],
                             'Expected {0}, actual {1}'.format(v, rc[2]))

        # Rebalance the failed-over node back into the cluster

        rest_obj = RestConnection(cluster.servers[0])
        nodes_all = rest_obj.node_statuses()
        node_id_for_recovery = None
        for node in nodes_all:
            if node.ip == cluster.servers[1].ip:
                node_id_for_recovery = node.id
                break

        status = rest_obj.add_back_node(node_id_for_recovery)
        if status:
            rest_obj.set_recovery_type(node_id_for_recovery,
                                       recoveryType='delta')
        rc = self.cluster.rebalance(cluster.servers[:self.nodes_init], [], [])
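The add_back_node / set_recovery_type(recoveryType='delta') / rebalance sequence at the end re-adds the failed-over node using delta recovery, so it catches up from its existing on-disk data rather than rebuilding its vBuckets from scratch. That is what lets the test confirm the node replicated the rolled-back (correct) data set.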
Example #6
    def test_rollback_n_times(self):
        items = self.num_items
        mem_only_items = self.input.param("rollback_items", 100)
        if self.nodes_init < 2 or self.num_replicas < 1:
            self.fail("Not enough nodes/replicas to test rollback")

        # Fetch vbucket stats for validation
        self.get_vb_details_cbstats_for_all_nodes("pre_rollback")

        shell = self.node_shells[self.cluster.master]["shell"]
        cbstats = self.node_shells[self.cluster.master]["cbstat"]
        self.target_vbucket = cbstats.vbucket_list(self.bucket.name)
        start = self.num_items
        self.gen_validate = self.gen_create

        for _ in xrange(1, self.num_rollbacks + 1):
            # Stopping persistence on NodeA
            cbepctl = Cbepctl(shell)
            cbepctl.persistence(self.bucket.name, "stop")
            self.gen_create = doc_generator(
                self.key,
                start,
                mem_only_items,
                doc_size=self.doc_size,
                doc_type=self.doc_type,
                target_vbucket=self.target_vbucket,
                vbuckets=self.cluster_util.vbuckets,
                randomize_doc_size=self.randomize_doc_size,
                randomize_value=self.randomize_value)

            self.load_docs()
            if self.rollback_with_multiple_mutation:
                self.doc_ops = "update"
                self.load_docs()
            start = self.gen_create.key_counter
            ep_queue_size_map = {
                self.cluster.nodes_in_cluster[0]: mem_only_items
            }
            vb_replica_queue_size_map = {self.cluster.nodes_in_cluster[0]: 0}
            for node in self.cluster.nodes_in_cluster[1:]:
                ep_queue_size_map.update({node: 0})
                vb_replica_queue_size_map.update({node: 0})

            for bucket in self.bucket_util.buckets:
                self.bucket_util._wait_for_stat(bucket, ep_queue_size_map)
                self.bucket_util._wait_for_stat(
                    bucket,
                    vb_replica_queue_size_map,
                    stat_name="vb_replica_queue_size")

            # Kill memcached on NodeA to trigger rollback on other Nodes
            # replica vBuckets
            for bucket in self.bucket_util.buckets:
                self.log.debug(cbstats.failover_stats(bucket.name))
            shell.kill_memcached()
            self.assertTrue(
                self.bucket_util._wait_warmup_completed(
                    [self.cluster_util.cluster.master],
                    self.bucket,
                    wait_time=self.wait_timeout * 10))
            self.sleep(10, "Wait after warmup complete. Not required !!")
            self.bucket_util.verify_stats_all_buckets(items, timeout=300)
            for bucket in self.bucket_util.buckets:
                self.log.debug(cbstats.failover_stats(bucket.name))

            data_validation = self.task.async_validate_docs(self.cluster,
                                                            self.bucket,
                                                            self.gen_validate,
                                                            "create",
                                                            0,
                                                            batch_size=10)
            self.task.jython_task_manager.get_task_result(data_validation)
            self.get_vb_details_cbstats_for_all_nodes("post_rollback")
            self.validate_seq_no_post_rollback("pre_rollback", "post_rollback")

        self.validate_test_failure()
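Unlike Example #3, persistence is re-stopped at the top of every iteration. That is necessary because cbepctl settings are runtime-only: once shell.kill_memcached() forces a restart, the new memcached process comes back with persistence enabled, so each rollback cycle must disable it again before loading the next batch of memory-only items.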
Example #7
    def test_ttl_less_than_durability_timeout(self):
        """
        MB-43238
        1. Regular write with TTL 1 second for some key
        2. Disable expiry pager (to prevent raciness)
        3. Wait TTL period
        4. Disable persistence on the node with the replica vBucket for that key
        5. SyncWrite PersistMajority to active vBucket for that key (should hang)
        6. Access key on other thread to trigger expiry
        7. Observe DCP connection being torn down without fix
        """
        def perform_sync_write():
            client.crud(DocLoading.Bucket.DocOps.CREATE,
                        key, {},
                        durability=Bucket.DurabilityLevel.PERSIST_TO_MAJORITY,
                        timeout=60)

        doc_ttl = 5
        target_node = None
        key = "test_ttl_doc"
        vb_for_key = self.bucket_util.get_vbucket_num_for_key(key)
        bucket = self.cluster.buckets[0]

        # Find target node for replica VB
        for target_node in self.cluster.nodes_in_cluster:
            cb_stats = Cbstats(target_node)
            if vb_for_key in cb_stats.vbucket_list(bucket.name, "replica"):
                break

        self.log.info("Target node: %s, Key: %s" % (target_node.ip, key))
        self.log.info("Disabling expiry_pager")
        shell = RemoteMachineShellConnection(target_node)
        cb_ep_ctl = Cbepctl(shell)
        cb_ep_ctl.set(bucket.name, "flush_param", "exp_pager_stime", 0)

        # Create SDK client
        client = SDKClient([self.cluster.master], bucket)

        self.log.info("Non-sync write with TTL=%s" % doc_ttl)
        client.crud(DocLoading.Bucket.DocOps.CREATE, key, {}, exp=doc_ttl)

        self.sleep(doc_ttl, "Wait for document to expire")
        self.bucket_util._wait_for_stats_all_buckets(self.cluster,
                                                     self.cluster.buckets)

        self.log.info("Stopping persistence on replica VB node using cbepctl")
        cb_ep_ctl.persistence(bucket.name, "stop")

        # Start the durable write on a separate thread; with persistence
        # stopped on the replica node, it should block on PERSIST_TO_MAJORITY
        doc_create_thread = Thread(target=perform_sync_write)
        doc_create_thread.start()
        self.sleep(2, "Wait for sync_write thread to start")

        self.log.info("Read key from another thread to trigger expiry")
        failure = None
        result = client.crud(DocLoading.Bucket.DocOps.READ, key)
        if SDKException.DocumentNotFoundException not in str(result["error"]):
            failure = "Invalid exception: %s" % result["error"]

        self.log.info("Resuming persistence on target node")
        cb_ep_ctl.persistence(bucket.name, "start")

        # Wait for doc_create_thread to complete
        doc_create_thread.join()

        # Close SDK client and shell connections
        client.close()
        shell.disconnect()

        if failure:
            self.fail(failure)

        for node in self.cluster.nodes_in_cluster:
            cb_stats = Cbstats(node).all_stats(bucket.name)
            self.log.info("Node: %s, ep_expired_access: %s" %
                          (node.ip, cb_stats["ep_expired_access"]))
            self.assertEqual(int(cb_stats["ep_expired_access"]), 0,
                             "%s: ep_expired_access != 0" % node.ip)