Example #1
 def create(self, action=None, bucket_name="default"):
     self.log.info("Simulating '{0}' in {1}".format(action,
                                                    self.shell_conn.ip))
     if action == CouchbaseError.STOP_MEMCACHED:
         _, error = self.__interrupt_process("memcached", "stop")
         self.__handle_shell_error(error)
     elif action == CouchbaseError.KILL_MEMCACHED:
         _, error = self.__interrupt_process("memcached", "kill")
         self.__handle_shell_error(error)
     elif action == CouchbaseError.STOP_BEAMSMP:
         _, error = self.__interrupt_process("beam.smp", "stop")
         self.__handle_shell_error(error)
     elif action == CouchbaseError.STOP_PROMETHEUS:
         _, error = self.__interrupt_process("prometheus", "stop")
         self.__handle_shell_error(error)
     elif action == CouchbaseError.KILL_BEAMSMP:
         _, error = self.__interrupt_process("beam.smp", "kill")
         self.__handle_shell_error(error)
     elif action == CouchbaseError.KILL_PROMETHEUS:
         _, error = self.__interrupt_process("prometheus", "kill")
         self.__handle_shell_error(error)
     elif action == CouchbaseError.STOP_SERVER:
         self.shell_conn.stop_server()
     elif action == CouchbaseError.STOP_PERSISTENCE:
         cbepctl_obj = Cbepctl(self.shell_conn)
         cbepctl_obj.persistence(bucket_name, "stop")
     else:
         self.log.error("Unsupported action: '{0}'".format(action))
Example #2
    def test_rollback_and_persistence_race_condition(self):
        cluster = self.cluster
        gen_load = doc_generator(self.key, 0, self.num_items)
        for bucket in self.bucket_util.buckets:
            task = self.task.async_load_gen_docs(
                self.cluster, bucket, gen_load, "create", 0,
                batch_size=10, process_concurrency=8,
                replicate_to=self.replicate_to, persist_to=self.persist_to,
                timeout_secs=self.sdk_timeout, retries=self.sdk_retries)
            self.task.jython_task_manager.get_task_result(task)

        # Stop persistence
        for server in cluster.servers[:self.nodes_init]:
            # Create cbepctl command object
            node_shell_conn = RemoteMachineShellConnection(server)
            cbepctl_obj = Cbepctl(node_shell_conn)

            for bucket in self.bucket_util.buckets:
                cbepctl_obj.persistence(bucket.name, "stop")

            # Disconnect the shell_connection
            node_shell_conn.disconnect()

        self.sleep(10, "Wait after stop_persistence")

        # more (non-intersecting) load
        gen_load = doc_generator(self.key, 0, self.num_items, doc_size=64)
        for bucket in self.bucket_util.buckets:
            task = self.task.async_load_gen_docs(
                self.cluster, bucket, gen_load, "create", 0,
                batch_size=10, process_concurrency=8,
                replicate_to=self.replicate_to, persist_to=self.persist_to,
                timeout_secs=self.sdk_timeout, retries=self.sdk_retries)
            self.task.jython_task_manager.get_task_result(task)

        shell = RemoteMachineShellConnection(cluster.servers[0])
        shell.kill_memcached()
        shell.disconnect()

        self.sleep(10, "Wait after kill memcached")

        node1_shell_conn = RemoteMachineShellConnection(cluster.servers[0])
        node2_shell_conn = RemoteMachineShellConnection(cluster.servers[1])
        node1_cb_stat_obj = Cbstats(node1_shell_conn)
        node2_cb_stat_obj = Cbstats(node2_shell_conn)

        node1_items = node1_cb_stat_obj.all_stats(bucket, "curr_items_tot")
        node2_items = node2_cb_stat_obj.all_stats(bucket, "curr_items_tot")

        # Disconnect the opened connections
        node1_shell_conn.disconnect()
        node2_shell_conn.disconnect()

        self.assertEqual(node1_items, node2_items,
                         'Node items not equal. Node 1: {0}, node 2: {1}'
                         .format(node1_items, node2_items))
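
The stop/start persistence loops in this test (and again in Example #8) repeat
the same per-node pattern; a small shared helper is sketched below as an
illustration only; the helper name and its placement on the test class are
assumptions, while Cbepctl and RemoteMachineShellConnection are taken from the
snippet above:

    # Hedged sketch of a shared persistence toggle; state is "stop" or "start"
    def toggle_persistence(self, servers, buckets, state):
        for server in servers:
            node_shell_conn = RemoteMachineShellConnection(server)
            try:
                cbepctl_obj = Cbepctl(node_shell_conn)
                for bucket in buckets:
                    cbepctl_obj.persistence(bucket.name, state)
            finally:
                # Disconnect even if cbepctl fails for one of the buckets
                node_shell_conn.disconnect()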
Example #3
    def test_rollback_to_zero(self):
        items = self.num_items
        mem_only_items = self.input.param("rollback_items", 10000)
        if self.nodes_init < 2 or self.num_replicas < 1:
            self.fail("Not enough nodes/replicas to test rollback")

        # Fetch vbucket stats for validation
        self.get_vb_details_cbstats_for_all_nodes("pre_rollback")

        start = self.num_items
        shell = self.node_shells[self.cluster.master]["shell"]
        cbstats = self.node_shells[self.cluster.master]["cbstat"]
        self.target_vbucket = cbstats.vbucket_list(self.bucket.name)

        # Stopping persistence on NodeA
        cbepctl = Cbepctl(shell)
        cbepctl.persistence(self.bucket.name, "stop")

        for i in xrange(1, self.num_rollbacks + 1):
            self.gen_create = doc_generator(
                self.key,
                start,
                mem_only_items,
                doc_size=self.doc_size,
                doc_type=self.doc_type,
                target_vbucket=self.target_vbucket,
                vbuckets=self.cluster_util.vbuckets,
                randomize_doc_size=self.randomize_doc_size,
                randomize_value=self.randomize_value)
            self.load_docs()
            if self.rollback_with_multiple_mutation:
                self.doc_ops = "update"
                self.load_docs()
            start = self.gen_create.key_counter
            stat_map = {self.cluster.nodes_in_cluster[0]: mem_only_items * i}
            for node in self.cluster.nodes_in_cluster[1:]:
                stat_map.update({node: 0})

            for bucket in self.bucket_util.buckets:
                self.bucket_util._wait_for_stat(bucket, stat_map)
            self.sleep(60)
            self.get_vb_details_cbstats_for_all_nodes("post_rollback")
            self.validate_seq_no_post_rollback("pre_rollback", "post_rollback")

        shell.kill_memcached()
        self.assertTrue(
            self.bucket_util._wait_warmup_completed(
                [self.cluster_util.cluster.master],
                self.bucket,
                wait_time=self.wait_timeout * 10))
        self.bucket_util.verify_stats_all_buckets(items)
        shell.disconnect()
        self.validate_test_failure()
Example #4
 def revert(self, action=None, bucket_name="default"):
     self.log.info("Reverting '{0}' in {1}".format(action,
                                                   self.shell_conn.ip))
     if action == CouchbaseError.STOP_MEMCACHED:
         _, error = self.__interrupt_process("memcached", "resume")
         self.__handle_shell_error(error)
     elif action == CouchbaseError.STOP_BEAMSMP:
         _, error = self.__interrupt_process("beam.smp", "resume")
         self.__handle_shell_error(error)
     elif action == CouchbaseError.KILL_BEAMSMP \
             or action == CouchbaseError.STOP_SERVER:
         self.shell_conn.start_server()
     elif action == CouchbaseError.STOP_PERSISTENCE:
         cbepctl_obj = Cbepctl(self.shell_conn)
         cbepctl_obj.persistence(bucket_name, "start")
     else:
         self.log.error(
             "Unsupported action to revert: '{0}'".format(action))
Example #5
 def test_flush_bucket_during_data_persistence(self):
     self.assertTrue(self.rest.update_autofailover_settings(False, 600),
                     "AutoFailover disabling failed")
     count = 0
     start = copy.deepcopy(self.init_items_per_collection)
     while count < self.test_itr:
         self.log.info("Iteration {}".format(count + 1))
         self.compute_docs(start, start)
         for shell in self.shell_conn:
             for bucket in self.cluster.buckets:
                 Cbepctl(shell).persistence(bucket.name, "stop")
         self.generate_docs()
         tasks_info = dict()
         for scope in self.scopes:
             for collection in self.collections:
                 task_info = self.loadgen_docs(
                     self.retry_exceptions,
                     self.ignore_exceptions,
                     scope=scope,
                     collection=collection,
                     suppress_error_table=True,
                     skip_read_on_error=True,
                     _sync=False,
                     doc_ops=self.doc_ops,
                     track_failures=False,
                     sdk_retry_strategy=SDKConstants.RetryStrategy.FAIL_FAST
                 )
                 tasks_info.update(task_info.items())
         for task in tasks_info:
             self.task_manager.get_task_result(task)
         for shell in self.shell_conn:
             for bucket in self.cluster.buckets:
                 Cbepctl(shell).persistence(bucket.name, "start")
         self.sleep(10, "sleep before flush thread")
         for bucket in self.buckets:
             self.bucket_util.flush_bucket(self.cluster, bucket)
         count += 1
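
The test disables auto-failover up front but never re-enables it; a hedged
cleanup sketch is shown below (whether it belongs at the end of the test or in
tearDown depends on the base class, which is not shown, and the 600s timeout
simply mirrors the value used when disabling):

    # Re-enable auto-failover once the flush/persistence cycling is done
    self.assertTrue(self.rest.update_autofailover_settings(True, 600),
                    "AutoFailover enabling failed")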
Example #6
    def test_rollback_to_zero(self):
        if self.nodes_init < 2 or self.num_replicas < 1:
            self.fail("Not enough nodes/replicas to test rollback")

        keys_to_verify = ["high_completed_seqno",
                          "purge_seqno"]
        doc_loading_task_2 = None
        # Override num_items to load data into each collection
        self.num_items = 10000

        # Set values to num_items to support loading through
        # collection loading task
        for bucket in self.bucket_util.buckets:
            for _, scope in bucket.scopes.items():
                for _, collection in scope.collections.items():
                    collection.num_items = self.num_items

        # Fetch vbucket stats for validation
        self.get_vb_details_cbstats_for_all_nodes("pre_rollback")

        target_node = choice(self.cluster_util.get_kv_nodes())
        shell = self.node_shells[target_node]["shell"]
        cbstats = self.node_shells[target_node]["cbstat"]
        self.target_vbuckets = cbstats.vbucket_list(self.bucket.name)

        for i in xrange(1, self.num_rollbacks + 1):
            self.total_rollback_items = 0
            self.log.info("Stopping persistence on %s" % target_node.ip)
            Cbepctl(shell).persistence(self.bucket.name, "stop")

            doc_loading_task_1 = self.load_docs(self.doc_ops)
            if self.rollback_with_multiple_mutation:
                doc_loading_task_2 = self.load_docs("update")
            stat_map = dict()
            for node in self.cluster.nodes_in_cluster:
                expected_val = 0
                if node.ip == target_node.ip:
                    expected_val = self.total_rollback_items
                    if self.sync_write_enabled:
                        # Includes prepare+commit mutation
                        expected_val *= 2
                stat_map.update({node: expected_val})

            for bucket in self.bucket_util.buckets:
                self.bucket_util._wait_for_stat(bucket, stat_map,
                                                timeout=self.wait_timeout)

            if doc_loading_task_2:
                self.__rewind_doc_index(doc_loading_task_2)
            self.__rewind_doc_index(doc_loading_task_1)

            self.log.info("Killing memcached to trigger rollback")
            shell.kill_memcached()
            self.assertTrue(self.bucket_util._wait_warmup_completed(
                [target_node],
                self.bucket,
                wait_time=300))

            self.sleep(10, "Wait after bucket warmup for cbstats to work")
            self.get_vb_details_cbstats_for_all_nodes("post_rollback")
            self.validate_seq_no_post_rollback("pre_rollback", "post_rollback",
                                               keys_to_verify)

        # Reset expected values to '0' for validation
        for bucket in self.bucket_util.buckets:
            for _, scope in bucket.scopes.items():
                for _, collection in scope.collections.items():
                    collection.num_items = 0
        self.bucket_util.validate_docs_per_collections_all_buckets()
        self.validate_test_failure()
Example #7
    def test_rollback_during_compaction(self):
        '''
        Stop persistence on the master node, load docs into memory only and
        kill memcached to trigger rollback on the replica nodes, while bucket
        compaction runs either before or after the memcached kill
        (compact_before / compact_after test params).
        '''
        self.assertTrue(self.rest.update_autofailover_settings(False, 600),
                        "AutoFailover disabling failed")
        items = copy.deepcopy(self.num_items)
        mem_only_items = self.input.param("rollback_items", 10000)

        ops_len = len(self.doc_ops.split(":"))

        if self.nodes_init < 2 or self.num_replicas < 1:
            self.fail("Not enough nodes/replicas in the cluster/bucket \
            to test rollback")

        self.num_rollbacks = self.input.param("num_rollbacks", 1)

        #######################################################################
        '''
        STEP - 1: Stop persistence on the master node (Node-x)
        '''

        for i in range(1, self.num_rollbacks+1):
            self.log.info("Roll back Iteration == {}".format(i))
            start = items
            shell = RemoteMachineShellConnection(self.cluster.master)
            cbstats = Cbstats(self.cluster.master)
            self.target_vbucket = cbstats.vbucket_list(
                self.cluster.buckets[0].name)
            mem_item_count = 0
            # Stopping persistence on Node-x
            self.log.debug("Stopping persistence on Node-{}"
                           .format(self.cluster.master))
            Cbepctl(shell).persistence(self.cluster.buckets[0].name, "stop")

            ###############################################################
            '''
            STEP - 2
              -- Load mem_only_items docs per doc_op on the target vBuckets
              -- These mutations stay only in memory, since persistence
                 is stopped on Node-x
            '''
            self.compute_docs(start, mem_only_items)
            self.gen_create = None
            self.gen_update = None
            self.gen_delete = None
            self.gen_expiry = None
            mem_item_count += mem_only_items * ops_len
            self.generate_docs(doc_ops=self.doc_ops,
                               target_vbucket=self.target_vbucket)
            self.loadgen_docs(_sync=True,
                              retry_exceptions=self.retry_exceptions,
                              ignore_exceptions=self.ignore_exceptions)
            if self.gen_create is not None:
                self.create_start = self.gen_create.key_counter
            if self.gen_update is not None:
                self.update_start = self.gen_update.key_counter
            if self.gen_delete is not None:
                self.delete_start = self.gen_delete.key_counter
            if self.gen_expiry is not None:
                self.expiry_start = self.gen_expiry.key_counter

            ep_queue_size_map = {self.cluster.nodes_in_cluster[0]:
                                 mem_item_count}
            vb_replica_queue_size_map = {self.cluster.nodes_in_cluster[0]: 0}
            if self.durability_level:
                self.log.info("Doubling the expected num_items on disk "
                              "due to durability")
                ep_queue_size_map = {self.cluster.nodes_in_cluster[0]:
                                     mem_item_count * 2}

            for node in self.cluster.nodes_in_cluster[1:]:
                ep_queue_size_map.update({node: 0})
                vb_replica_queue_size_map.update({node: 0})


            ###############################################################
            '''
            STEP - 3
              -- Run bucket compaction before and/or after killing memcached
              -- Kill memcached on the master node to trigger rollback on
                 the other nodes
            '''
            if self.compact_before:
                compaction_tasks = []
                for bucket in self.cluster.buckets:
                    compaction_tasks.append(
                        self.task.async_compact_bucket(self.cluster.master,
                                                       bucket))
            shell.kill_memcached()

            if self.compact_after:
                self.bucket_util._run_compaction(self.cluster,
                                                 number_of_times=1)
            if self.compact_before:
                for task in compaction_tasks:
                    self.task_manager.get_task_result(task)

            self.assertTrue(self.bucket_util._wait_warmup_completed(
                [self.cluster.master],
                self.cluster.buckets[0],
                wait_time=self.wait_timeout * 10))

            ###############################################################
            '''
            STEP - 4
              -- Restart persistence on the master node and wait for the
                 disk queues to drain
            '''

            self.log.debug("RollBack Iteration=={}, Re-Starting persistence on Node -- {}".format(i, self.cluster.master))
            Cbepctl(shell).persistence(self.cluster.buckets[0].name, "start")

            self.sleep(5, "Sleep after re-starting persistence, Iteration{}".format(i))
            for nod in self.cluster.nodes_in_cluster:
                ep_queue_size_map.update({nod: 0})
                vb_replica_queue_size_map.update({nod: 0})
            for bucket in self.cluster.buckets:
                self.bucket_util._wait_for_stat(bucket, ep_queue_size_map,
                                                timeout=600)
                self.bucket_util._wait_for_stat(
                    bucket, vb_replica_queue_size_map,
                    cbstat_cmd="all",
                    stat_name="vb_replica_queue_size",
                    timeout=600)
            shell.disconnect()
            ###################################################################
            '''
            STEP - 5
              -- Load 100000 new docs (create) across all the nodes
              -- Ensures creation of a new state file
            '''
            self.create_start = items
            self.create_end = items + 100000
            self.generate_docs(doc_ops="create", target_vbucket=None)
            self.loadgen_docs(self.retry_exceptions,
                              self.ignore_exceptions, _sync=True,
                              doc_ops="create")
            self.bucket_util._wait_for_stats_all_buckets(
                self.cluster, self.cluster.buckets, timeout=1200)
            items = items + 100000
            self.log.debug("Iteration == {}, Total num_items {}".format(i, items))
Example #8
    def replicate_correct_data_after_rollback(self):
        '''
        @attention:
          This test case has some issue with docker runs.
          It passes without any issue on VMs.
        '''

        bucket = self.bucket_util.buckets[0]
        cluster = self.cluster

        gen_load = doc_generator(self.key, 0, self.num_items)
        for bucket in self.bucket_util.buckets:
            task = self.task.async_load_gen_docs(
                self.cluster, bucket, gen_load, "create", 0,
                batch_size=10, process_concurrency=8,
                replicate_to=self.replicate_to, persist_to=self.persist_to,
                timeout_secs=self.sdk_timeout, retries=self.sdk_retries)
            self.task.jython_task_manager.get_task_result(task)

        # store the KVs which were modified and active on node 1
        modified_kvs_active_on_node1 = dict()
        vbucket_client = VBucketAwareMemcached(
            RestConnection(cluster.master), bucket.name)
        client = MemcachedClientHelper.direct_client(cluster.servers[0],
                                                     bucket.name)
        for i in range(self.num_items/100):
            keyname = 'keyname-' + str(i)
            vbId = self.bucket_util.get_vbucket_num_for_key(keyname,
                                                            self.vbuckets)
            if vbucket_client.vBucketMap[vbId].split(':')[0] == cluster.servers[0].ip:
                rc = client.get(keyname)
                modified_kvs_active_on_node1[keyname] = rc[2]

        # Stop persistence
        for server in cluster.servers[:self.nodes_init]:
            # Create cbepctl command object
            node_shell_conn = RemoteMachineShellConnection(server)
            cbepctl_obj = Cbepctl(node_shell_conn)

            for bucket in self.bucket_util.buckets:
                cbepctl_obj.persistence(bucket.name, "stop")

            # Disconnect the shell_connection
            node_shell_conn.disconnect()

        # modify less than 1/2 of the keys
        gen_load = doc_generator(self.key, 0, self.num_items/100)
        rc = self.cluster.load_gen_docs(
            cluster.servers[0], bucket.name, gen_load,
            bucket.kvs[1], "create", exp=0, flag=0, batch_size=10,
            compression=self.sdk_compression)

        # kill memcached, when it comes back because persistence is disabled
        # it will have lost the second set of mutations
        shell = RemoteMachineShellConnection(cluster.servers[0])
        shell.kill_memcached()
        shell.disconnect()
        self.sleep(10, "Sleep after kill memcached")

        # Start persistence on the second node
        # Create cbepctl command object
        node_shell_conn = RemoteMachineShellConnection(cluster.servers[1])
        cbepctl_obj = Cbepctl(node_shell_conn)

        for bucket in self.bucket_util.buckets:
            cbepctl_obj.persistence(bucket.name, "start")

        # Disconnect the shell_connection
        node_shell_conn.disconnect()

        self.sleep(10, "Sleep after start persistence")

        # failover to the second node
        rc = self.cluster.failover(cluster.servers, cluster.servers[1:2],
                                   graceful=True)
        self.sleep(30, "Sleep after node failover triggered")

        # Values should be what they were prior to the second update
        client = MemcachedClientHelper.direct_client(
            cluster.servers[0], bucket.name)
        for k, v in modified_kvs_active_on_node1.iteritems():
            rc = client.get(k)
            self.assertEqual(v, rc[2],
                             'Expected {0}, actual {1}'.format(v, rc[2]))

        # need to rebalance the node back into the cluster
        # def rebalance(self, servers, to_add, to_remove, timeout=None,
        #               use_hostnames=False, services = None):

        rest_obj = RestConnection(cluster.servers[0])
        nodes_all = rest_obj.node_statuses()
        for node in nodes_all:
            if node.ip == cluster.servers[1].ip:
                break

        node_id_for_recovery = node.id
        status = rest_obj.add_back_node(node_id_for_recovery)
        if status:
            rest_obj.set_recovery_type(node_id_for_recovery,
                                       recoveryType='delta')
        rc = self.cluster.rebalance(cluster.servers[:self.nodes_init], [], [])
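
The result of the final rebalance above is captured in rc but never checked; a
minimal follow-up assertion, assuming self.cluster.rebalance returns a truthy
success flag like the other task results in these tests, could be:

        # Fail the test if the rebalance-in of the recovered node failed
        self.assertTrue(rc, "Rebalance after delta recovery failed")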
Example #9
    def test_flush_bucket_during_rollback(self):
        '''
        Test focus: Stop persistence on the nodes one by one and trigger
                    rollback on the other nodes; flush the bucket while
                    the rollback is in progress. These steps are repeated
                    num_rollbacks (test parameter) times.

        STEPS:
         -- Ensure creation of at least a single state file
         -- The steps below are repeated on all nodes, stopping persistence
            on one node at a time
         -- Stop persistence on node x
         -- Load docs on node x for a given duration (self.duration * 60 seconds)
         -- Above step ensures creation of new state files (count equal to self.duration)
         -- Kill memcached on node x
         -- Trigger rollback on the other/replica nodes
         -- Restart persistence on node x
         -- Repeat all the above steps num_rollbacks times
        '''
        self.assertTrue(self.rest.update_autofailover_settings(False, 600),
                        "AutoFailover disabling failed")
        items = copy.deepcopy(self.init_items_per_collection)
        mem_only_items = self.input.param("rollback_items", 10000)

        ops_len = len(self.doc_ops.split(":"))

        if self.nodes_init < 2 or self.num_replicas < 1:
            self.fail("Not enough nodes/replicas in the cluster/bucket \
            to test rollback")

        self.duration = self.input.param("duration", 2)
        self.num_rollbacks = self.input.param("num_rollbacks", 3)

        #######################################################################
        '''
        STEP - 1, Ensures creation of at least one snapshot

        To ensure at least one snapshot should get created before rollback
        starts, we need to sleep for 60 seconds as per magma design which
        create state file every 60s

        '''
        self.sleep(60, "Ensures creation of at least one snapshot")
        #######################################################################
        '''
        STEP - 2,  Stop persistence on node - x
        '''

        for i in range(1, self.num_rollbacks+1):
            self.log.info("Roll back Iteration == {}".format(i))
            start = items
            for x, node in enumerate(self.cluster.nodes_in_cluster):
                shell = RemoteMachineShellConnection(node)
                cbstats = Cbstats(shell)
                self.target_vbucket = cbstats.vbucket_list(
                    self.cluster.buckets[0].name)
                mem_item_count = 0
                # Stopping persistence on Node-x
                self.log.debug("Iteration == {}, Stopping persistence on Node-{}, ip ={}"
                               .format(i, x+1, node))
                Cbepctl(shell).persistence(self.cluster.buckets[0].name, "stop")

                ###############################################################
                '''
                STEP - 3
                  -- Load documents on node  x for  self.duration * 60 seconds
                  -- This step ensures new state files (number equal to self.duration)
                '''
                self.compute_docs(start, mem_only_items)
                self.gen_create = None
                self.gen_update = None
                self.gen_delete = None
                self.gen_expiry = None
                time_end = time.time() + 60 * self.duration
                itr = 0
                while time.time() < time_end:
                    itr += 1
                    time_start = time.time()
                    mem_item_count += mem_only_items * ops_len
                    self.generate_docs(doc_ops=self.doc_ops,
                                       target_vbucket=self.target_vbucket)
                    self.loadgen_docs(_sync=True,
                                      retry_exceptions=self.retry_exceptions)
                    if self.gen_create is not None:
                        self.create_start = self.gen_create.key_counter
                    if self.gen_update is not None:
                        self.update_start = self.gen_update.key_counter
                    if self.gen_delete is not None:
                        self.delete_start = self.gen_delete.key_counter
                    if self.gen_expiry is not None:
                        self.expiry_start = self.gen_expiry.key_counter

                    if time.time() < time_start + 60:
                        self.log.info("Rollback Iteration== {}, itr== {}, Active-Node=={}, Node=={}".format(i, itr, x+1, node))
                        self.sleep(time_start + 60 - time.time(),
                                   "Sleep to ensure creation of state files for roll back")
                        self.log.info("state files == {}".format(
                                     self.get_state_files(self.buckets[0])))
                ep_queue_size_map = {node: mem_item_count}
                if self.durability_level:
                    self.log.info("Doubling the expected num_items on disk "
                                  "due to durability")
                    ep_queue_size_map = {node: mem_item_count * 2}
                vb_replica_queue_size_map = {node: 0}

                for nod in self.cluster.nodes_in_cluster:
                    if nod != node:
                        ep_queue_size_map.update({nod: 0})
                        vb_replica_queue_size_map.update({nod: 0})

                for bucket in self.cluster.buckets:
                    self.bucket_util._wait_for_stat(bucket, ep_queue_size_map,
                                                    timeout=1200)
                    self.bucket_util._wait_for_stat(bucket, vb_replica_queue_size_map,
                                                    cbstat_cmd="all",
                                                    stat_name="vb_replica_queue_size",
                                                    timeout=1200)
                # replica vBuckets
                for bucket in self.cluster.buckets:
                    self.log.debug(cbstats.failover_stats(bucket.name))

                ###############################################################
                '''
                STEP - 4
                  -- Kill Memcached on Node - x and trigger rollback on other nodes
                  -- After 20 seconds , flush bucket
                '''

                shell.kill_memcached()
                self.sleep(20, "sleep after killing memcached")
                self.bucket_util.flush_bucket(self.cluster, self.cluster.buckets[0])
                ###############################################################
                '''
                STEP -5
                 -- Restarting persistence on Node -- x
                '''
                # Warmup happens on the node whose memcached was killed
                self.assertTrue(self.bucket_util._wait_warmup_completed(
                    [node],
                    self.cluster.buckets[0],
                    wait_time=self.wait_timeout * 10))

                self.log.debug("Iteration=={}, Re-Starting persistence on Node -- {}".format(i, node))
                Cbepctl(shell).persistence(self.cluster.buckets[0].name, "start")

                self.sleep(5, "Sleep after re-starting persistence, Iteration{}".format(i))
                shell.disconnect()
                ###################################################################
                '''
                STEP - 6
                  -- Load docs on all the nodes
                  -- Ensures creation of a new state file
                '''
                self.create_start = 0
                self.create_end = self.init_items_per_collection
                self.generate_docs(doc_ops="create", target_vbucket=None)
                self.loadgen_docs(self.retry_exceptions,
                                  self.ignore_exceptions, _sync=True,
                                  doc_ops="create")
                self.bucket_util._wait_for_stats_all_buckets(self.cluster,
                                                             self.cluster.buckets,
                                                             timeout=1200)
Example #10
    def test_rollback_n_times(self):
        items = self.num_items
        mem_only_items = self.input.param("rollback_items", 100)
        if self.nodes_init < 2 or self.num_replicas < 1:
            self.fail("Not enough nodes/replicas to test rollback")

        # Fetch vbucket stats for validation
        self.get_vb_details_cbstats_for_all_nodes("pre_rollback")

        shell = self.node_shells[self.cluster.master]["shell"]
        cbstats = self.node_shells[self.cluster.master]["cbstat"]
        self.target_vbucket = cbstats.vbucket_list(self.bucket.name)
        start = self.num_items
        self.gen_validate = self.gen_create

        for _ in xrange(1, self.num_rollbacks + 1):
            # Stopping persistence on NodeA
            cbepctl = Cbepctl(shell)
            cbepctl.persistence(self.bucket.name, "stop")
            self.gen_create = doc_generator(
                self.key,
                start,
                mem_only_items,
                doc_size=self.doc_size,
                doc_type=self.doc_type,
                target_vbucket=self.target_vbucket,
                vbuckets=self.cluster_util.vbuckets,
                randomize_doc_size=self.randomize_doc_size,
                randomize_value=self.randomize_value)

            self.load_docs()
            if self.rollback_with_multiple_mutation:
                self.doc_ops = "update"
                self.load_docs()
            start = self.gen_create.key_counter
            ep_queue_size_map = {
                self.cluster.nodes_in_cluster[0]: mem_only_items
            }
            vb_replica_queue_size_map = {self.cluster.nodes_in_cluster[0]: 0}
            for node in self.cluster.nodes_in_cluster[1:]:
                ep_queue_size_map.update({node: 0})
                vb_replica_queue_size_map.update({node: 0})

            for bucket in self.bucket_util.buckets:
                self.bucket_util._wait_for_stat(bucket, ep_queue_size_map)
                self.bucket_util._wait_for_stat(
                    bucket,
                    vb_replica_queue_size_map,
                    stat_name="vb_replica_queue_size")

            # Kill memcached on NodeA to trigger rollback on other Nodes
            # replica vBuckets
            for bucket in self.bucket_util.buckets:
                self.log.debug(cbstats.failover_stats(bucket.name))
            shell.kill_memcached()
            self.assertTrue(
                self.bucket_util._wait_warmup_completed(
                    [self.cluster_util.cluster.master],
                    self.bucket,
                    wait_time=self.wait_timeout * 10))
            self.sleep(10, "Wait after warmup complete. Not required !!")
            self.bucket_util.verify_stats_all_buckets(items, timeout=300)
            for bucket in self.bucket_util.buckets:
                self.log.debug(cbstats.failover_stats(bucket.name))

            data_validation = self.task.async_validate_docs(self.cluster,
                                                            self.bucket,
                                                            self.gen_validate,
                                                            "create",
                                                            0,
                                                            batch_size=10)
            self.task.jython_task_manager.get_task_result(data_validation)
            self.get_vb_details_cbstats_for_all_nodes("post_rollback")
            self.validate_seq_no_post_rollback("pre_rollback", "post_rollback")

        self.validate_test_failure()
Example #11
    def test_ttl_less_than_durability_timeout(self):
        """
        MB-43238
        1. Regular write with TTL 1 second for some key
        2. Disable expiry pager (to prevent raciness)
        3. Wait TTL period
        4. Disable persistence on the node with the replica vBucket for that key
        5. SyncWrite PersistMajority to active vBucket for that key (should hang)
        6. Access key on other thread to trigger expiry
        7. Observe DCP connection being torn down without fix
        """
        def perform_sync_write():
            client.crud(DocLoading.Bucket.DocOps.CREATE,
                        key, {},
                        durability=Bucket.DurabilityLevel.PERSIST_TO_MAJORITY,
                        timeout=60)

        doc_ttl = 5
        target_node = None
        key = "test_ttl_doc"
        vb_for_key = self.bucket_util.get_vbucket_num_for_key(key)
        bucket = self.cluster.buckets[0]

        # Find target node for replica VB
        for target_node in self.cluster.nodes_in_cluster:
            cb_stats = Cbstats(target_node)
            if vb_for_key in cb_stats.vbucket_list(bucket.name, "replica"):
                break

        self.log.info("Target node: %s, Key: %s" % (target_node.ip, key))
        self.log.info("Disabling expiry_pager")
        shell = RemoteMachineShellConnection(target_node)
        cb_ep_ctl = Cbepctl(shell)
        cb_ep_ctl.set(bucket.name, "flush_param", "exp_pager_stime", 0)

        # Create SDK client
        client = SDKClient([self.cluster.master], bucket)

        self.log.info("Non-sync write with TTL=%s" % doc_ttl)
        client.crud(DocLoading.Bucket.DocOps.CREATE, key, {}, exp=doc_ttl)

        self.sleep(doc_ttl, "Wait for document to expire")
        self.bucket_util._wait_for_stats_all_buckets(self.cluster,
                                                     self.cluster.buckets)

        self.log.info("Stopping persistence on replica VB node using cbepctl")
        cb_ep_ctl.persistence(bucket.name, "stop")

        # Start the sync_write in a separate thread; it is expected to block
        # since persistence is stopped on the replica node
        doc_create_thread = Thread(target=perform_sync_write)
        doc_create_thread.start()
        self.sleep(2, "Wait for sync_write thread to start")

        self.log.info("Read key from another thread to trigger expiry")
        failure = None
        result = client.crud(DocLoading.Bucket.DocOps.READ, key)
        if SDKException.DocumentNotFoundException not in str(result["error"]):
            failure = "Invalid exception: %s" % result["error"]

        self.log.info("Resuming persistence on target node")
        cb_ep_ctl.persistence(bucket.name, "start")

        # Wait for doc_create_thread to complete
        doc_create_thread.join()

        # Close SDK client and shell connections
        client.close()
        shell.disconnect()

        if failure:
            self.fail(failure)

        for node in self.cluster.nodes_in_cluster:
            cb_stats = Cbstats(node).all_stats(bucket.name)
            self.log.info("Node: %s, ep_expired_access: %s" %
                          (node.ip, cb_stats["ep_expired_access"]))
            self.assertEqual(int(cb_stats["ep_expired_access"]), 0,
                             "%s: ep_expired_access != 0" % node.ip)
Example #12
    def test_stream_during_rollback(self):
        '''
         -- Ensure creation of at least a single state file
         -- Stop persistence on master node
         -- Start load on master node (say Node A) for a given duration (self.duration * 60 seconds)
         -- Above step ensures creation of new state files (count equal to self.duration)
         -- Kill memcached on master node (Node A)
         -- Trigger rollback on the other/replica nodes
         -- START STREAMING DATA USING DCP
         -- Restart persistence on master node
         -- Start doc loading on all the nodes (ensures creation of a state file)
         -- Above two steps ensure rollback to a new snapshot
         -- Repeat all the above steps num_rollbacks times
        '''
        items = self.num_items
        mem_only_items = self.input.param("rollback_items", 10000)
        ops_len = len(self.doc_ops.split(":"))
        self.assertTrue(self.rest.update_autofailover_settings(False, 600),
                        "AutoFailover disabling failed")

        if self.nodes_init < 2 or self.num_replicas < 1:
            self.fail("Not enough nodes/replicas in the cluster/bucket \
            to test rollback")

        self.duration = self.input.param("duration", 2)
        self.num_rollbacks = self.input.param("num_rollbacks", 3)

        shell = RemoteMachineShellConnection(self.cluster.master)
        cbstats = Cbstats(self.cluster.master)
        self.target_vbucket = cbstats.vbucket_list(
            self.cluster.buckets[0].name)

        #######################################################################
        '''
        STEP - 1,  Stop persistence on master node
        '''
        master_itr = 0
        for i in range(1, self.num_rollbacks + 1):
            start = items
            self.log.info("Roll back Iteration == {}".format(i))

            mem_item_count = 0

            # Stopping persistence on NodeA
            self.log.debug("Iteration == {}, stopping persistence".format(i))
            Cbepctl(shell).persistence(self.cluster.buckets[0].name, "stop")

            ###################################################################
            '''
            STEP - 2
              -- Doc ops on master node for  self.duration * 60 seconds
              -- This step ensures new state files (number equal to self.duration)
            '''
            self.log.info("Just before compute docs, iteration {}".format(i))
            self.compute_docs(start, mem_only_items)
            self.gen_create = None
            self.gen_update = None
            self.gen_delete = None
            self.gen_expiry = None
            time_end = time.time() + 60 * self.duration
            while time.time() < time_end:
                master_itr += 1
                time_start = time.time()
                mem_item_count += mem_only_items * ops_len
                self.generate_docs(doc_ops=self.doc_ops,
                                   target_vbucket=self.target_vbucket)
                self.loadgen_docs(_sync=True,
                                  retry_exceptions=self.retry_exceptions)
                if self.gen_create is not None:
                    self.create_start = self.gen_create.key_counter
                if self.gen_update is not None:
                    self.update_start = self.gen_update.key_counter
                if self.gen_delete is not None:
                    self.delete_start = self.gen_delete.key_counter
                if self.gen_expiry is not None:
                    self.expiry_start = self.gen_expiry.key_counter

                if time.time() < time_start + 60:
                    self.sleep(
                        time_start + 60 - time.time(),
                        "master_itr == {}, Sleep to ensure creation of state files for roll back,"
                        .format(master_itr))
                self.log.info("master_itr == {}, state files== {}".format(
                    master_itr, self.get_state_files(self.buckets[0])))

            ep_queue_size_map = {
                self.cluster.nodes_in_cluster[0]: mem_item_count
            }
            vb_replica_queue_size_map = {self.cluster.nodes_in_cluster[0]: 0}

            for node in self.cluster.nodes_in_cluster[1:]:
                ep_queue_size_map.update({node: 0})
                vb_replica_queue_size_map.update({node: 0})

            for bucket in self.cluster.buckets:
                self.bucket_util._wait_for_stat(bucket,
                                                ep_queue_size_map,
                                                timeout=300)
                self.bucket_util._wait_for_stat(
                    bucket,
                    vb_replica_queue_size_map,
                    cbstat_cmd="all",
                    stat_name="vb_replica_queue_size",
                    timeout=300)

            # replica vBuckets
            for bucket in self.cluster.buckets:
                self.log.debug(cbstats.failover_stats(bucket.name))
            ###################################################################
            '''
            STEP - 3
              -- Kill Memcached on master node(Node A) and trigger rollback on replica/other nodes
              -- Start streaming data (through DCP)
            '''

            shell.kill_memcached()

            self.assertTrue(
                self.bucket_util._wait_warmup_completed(
                    [self.cluster.master],
                    self.cluster.buckets[0],
                    wait_time=self.wait_timeout * 10))
            output_string = self.dcp_util.get_dcp_event()
            actual_item_count = len(
                list(filter(lambda x: 'CMD_MUTATION' in x, output_string)))
            self.log.info("actual_item_count is {}".format(actual_item_count))
            msg = "item count mismatch, expected {} actual {}"
            self.assertIs(actual_item_count == self.num_items, True,
                          msg.format(self.num_items, actual_item_count))

            ###################################################################
            '''
            STEP -4
              -- Restarting persistence on master node(Node A)
            '''

            self.log.debug("Iteration=={}, Re-Starting persistence".format(i))
            Cbepctl(shell).persistence(self.cluster.buckets[0].name, "start")
            self.sleep(
                5,
                "Iteration=={}, sleep after restarting persistence".format(i))
            ###################################################################
            '''
            STEP - 5
              -- Load Docs on all the nodes
              -- Loading of doc for 60 seconds
              -- Ensures creation of new state file
            '''
            if i != self.num_rollbacks:
                self.create_start = items
                self.create_end = items + 50000
                self.generate_docs(doc_ops="create", target_vbucket=None)
                _ = self.loadgen_docs(self.retry_exceptions,
                                      self.ignore_exceptions,
                                      _sync=True,
                                      doc_ops="create")
                self.bucket_util._wait_for_stats_all_buckets(
                    self.cluster, self.cluster.buckets, timeout=1200)
                items = items + 50000
                self.log.debug("Iteration == {}, Total num_items {}".format(
                    i, items))

        shell.disconnect()