Example #1
    def collect_failovers_stats(self, buckets, servers, perNode=True):
        """
            Method to extract the failovers stats given by cbstats tool

            Parameters:
              buckets: bucket information
              servers: server information
              perNode: if True, collect stats per node; else aggregate across nodes

            Returns:
              Failover stats as follows:
              if not collecting per node :: {bucket : [{key:value}]}
              if collecting per node :: {bucket : {node:[{key:value}]}}
        """
        bucketMap = {}
        for bucket in buckets:
            dataMap = {}
            for server in servers:
                cbstat = Cbstats(server)
                stats = cbstat.failover_stats(bucket.name)
                map_data = {}
                num_map = {}
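                # Inner stat keys come in two shapes: '<entry_num>:<field>'
                # (e.g. '0:id', '0:seq') and the bare 'num_entries' counter;
                # the split below separates the two forms.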
                for okey, ovalue in stats.items():
                    vb = 'vb_' + okey
                    for ikey, ivalue in ovalue.items():
                        tokens = ikey.split(":")
                        key = tokens[0]
                        num = -1
                        if len(tokens) == 2:
                            key = tokens[1]
                            num = int(tokens[0])
                        value = ivalue.split()
                        if vb in map_data and num >= num_map[vb]:
                            map_data[vb][key] = value[0]
                        elif vb in map_data and key == "num_entries":
                            map_data[vb][key] = value[0]
                        elif vb not in map_data:
                            map_data[vb] = {key: value[0]}
                            num_map[vb] = num
                if perNode:
                    dataMap[server.ip] = map_data
                else:
                    dataMap.update(map_data)
            bucketMap[bucket.name] = dataMap
        return bucketMap
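For context, a minimal usage sketch (not part of the original example) of how the per-node map returned above might be consumed; the monitor, buckets and servers names are hypothetical stand-ins for the surrounding test harness:

    # Hypothetical usage sketch: walk {bucket: {node: {vb: {stat: value}}}}
    # as returned with perNode=True and print each vBucket's failover-log
    # entry count.
    stats_map = monitor.collect_failovers_stats(buckets, servers, perNode=True)
    for bucket_name, node_map in stats_map.items():
        for node_ip, vb_map in node_map.items():
            for vb, fields in vb_map.items():
                if "num_entries" in fields:
                    print("%s %s %s %s" % (bucket_name, node_ip, vb,
                                           fields["num_entries"]))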
Example #2
    def test_flush_bucket_during_rollback(self):
        '''
        Test focus: stop persistence on the nodes one at a time,
                    trigger rollback on the other nodes,
                    and flush the data during the rollback.
                    The above steps are repeated num_rollbacks
                    (variable defined in the test) times.

        STEPS:
         -- Ensure creation of at least a single state file
         -- The steps below are repeated on all nodes, stopping persistence on one node at a time
         -- Stop persistence on node x
         -- Start load on node x for a given duration (self.duration * 60 seconds)
         -- The above step ensures creation of new state files (count equal to self.duration)
         -- Kill memcached on node x
         -- Trigger rollback on the other/replica nodes
         -- Restart persistence on node x
         -- Repeat all the above steps num_rollbacks times
        '''
        self.assertTrue(self.rest.update_autofailover_settings(False, 600),
                        "AutoFailover disabling failed")
        items = copy.deepcopy(self.init_items_per_collection)
        mem_only_items = self.input.param("rollback_items", 10000)

        ops_len = len(self.doc_ops.split(":"))

        if self.nodes_init < 2 or self.num_replicas < 1:
            self.fail("Not enough nodes/replicas in the cluster/bucket \
            to test rollback")

        self.duration = self.input.param("duration", 2)
        self.num_rollbacks = self.input.param("num_rollbacks", 3)

        #######################################################################
        '''
        STEP - 1, Ensure creation of at least one snapshot

        To ensure at least one snapshot is created before the rollback
        starts, we sleep for 60 seconds, since by magma's design a state
        file is created every 60s.
        '''
        self.sleep(60, "Ensures creation of at least one snapshot")
        #######################################################################
        '''
        STEP - 2, Stop persistence on node x
        '''

        for i in range(1, self.num_rollbacks+1):
            self.log.info("Roll back Iteration == {}".format(i))
            start = items
            for x, node in enumerate(self.cluster.nodes_in_cluster):
                shell = RemoteMachineShellConnection(node)
                cbstats = Cbstats(shell)
                self.target_vbucket = cbstats.vbucket_list(
                    self.cluster.buckets[0].name)
                mem_item_count = 0
                # Stopping persistence on Node-x
                self.log.debug("Iteration == {}, Stopping persistence on Node-{}, ip ={}"
                               .format(i, x+1, node))
                Cbepctl(shell).persistence(self.cluster.buckets[0].name, "stop")

                ###############################################################
                '''
                STEP - 3
                  -- Load documents on node x for self.duration * 60 seconds
                  -- This step ensures new state files (number equal to self.duration)
                '''
                self.compute_docs(start, mem_only_items)
                self.gen_create = None
                self.gen_update = None
                self.gen_delete = None
                self.gen_expiry = None
                time_end = time.time() + 60 * self.duration
                itr = 0
                while time.time() < time_end:
                    itr += 1
                    time_start = time.time()
                    mem_item_count += mem_only_items * ops_len
                    self.generate_docs(doc_ops=self.doc_ops,
                                       target_vbucket=self.target_vbucket)
                    self.loadgen_docs(_sync=True,
                                      retry_exceptions=self.retry_exceptions)
                    if self.gen_create is not None:
                        self.create_start = self.gen_create.key_counter
                    if self.gen_update is not None:
                        self.update_start = self.gen_update.key_counter
                    if self.gen_delete is not None:
                        self.delete_start = self.gen_delete.key_counter
                    if self.gen_expiry is not None:
                        self.expiry_start = self.gen_expiry.key_counter

                    if time.time() < time_start + 60:
                        self.log.info("Rollback Iteration== {}, itr== {}, Active-Node=={}, Node=={}".format(i, itr, x+1, node))
                        self.sleep(time_start + 60 - time.time(),
                                   "Sleep to ensure creation of state files for roll back")
                        self.log.info("state files == {}".format(
                                     self.get_state_files(self.buckets[0])))
                ep_queue_size_map = {node: mem_item_count}
                if self.durability_level:
                    self.log.info("updating the num_items on disk check to double due to durability")
                    ep_queue_size_map = {node: mem_item_count * 2}
                vb_replica_queue_size_map = {node: 0}

                for nod in self.cluster.nodes_in_cluster:
                    if nod != node:
                        ep_queue_size_map.update({nod: 0})
                        vb_replica_queue_size_map.update({nod: 0})

                for bucket in self.cluster.buckets:
                    self.bucket_util._wait_for_stat(bucket, ep_queue_size_map,
                                                    timeout=1200)
                    self.bucket_util._wait_for_stat(bucket, vb_replica_queue_size_map,
                                                    cbstat_cmd="all",
                                                    stat_name="vb_replica_queue_size",
                                                    timeout=1200)
                # replica vBuckets
                for bucket in self.cluster.buckets:
                    self.log.debug(cbstats.failover_stats(bucket.name))

                ###############################################################
                '''
                STEP - 4
                  -- Kill memcached on node x and trigger rollback on the other nodes
                  -- After 20 seconds, flush the bucket
                '''

                shell.kill_memcached()
                self.sleep(20, "sleep after killing memcached")
                self.bucket_util.flush_bucket(self.cluster, self.cluster.buckets[0])
                ###############################################################
                '''
                STEP - 5
                  -- Restart persistence on node x
                '''
                self.assertTrue(self.bucket_util._wait_warmup_completed(
                    [self.cluster.master],
                    self.cluster.buckets[0],
                    wait_time=self.wait_timeout * 10))

                self.log.debug("Iteration=={}, Re-Starting persistence on Node -- {}".format(i, node))
                Cbepctl(shell).persistence(self.cluster.buckets[0].name, "start")

                self.sleep(5, "Sleep after re-starting persistence, Iteration{}".format(i))
                shell.disconnect()
                ###################################################################
                '''
                STEP - 6
                  -- Load docs on all the nodes
                  -- Load docs for 60 seconds
                  -- Ensures creation of a new state file
                '''
                self.create_start = 0
                self.create_end = self.init_items_per_collection
                self.generate_docs(doc_ops="create", target_vbucket=None)
                self.loadgen_docs(self.retry_exceptions,
                                  self.ignore_exceptions, _sync=True,
                                  doc_ops="create")
                self.bucket_util._wait_for_stats_all_buckets(self.cluster,
                                                             self.cluster.buckets,
                                                             timeout=1200)
Example #3
    def test_failover_log_table_updated(self):
        """
        Verifies failover table entries are updated when vbucket
        ownership changes
        """

        # rebalance in nodeB
        nodeA = self.servers[0]
        nodeB = self.servers[1]

        # load nodeA only
        rest = RestConnection(nodeA)
        vbuckets = rest.get_vbuckets()
        for vb_info in vbuckets[0:4]:
            vbucket = vb_info.id
            self.load_docs(nodeA, vbucket, self.num_items)

        # add nodeB
        self.cluster.rebalance([nodeA], [nodeB], [])

        # stop nodeA and failover
        assert self.stop_node(0)
        self.stopped_nodes.append(0)
        self.master = nodeB
        assert self.cluster.failover([nodeB], [nodeA])
        assert self.cluster.rebalance([nodeB], [], [])

        # load nodeB only
        rest = RestConnection(nodeB)
        vbuckets = rest.get_vbuckets()
        for vb_info in vbuckets[0:4]:
            vbucket = vb_info.id
            self.load_docs(nodeB, vbucket, self.num_items)

        # add nodeA back
        assert self.start_node(0)
        del self.stopped_nodes[0]
        rest = RestHelper(RestConnection(nodeA))
        assert rest.is_ns_server_running()
        time.sleep(10)
        self.cluster.rebalance([nodeB], [nodeA], [])

        # stop nodeB and failover
        assert self.stop_node(1)
        self.master = nodeA
        self.stopped_nodes.append(1)
        assert self.cluster.failover([nodeA], [nodeB])
        assert self.cluster.rebalance([nodeA], [], [])

        # load nodeA only
        rest = RestConnection(nodeA)
        vbuckets = rest.get_vbuckets()
        for vb_info in vbuckets[0:4]:
            vbucket = vb_info.id
            self.load_docs(nodeA, vbucket, self.num_items)

        # Create connection for CbStats
        shell_conn = RemoteMachineShellConnection(self.cluster.master)
        cb_stat_obj = Cbstats(shell_conn)

        # Fetch the bucket's failover stats
        bucket = self.bucket_util.buckets[0]
        stats = cb_stat_obj.failover_stats(bucket.name)

        # Fetch vbucket seqno stats before dropping the connection
        vb_stat = cb_stat_obj.vbucket_seqno(bucket.name)

        # Disconnect the Cbstats shell_conn
        shell_conn.disconnect()
        # Check failover table entries
        for vb_info in vbuckets[0:4]:
            vb = vb_info.id
            assert long(stats['vb_'+str(vb)+':num_entries']) == 2

            dcp_client = self.dcp_client(nodeA, PRODUCER)
            stream = dcp_client.stream_req(vb, 0, 0, self.num_items*3,
                                           vb_stat[vb]["uuid"])

            _ = stream.run()
            assert stream.last_by_seqno == self.num_items*3, \
                stream.last_by_seqno
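The num_entries assertion above reads the flat key layout that cbstats uses for the failovers group. A hedged helper sketch (the key shapes are inferred from the assertions in this test, not from cbstats documentation) that collects entry counts per vBucket:

    def entries_per_vbucket(stats):
        # Pick out 'vb_<n>:num_entries' keys from a flat cbstats
        # failovers dict and map vBucket id -> failover-log entry count.
        counts = {}
        for key, value in stats.items():
            prefix, _, field = key.partition(":")
            if field == "num_entries" and prefix.startswith("vb_"):
                counts[int(prefix[3:])] = int(value)
        return counts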
Example #4
    def test_magma_rollback_n_times(self):
        items = self.num_items
        mem_only_items = self.input.param("rollback_items", 100000)
        if self.nodes_init < 2 or self.num_replicas < 1:
            self.fail("Not enough nodes/replicas in the cluster/bucket "
                      "to test rollback")
        self.num_rollbacks = self.input.param("num_rollbacks", 10)
        shell = RemoteMachineShellConnection(self.cluster_util.cluster.master)
        cbstats = Cbstats(shell)
        self.target_vbucket = cbstats.vbucket_list(
            self.bucket_util.buckets[0].name)
        start = self.num_items
        self.gen_read = copy.deepcopy(self.gen_create)
        for _ in xrange(1, self.num_rollbacks + 1):
            # Stopping persistence on NodeA
            mem_client = MemcachedClientHelper.direct_client(
                self.input.servers[0], self.bucket_util.buckets[0])
            mem_client.stop_persistence()

            self.gen_create = doc_generator(
                self.key,
                start,
                mem_only_items,
                doc_size=self.doc_size,
                doc_type=self.doc_type,
                target_vbucket=self.target_vbucket,
                vbuckets=self.cluster_util.vbuckets,
                randomize_doc_size=self.randomize_doc_size,
                randomize_value=self.randomize_value)

            self.loadgen_docs(_sync=True)
            start = self.gen_create.key_counter

            ep_queue_size_map = {
                self.cluster.nodes_in_cluster[0]: mem_only_items
            }
            vb_replica_queue_size_map = {self.cluster.nodes_in_cluster[0]: 0}

            for node in self.cluster.nodes_in_cluster[1:]:
                ep_queue_size_map.update({node: 0})
                vb_replica_queue_size_map.update({node: 0})

            for bucket in self.bucket_util.buckets:
                self.bucket_util._wait_for_stat(bucket, ep_queue_size_map)
                self.bucket_util._wait_for_stat(
                    bucket,
                    vb_replica_queue_size_map,
                    stat_name="vb_replica_queue_size")

            # Kill memcached on NodeA to trigger rollback on other Nodes
            # replica vBuckets
            for bucket in self.bucket_util.buckets:
                self.log.debug(cbstats.failover_stats(bucket.name))
            shell.kill_memcached()

            self.assertTrue(
                self.bucket_util._wait_warmup_completed(
                    [self.cluster_util.cluster.master],
                    self.bucket_util.buckets[0],
                    wait_time=self.wait_timeout * 10))
            self.sleep(10, "Not Required, but waiting for 10s after warm up")

            self.bucket_util.verify_stats_all_buckets(items, timeout=300)
            for bucket in self.bucket_util.buckets:
                self.log.debug(cbstats.failover_stats(bucket.name))

        data_validation = self.task.async_validate_docs(
            self.cluster,
            self.bucket_util.buckets[0],
            self.gen_read,
            "create",
            0,
            batch_size=self.batch_size,
            process_concurrency=self.process_concurrency,
            pause_secs=5,
            timeout_secs=self.sdk_timeout)
        self.task.jython_task_manager.get_task_result(data_validation)

        shell.disconnect()
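The expectation maps built in this test encode the core invariant: only the node with persistence stopped should accumulate a disk write queue, while every other node drains to zero. A small standalone sketch of that construction (the function and argument names are illustrative, not from the source):

    def expected_queue_sizes(nodes, stopped_node, pending_items):
        # Only the node whose persistence was stopped should hold a
        # non-zero ep_queue_size; replica queues should drain to zero
        # everywhere.
        ep_queue = dict((n, pending_items if n is stopped_node else 0)
                        for n in nodes)
        replica_queue = dict((n, 0) for n in nodes)
        return ep_queue, replica_queue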
Example #5
    def test_stream_during_rollback(self):
        '''
         -- Ensure creation of at least a single state file
         -- Stop persistence on the master node
         -- Start load on the master node (say node A) for a given duration (self.duration * 60 seconds)
         -- The above step ensures creation of new state files (count equal to self.duration)
         -- Kill memcached on the master node (node A)
         -- Trigger rollback on the other/replica nodes
         -- Start streaming data using DCP
         -- Restart persistence on the master node
         -- Start doc loading on all the nodes (ensures creation of a state file)
         -- The above two steps ensure rollback to a new snapshot
         -- Repeat all the above steps num_rollbacks times
        '''
        items = self.num_items
        mem_only_items = self.input.param("rollback_items", 10000)
        ops_len = len(self.doc_ops.split(":"))
        self.assertTrue(self.rest.update_autofailover_settings(False, 600),
                        "AutoFailover disabling failed")

        if self.nodes_init < 2 or self.num_replicas < 1:
            self.fail("Not enough nodes/replicas in the cluster/bucket \
            to test rollback")

        self.duration = self.input.param("duration", 2)
        self.num_rollbacks = self.input.param("num_rollbacks", 3)

        shell = RemoteMachineShellConnection(self.cluster.master)
        cbstats = Cbstats(self.cluster.master)
        self.target_vbucket = cbstats.vbucket_list(
            self.cluster.buckets[0].name)

        #######################################################################
        '''
        STEP - 1,  Stop persistence on master node
        '''
        master_itr = 0
        for i in range(1, self.num_rollbacks + 1):
            start = items
            self.log.info("Roll back Iteration == {}".format(i))

            mem_item_count = 0

            # Stopping persistence on NodeA
            self.log.debug("Iteration == {}, stopping persistence".format(i))
            Cbepctl(shell).persistence(self.cluster.buckets[0].name, "stop")

            ###################################################################
            '''
            STEP - 2
              -- Doc ops on the master node for self.duration * 60 seconds
              -- This step ensures new state files (number equal to self.duration)
            '''
            self.log.info("Just before compute docs, iteration {}".format(i))
            self.compute_docs(start, mem_only_items)
            self.gen_create = None
            self.gen_update = None
            self.gen_delete = None
            self.gen_expiry = None
            time_end = time.time() + 60 * self.duration
            while time.time() < time_end:
                master_itr += 1
                time_start = time.time()
                mem_item_count += mem_only_items * ops_len
                self.generate_docs(doc_ops=self.doc_ops,
                                   target_vbucket=self.target_vbucket)
                self.loadgen_docs(_sync=True,
                                  retry_exceptions=self.retry_exceptions)
                if self.gen_create is not None:
                    self.create_start = self.gen_create.key_counter
                if self.gen_update is not None:
                    self.update_start = self.gen_update.key_counter
                if self.gen_delete is not None:
                    self.delete_start = self.gen_delete.key_counter
                if self.gen_expiry is not None:
                    self.expiry_start = self.gen_expiry.key_counter

                if time.time() < time_start + 60:
                    self.sleep(
                        time_start + 60 - time.time(),
                        "master_itr == {}, Sleep to ensure creation of state files for rollback"
                        .format(master_itr))
                self.log.info("master_itr == {}, state files== {}".format(
                    master_itr, self.get_state_files(self.buckets[0])))

            ep_queue_size_map = {
                self.cluster.nodes_in_cluster[0]: mem_item_count
            }
            vb_replica_queue_size_map = {self.cluster.nodes_in_cluster[0]: 0}

            for node in self.cluster.nodes_in_cluster[1:]:
                ep_queue_size_map.update({node: 0})
                vb_replica_queue_size_map.update({node: 0})

            for bucket in self.cluster.buckets:
                self.bucket_util._wait_for_stat(bucket,
                                                ep_queue_size_map,
                                                timeout=300)
                self.bucket_util._wait_for_stat(
                    bucket,
                    vb_replica_queue_size_map,
                    cbstat_cmd="all",
                    stat_name="vb_replica_queue_size",
                    timeout=300)

            # replica vBuckets
            for bucket in self.cluster.buckets:
                self.log.debug(cbstats.failover_stats(bucket.name))
            ###################################################################
            '''
            STEP - 3
              -- Kill memcached on the master node (node A) and trigger rollback on the replica/other nodes
              -- Start streaming data (through DCP)
            '''

            shell.kill_memcached()

            self.assertTrue(
                self.bucket_util._wait_warmup_completed(
                    [self.cluster.master],
                    self.cluster.buckets[0],
                    wait_time=self.wait_timeout * 10))
            output_string = self.dcp_util.get_dcp_event()
            actual_item_count = len(
                list(filter(lambda x: 'CMD_MUTATION' in x, output_string)))
            self.log.info("actual_item_count is {}".format(actual_item_count))
            msg = "item count mismatch, expected {} actual {}"
            self.assertIs(actual_item_count == self.num_items, True,
                          msg.format(self.num_items, actual_item_count))

            ###################################################################
            '''
            STEP - 4
              -- Restart persistence on the master node (node A)
            '''

            self.log.debug("Iteration=={}, Re-Starting persistence".format(i))
            Cbepctl(shell).persistence(self.cluster.buckets[0].name, "start")
            self.sleep(
                5,
                "Iteration=={}, sleep after restarting persistence".format(i))
            ###################################################################
            '''
            STEP - 5
              -- Load docs on all the nodes
              -- Load docs for 60 seconds
              -- Ensures creation of a new state file
            '''
            if i != self.num_rollbacks:
                self.create_start = items
                self.create_end = items + 50000
                self.generate_docs(doc_ops="create", target_vbucket=None)
                _ = self.loadgen_docs(self.retry_exceptions,
                                      self.ignore_exceptions,
                                      _sync=True,
                                      doc_ops="create")
                self.bucket_util._wait_for_stats_all_buckets(
                    self.cluster, self.cluster.buckets, timeout=1200)
                items = items + 50000
                self.log.debug("Iteration == {}, Total num_items {}".format(
                    i, items))

        shell.disconnect()
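The DCP verification in STEP 3 counts CMD_MUTATION lines in the captured stream output and compares the total with the expected item count. A minimal sketch of that check (count_mutations is a hypothetical helper; in the test the lines come from self.dcp_util.get_dcp_event()):

    def count_mutations(output_lines):
        # One CMD_MUTATION line is emitted per streamed document mutation.
        return sum(1 for line in output_lines if "CMD_MUTATION" in line)

    # e.g. count_mutations(["CMD_MUTATION key=a", "CMD_SNAPSHOT"]) == 1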