Example #1
0
 def delete_all_buckets_or_assert(servers, test_case):
     """Delete every bucket on each server; fail *test_case* if one survives.

     Args:
         servers: iterable of server-info objects to clean up.
         test_case: unittest.TestCase used to fail the run when a bucket
             still exists after waiting up to 200 seconds for deletion;
             may be falsy to skip the assertion.
     """
     log = logger.Logger.get_logger()
     log.info('deleting existing buckets on {0}'.format(servers))
     for serverInfo in servers:
         rest = RestConnection(serverInfo)
         buckets = []
         try:
             buckets = rest.get_buckets()
         except Exception:
             # One retry: the REST endpoint may be briefly unavailable.
             log.info('15 seconds sleep before calling get_buckets again...')
             time.sleep(15)
             buckets = rest.get_buckets()
         for bucket in buckets:
             status = rest.delete_bucket(bucket.name)
             if not status:
                 try:
                     # Best-effort diagnostics; stats may be unavailable.
                     log.info(StatsCommon.get_stats([serverInfo], bucket.name, "timings"))
                 except Exception:
                     log.error("Unable to get timings for bucket")
             log.info('deleted bucket : {0} from {1}'.format(bucket.name, serverInfo.ip))
             msg = 'bucket "{0}" was not deleted even after waiting for two minutes'.format(bucket.name)
             if test_case:
                 if not BucketOperationHelper.wait_for_bucket_deletion(bucket.name, rest, 200):
                     try:
                         log.info(StatsCommon.get_stats([serverInfo], bucket.name, "timings"))
                     except Exception:
                         log.error("Unable to get timings for bucket")
                     test_case.fail(msg)
Example #2
0
    def delete_bucket_or_assert(serverInfo, bucket='default', test_case=None):
        """Delete *bucket* on *serverInfo* and optionally assert it is gone.

        Args:
            serverInfo: server to connect to.
            bucket: name of the bucket to delete (default 'default').
            test_case: optional unittest.TestCase; when given, the test is
                failed if the bucket still exists after waiting up to 200
                seconds for deletion.
        """
        log = logger.Logger.get_logger()
        log.info('deleting existing bucket {0} on {1}'.format(
            bucket, serverInfo))

        rest = RestConnection(serverInfo)
        if RestHelper(rest).bucket_exists(bucket):
            status = rest.delete_bucket(bucket)
            if not status:
                try:
                    # Best-effort diagnostics on failure; ignore stat errors.
                    BucketOperationHelper.print_dataStorage_content(
                        [serverInfo])
                    log.info(
                        StatsCommon.get_stats([serverInfo], bucket, "timings"))
                except Exception:
                    log.error("Unable to get timings for bucket")
            log.info('deleted bucket : {0} from {1}'.format(
                bucket, serverInfo.ip))
        msg = 'bucket "{0}" was not deleted even after waiting for two minutes'.format(
            bucket)
        if test_case:
            if not BucketOperationHelper.wait_for_bucket_deletion(
                    bucket, rest, 200):
                try:
                    # Fix: call the helper via BucketOperationHelper, matching
                    # the diagnostics path above (was an unresolved bare name).
                    BucketOperationHelper.print_dataStorage_content(
                        [serverInfo])
                    log.info(
                        StatsCommon.get_stats([serverInfo], bucket, "timings"))
                except Exception:
                    log.error("Unable to get timings for bucket")
                test_case.fail(msg)
Example #3
0
 def delete_all_buckets_or_assert(servers, test_case):
     """Delete every bucket on each server, retrying transient REST
     failures once, and fail *test_case* if any bucket survives deletion.
     """
     log = logger.Logger.get_logger()
     for serverInfo in servers:
         rest = RestConnection(serverInfo)
         buckets = []
         try:
             buckets = rest.get_buckets()
         except Exception as e:
             # One retry: the REST endpoint may be briefly unavailable.
             log.error(e)
             log.error(
                 '15 seconds sleep before calling get_buckets again...')
             time.sleep(15)
             buckets = rest.get_buckets()
         log.info('deleting existing buckets {0} on {1}'.format(
             [b.name for b in buckets], serverInfo.ip))
         for bucket in buckets:
             log.info("remove bucket {0} ...".format(bucket.name))
             try:
                 status = rest.delete_bucket(bucket.name)
             except ServerUnavailableException as e:
                 log.error(e)
                 log.error(
                     '5 seconds sleep before calling delete_bucket again...'
                 )
                 time.sleep(5)
                 status = rest.delete_bucket(bucket.name)
             if not status:
                 try:
                     # Best-effort diagnostics; never let a stats error mask
                     # the deletion failure.
                     BucketOperationHelper.print_dataStorage_content(
                         servers)
                     log.info(
                         StatsCommon.get_stats([serverInfo], bucket.name,
                                               "timings"))
                 except Exception:
                     log.error("Unable to get timings for bucket")
             log.info('deleted bucket : {0} from {1}'.format(
                 bucket.name, serverInfo.ip))
             msg = 'bucket "{0}" was not deleted even after waiting for two minutes'.format(
                 bucket.name)
             if test_case:
                 if not BucketOperationHelper.wait_for_bucket_deletion(
                         bucket.name, rest, 200):
                     try:
                         BucketOperationHelper.print_dataStorage_content(
                             servers)
                         log.info(
                             StatsCommon.get_stats([serverInfo],
                                                   bucket.name, "timings"))
                     except Exception:
                         log.error("Unable to get timings for bucket")
                     test_case.fail(msg)
Example #4
0
    def _verify_checkpoint_id(self, param, stat_key, m_stats):
        """Verify a new checkpoint was created on master and that every node
        converges to the master's current checkpoint id.

        Args:
            param: stats group name (e.g. 'checkpoint').
            stat_key: stat to compare (e.g. 'vb_0:open_checkpoint_id').
            m_stats: baseline master stats map {server: value} taken earlier.
        """
        # Scale the wait with the data set size; never below 60 seconds.
        timeout = 60 if (self.num_items * .001) < 60 else self.num_items * .001

        # Verify checkpoint id increases on master node.  list(...) keeps
        # first-key access working on Python 3 (dict views are not indexable).
        chk_pnt = int(m_stats[list(m_stats)[0]])
        tasks = []
        tasks.append(
            self.cluster.async_wait_for_stats([self.master], self.bucket,
                                              param, stat_key, '>', chk_pnt))
        for task in tasks:
            try:
                task.result(timeout)
            except TimeoutError:
                self.fail("New checkpoint not created")

        time.sleep(timeout / 10)
        # Verify master and all replicas are in sync with checkpoint ids.
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param,
                                        stat_key)
        chk_pnt = int(m_stats[list(m_stats)[0]])
        tasks = []
        for server in self.servers:
            tasks.append(
                self.cluster.async_wait_for_stats([server], self.bucket, param,
                                                  stat_key, '==', chk_pnt))
        for task in tasks:
            try:
                task.result(timeout)
            except TimeoutError:
                self.fail(
                    "Master and all replicas are NOT in sync with checkpoint ids"
                )
Example #5
0
    def checkpoint_server_down(self):
        """Load N items. Shut down server R2. Then Restart R2 and
        verify backfill happens on R1 and R2."""

        param = 'checkpoint'
        stat_key = 'vb_0:open_checkpoint_id'
        # NOTE(review): dropped an unused `rest = RestConnection(self.master)`
        # local that was never referenced.

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket, self.checkpoint_size)
        generate_load_one = BlobGenerator('nosql', 'nosql-', self.value_size, end=self.num_items)
        self._load_all_buckets(self.master, generate_load_one, "create", 0, 1, 0, True, batch_size=self.checkpoint_size, pause_secs=5, timeout_secs=180)
        self._wait_for_stats_all_buckets(self.servers[:self.num_servers])
        # Record backfill timestamps so we can later assert backfill occurred.
        prev_backfill_timestamp_R1 = self._get_backfill_timestamp(self.replica1, self.replica2)
        prev_backfill_timestamp_R2 = self._get_backfill_timestamp(self.replica2, self.replica3)

        # Snapshot the master checkpoint id before taking R2 down.
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param, stat_key)
        self._stop_server(self.replica2)
        time.sleep(5)
        data_load_thread = Thread(target=self._load_data_use_workloadgen, name="load_data", args=(self.master,))
        data_load_thread.start()
        data_load_thread.join()
        self._start_server(self.replica2)
        time.sleep(5)

        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_backfill_happen(self.replica1, self.replica2, prev_backfill_timestamp_R1, True)
        self._verify_backfill_happen(self.replica2, self.replica3, prev_backfill_timestamp_R2, True)
Example #6
0
    def checkpoint_create_time(self):
        """Load data, but let the timeout create a new checkpoint on all replicas"""

        param = 'checkpoint'
        stat_key = 'vb_0:open_checkpoint_id'

        self._set_checkpoint_timeout(self.servers[:self.num_servers],
                                     self.bucket, str(self.timeout))

        generate_load = BlobGenerator('nosql',
                                      'nosql-',
                                      self.value_size,
                                      end=self.num_items)
        self._load_all_buckets(self.master,
                               generate_load,
                               "create",
                               0,
                               1,
                               0,
                               True,
                               batch_size=self.checkpoint_size,
                               pause_secs=5,
                               timeout_secs=180)
        self._wait_for_stats_all_buckets(self.servers[:self.num_servers])

        # Baseline checkpoint id before the timeout-triggered rollover.
        chk_stats = StatsCommon.get_stats([self.master], self.bucket, param,
                                          stat_key)
        # Wait past the checkpoint timeout so a new checkpoint is created.
        # (Fixed a stray ')' in the log message.)
        self.log.info("Sleeping for {0} seconds".format(self.timeout + 5))
        time.sleep(self.timeout + 5)
        self._verify_checkpoint_id(param, stat_key, chk_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
Example #7
0
    def checkpoint_collapse(self):
        """With 3 replicas, stop replication on R2, let Master and R1 close checkpoint.
        Run load until a new checkpoint is created on Master and R1.
        Wait till checkpoints merge on R1. Restart replication of R2.
        Checkpoint should advance to the latest on R2."""

        param = 'checkpoint'
        stat_key = 'vb_0:last_closed_checkpoint_id'
        stat_chk_itms = 'vb_0:num_checkpoint_items'

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket,
                                  str(self.checkpoint_size))
        self._stop_replication(self.replica2, self.bucket)

        generate_load = BlobGenerator('nosql',
                                      'nosql-',
                                      self.value_size,
                                      end=self.num_items)
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_load, "create",
                                        0, 1, 0, True, self.checkpoint_size, 5,
                                        180))
        data_load_thread.start()
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param,
                                        stat_key)

        tasks = []
        # Expect at least two more closed checkpoints on master and R1.
        # list(...) keeps first-key access working on Python 3, where dict
        # views are not indexable.
        chk_pnt = int(m_stats[list(m_stats)[0]]) + 2
        tasks.append(
            self.cluster.async_wait_for_stats([self.master], self.bucket,
                                              param, stat_key, '>=', chk_pnt))
        tasks.append(
            self.cluster.async_wait_for_stats([self.replica1], self.bucket,
                                              param, stat_key, '>=', chk_pnt))
        tasks.append(
            self.cluster.async_wait_for_stats([self.replica1], self.bucket,
                                              param, stat_chk_itms, '>=',
                                              self.num_items))
        data_load_thread.join()
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Checkpoint not collapsed")

        tasks = []
        self._start_replication(self.replica2, self.bucket)
        # After replication resumes, R1's queued checkpoint items must drain.
        tasks.append(
            self.cluster.async_wait_for_stats([self.replica1], self.bucket,
                                              param, stat_chk_itms, '<',
                                              self.num_items))
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Checkpoints not replicated to replica2")

        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
Example #8
0
    def checkpoint_create_time(self):
        """Let the checkpoint timeout (rather than item count) open a new
        checkpoint on every node, then restore a long timeout so later
        tests are unaffected."""
        param = 'checkpoint'
        timeout = 60
        stat_key = 'vb_0:open_checkpoint_id'

        master = self._get_server_by_state(self.servers[:self.num_servers],
                                           self.bucket, ACTIVE)
        self._set_checkpoint_timeout(self.servers[:self.num_servers],
                                     self.bucket, str(timeout))
        # Baseline per-server checkpoint ids before the timeout fires.
        chk_stats = StatsCommon.get_stats(self.servers[:self.num_servers],
                                          self.bucket, param, stat_key)
        load_thread = self.generate_load(master, self.bucket, 1)
        load_thread.join()
        # Fixed a stray ')' in the log message.
        log.info("Sleeping for {0} seconds".format(timeout))
        time.sleep(timeout)
        tasks = []
        for server, value in chk_stats.items():
            tasks.append(
                self.cluster.async_wait_for_stats([server], self.bucket, param,
                                                  stat_key, '>', value))
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("New checkpoint not created")
        # Restore a long timeout so subsequent tests are not affected.
        self._set_checkpoint_timeout(self.servers[:self.num_servers],
                                     self.bucket, str(600))
Example #9
0
    def checkpoint_create_items(self):
        """Load data until a new checkpoint is created on all replicas"""

        param = 'checkpoint'
        stat_key = 'vb_0:open_checkpoint_id'

        # Shrink the checkpoint size so the load below rolls it over.
        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket,
                                  str(self.checkpoint_size))
        baseline_stats = StatsCommon.get_stats([self.master], self.bucket,
                                               param, stat_key)

        load_gen = BlobGenerator('nosql', 'nosql-', self.value_size,
                                 end=self.num_items)
        self._load_all_buckets(self.master, load_gen, "create", 0, 1, 0, True,
                               batch_size=self.checkpoint_size, pause_secs=5,
                               timeout_secs=180)
        self._wait_for_stats_all_buckets(self.servers[:self.num_servers])

        # The checkpoint id must have advanced past the baseline everywhere.
        self._verify_checkpoint_id(param, stat_key, baseline_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
Example #10
0
    def _verify_checkpoint_id(self, param, stat_key, m_stats):
        """Check a new checkpoint appeared on master, then that every node
        converges to the master's current checkpoint id.

        Args:
            param: stats group name (e.g. 'checkpoint').
            stat_key: stat to compare (e.g. 'vb_0:open_checkpoint_id').
            m_stats: baseline master stats map {server: value} taken earlier.
        """
        # Scale the wait with the data set size; never below 60 seconds.
        timeout = 60 if (self.num_items * .001) < 60 else self.num_items * .001

        # Verify checkpoint id increases on master node.  list(...) keeps
        # first-key access working on Python 3 (dict views are not indexable).
        chk_pnt = int(m_stats[list(m_stats)[0]])
        tasks = []
        tasks.append(self.cluster.async_wait_for_stats([self.master], self.bucket, param, stat_key, '>', chk_pnt))
        for task in tasks:
            try:
                task.result(timeout)
            except TimeoutError:
                self.fail("New checkpoint not created")

        time.sleep(timeout / 10)
        # Verify master and all replicas are in sync with checkpoint ids.
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param, stat_key)
        chk_pnt = int(m_stats[list(m_stats)[0]])
        tasks = []
        for server in self.servers:
            tasks.append(self.cluster.async_wait_for_stats([server], self.bucket, param, stat_key, '==', chk_pnt))
        for task in tasks:
            try:
                task.result(timeout)
            except TimeoutError:
                self.fail("Master and all replicas are NOT in sync with checkpoint ids")
Example #11
0
    def _warmup_check_without_access_log(self):
        """Poll warmup stats until every (bucket, server) pair reports warmed
        up or self.timeout elapses per pair; return True only when all pairs
        warmed up.

        Only meaningful when running without an access log; returns True
        immediately otherwise.
        """
        if not self.without_access_log:
            return True

        warmed_up = {}
        stats_all_buckets = {}
        for bucket in self.buckets:
            stats_all_buckets[bucket.name] = StatsCommon()
            warmed_up[bucket.name] = {}
            for server in self.servers:
                warmed_up[bucket.name][server] = False

        for bucket in self.buckets:
            for server in self.servers:
                start = time.time()
                while time.time() - start < self.timeout and not warmed_up[
                        bucket.name][server]:
                    # Warmed up once the warmup key count or memory usage
                    # reaches its warmup threshold / low watermark.
                    if stats_all_buckets[bucket.name].get_stats([server], bucket, 'warmup', 'ep_warmup_key_count')[server] >= \
                       stats_all_buckets[bucket.name].get_stats([server], bucket, 'warmup', 'ep_warmup_min_item_threshold')[server] or \
                       stats_all_buckets[bucket.name].get_stats([server], bucket, '', 'mem_used')[server] >= \
                       stats_all_buckets[bucket.name].get_stats([server], bucket, 'warmup', 'ep_warmup_min_memory_threshold')[server] or \
                       stats_all_buckets[bucket.name].get_stats([server], bucket, '', 'mem_used')[server] >= \
                       stats_all_buckets[bucket.name].get_stats([server], bucket, '', 'ep_mem_low_wat')[server]:
                        warmed_up[bucket.name][server] = True
                    else:
                        # NOTE(review): the condition above checks
                        # ep_warmup_key_count but this logs curr_items --
                        # confirm which stat is intended.
                        self.log.info(
                            "curr_items is %s and ep_warmup_min_item_threshold is %s"
                            % (stats_all_buckets[bucket.name].get_stats(
                                [server], bucket, '', 'curr_items')[server],
                               stats_all_buckets[bucket.name].get_stats(
                                   [server], bucket, 'warmup',
                                   'ep_warmup_min_item_threshold')[server]))
                        self.log.info(
                            "vb_active_perc_mem_resident is %s and ep_warmup_min_memory_threshold is %s"
                            % (stats_all_buckets[bucket.name].get_stats(
                                [server], bucket, '',
                                'vb_active_perc_mem_resident')[server],
                               stats_all_buckets[bucket.name].get_stats(
                                   [server], bucket, 'warmup',
                                   'ep_warmup_min_memory_threshold')[server]))
                        self.log.info(
                            "mem_used is %s and ep_mem_low_wat is %s" %
                            (stats_all_buckets[bucket.name].get_stats(
                                [server], bucket, '', 'mem_used')[server],
                             stats_all_buckets[bucket.name].get_stats(
                                 [server], bucket, '',
                                 'ep_mem_low_wat')[server]))

                    time.sleep(10)

        # Succeed only if every (bucket, server) pair warmed up; replaces the
        # original '== True' / '== False' double loop with all().
        return all(warmed_up[bucket.name][server]
                   for bucket in self.buckets
                   for server in self.servers)
Example #12
0
 def _get_backfill_timestamp(self, server, replica_server):
     """Return the backfill start timestamp (as int) that *server* reports
     for its replication TAP queue towards *replica_server*.
     """
     param = 'tap'
     stat_key = 'eq_tapq:replication_ns_1@%s:backfill_start_timestamp' % (
         replica_server.ip)
     m_stats = StatsCommon.get_stats([server], self.bucket, param, stat_key)
     # Single expected entry; hoist the lookup (was computed twice) and use
     # list(...) so first-key access also works on Python 3.
     timestamp = m_stats[list(m_stats)[0]]
     self.log.info(
         "eq_tapq:replication_ns_1@%s:backfill_start_timestamp: %s" %
         (replica_server.ip, timestamp))
     return int(timestamp)
Example #13
0
    def delete_bucket_or_assert(serverInfo, bucket='default', test_case=None):
        """Delete *bucket* on *serverInfo*; fail *test_case* if it persists.

        Args:
            serverInfo: server to connect to.
            bucket: name of the bucket to delete.
            test_case: optional unittest.TestCase used to fail the run when
                the bucket still exists after waiting up to 200 seconds.
        """
        log = logger.Logger.get_logger()
        log.info('deleting existing buckets on {0}'.format(serverInfo))

        rest = RestConnection(serverInfo)
        if RestHelper(rest).bucket_exists(bucket):
            status = rest.delete_bucket(bucket)
            if not status:
                try:
                    # Best-effort diagnostics; stats may be unavailable.
                    log.info(StatsCommon.get_stats([serverInfo], bucket, "timings"))
                except Exception:
                    log.error("Unable to get timings for bucket")
            log.info('deleted bucket : {0} from {1}'.format(bucket, serverInfo.ip))
        msg = 'bucket "{0}" was not deleted even after waiting for two minutes'.format(bucket)
        if test_case:
            # Indentation below normalized to 4-space steps (was mixed).
            if not BucketOperationHelper.wait_for_bucket_deletion(bucket, rest, 200):
                try:
                    log.info(StatsCommon.get_stats([serverInfo], bucket, "timings"))
                except Exception:
                    log.error("Unable to get timings for bucket")
                test_case.fail(msg)
Example #14
0
    def checkpoint_collapse(self):
        """Stop replication to the second slave, roll checkpoints on master
        and the first slave, then restart replication and verify the second
        slave advances to the collapsed checkpoint."""
        param = 'checkpoint'
        chk_size = 5000
        num_items = 25000
        stat_key = 'vb_0:last_closed_checkpoint_id'
        stat_chk_itms = 'vb_0:num_checkpoint_items'

        master = self._get_server_by_state(self.servers[:self.num_servers],
                                           self.bucket, ACTIVE)
        slave1 = self._get_server_by_state(self.servers[:self.num_servers],
                                           self.bucket, REPLICA1)
        slave2 = self._get_server_by_state(self.servers[:self.num_servers],
                                           self.bucket, REPLICA2)
        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket,
                                  str(chk_size))
        m_stats = StatsCommon.get_stats([master], self.bucket, param, stat_key)
        self._stop_replication(slave2, self.bucket)
        load_thread = self.generate_load(master, self.bucket, num_items)
        load_thread.join()

        # Expected checkpoint id: the load closes num_items // chk_size
        # checkpoints.  list(...) keeps first-key access working on Python 3;
        # '//' preserves the integer division Python 2 performed with '/'.
        chk_pnt = str(int(m_stats[list(m_stats)[0]]) + (num_items // chk_size))
        tasks = []
        tasks.append(
            self.cluster.async_wait_for_stats([master], self.bucket, param,
                                              stat_key, '==', chk_pnt))
        tasks.append(
            self.cluster.async_wait_for_stats([slave1], self.bucket, param,
                                              stat_key, '==', chk_pnt))
        tasks.append(
            self.cluster.async_wait_for_stats([slave1], self.bucket, param,
                                              stat_chk_itms, '>=',
                                              str(num_items)))
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Checkpoint not collapsed")

        tasks = []
        self._start_replication(slave2, self.bucket)
        # Same target id (duplicate recomputation removed); slave2 must now
        # reach it, and slave1's queued checkpoint items must drain.
        tasks.append(
            self.cluster.async_wait_for_stats([slave2], self.bucket, param,
                                              stat_key, '==', chk_pnt))
        tasks.append(
            self.cluster.async_wait_for_stats([slave1], self.bucket, param,
                                              stat_chk_itms, '<', num_items))
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Checkpoints not replicated to secondary slave")
Example #15
0
    def checkpoint_replication_pause(self):
        """With 3 replicas load data. pause replication to R2. Let checkpoints close on Master and R1.
        Restart replication of R2 and R3, backfill should not be seen on R1 and R2."""

        param = 'checkpoint'
        stat_key = 'vb_0:last_closed_checkpoint_id'

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket,
                                  str(self.checkpoint_size))
        time.sleep(5)
        # Baselines: backfill must NOT restart, so record current timestamps.
        prev_backfill_timestamp_R1 = self._get_backfill_timestamp(
            self.replica1, self.replica2)
        prev_backfill_timestamp_R2 = self._get_backfill_timestamp(
            self.replica2, self.replica3)

        generate_load = BlobGenerator('nosql',
                                      'nosql-',
                                      self.value_size,
                                      end=self.num_items)
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_load, "create",
                                        0, 1, 0, True, self.checkpoint_size, 5,
                                        180))
        data_load_thread.start()
        self._stop_replication(self.replica2, self.bucket)

        m_stats = StatsCommon.get_stats([self.master], self.bucket, param,
                                        stat_key)
        # Wait for two more checkpoints to close on master and R1.
        # list(...) keeps first-key access working on Python 3, where dict
        # views are not indexable.
        chk_pnt = int(m_stats[list(m_stats)[0]]) + 2
        tasks = []
        tasks.append(
            self.cluster.async_wait_for_stats([self.master], self.bucket,
                                              param, stat_key, '>=', chk_pnt))
        tasks.append(
            self.cluster.async_wait_for_stats([self.replica1], self.bucket,
                                              param, stat_key, '>=', chk_pnt))
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Checkpoint not closed")

        data_load_thread.join()
        self._start_replication(self.replica2, self.bucket)

        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
        self._verify_backfill_happen(self.replica1, self.replica2,
                                     prev_backfill_timestamp_R1)
        self._verify_backfill_happen(self.replica2, self.replica3,
                                     prev_backfill_timestamp_R2)
 def delete_all_buckets_or_assert(servers, test_case):
     """Delete every bucket on each server, retrying transient REST
     failures once, and fail *test_case* if any bucket survives deletion."""
     log = logger.Logger.get_logger()
     for serverInfo in servers:
         rest = RestConnection(serverInfo)
         buckets = []
         try:
             buckets = rest.get_buckets()
         except Exception as e:
             # One retry: the REST endpoint may be briefly unavailable.
             log.error(e)
             log.error('15 seconds sleep before calling get_buckets again...')
             time.sleep(15)
             buckets = rest.get_buckets()
         log.info('deleting existing buckets {0} on {1}'.format([b.name for b in buckets], serverInfo.ip))
         for bucket in buckets:
             log.info("remove bucket {0} ...".format(bucket.name))
             try:
                 status = rest.delete_bucket(bucket.name)
             except ServerUnavailableException as e:
                 log.error(e)
                 log.error('5 seconds sleep before calling delete_bucket again...')
                 time.sleep(5)
                 status = rest.delete_bucket(bucket.name)
             if not status:
                 try:
                     # Best-effort diagnostics; never let a stats error mask
                     # the deletion failure.
                     BucketOperationHelper.print_dataStorage_content(servers)
                     log.info(StatsCommon.get_stats([serverInfo], bucket.name, "timings"))
                 except Exception:
                     log.error("Unable to get timings for bucket")
             log.info('deleted bucket : {0} from {1}'.format(bucket.name, serverInfo.ip))
             msg = 'bucket "{0}" was not deleted even after waiting for two minutes'.format(bucket.name)
             if test_case:
                 if not BucketOperationHelper.wait_for_bucket_deletion(bucket.name, rest, 200):
                     try:
                         BucketOperationHelper.print_dataStorage_content(servers)
                         log.info(StatsCommon.get_stats([serverInfo], bucket.name, "timings"))
                     except Exception:
                         log.error("Unable to get timings for bucket")
                     test_case.fail(msg)
    def test_verify_mb8825(self):
        """Regression test (MB-8825): setting up reverse XDCR mid-flight must
        not cause deletions to be applied back on the source cluster.

        NOTE(review): relies on XDCR helper methods defined outside this
        file; claims about their behavior are inferred from names only.
        """
        # Setting up replication clusters.
        src_cluster_name, dest_cluster_name = "remote-dest-src", "remote-src-dest"
        self.__setup_replication_clusters(self.src_master, self.dest_master, src_cluster_name, dest_cluster_name)

        # Step-3 Load 10k items ( sets=80, deletes=20) on source cluster.
        self._load_all_buckets(self.src_master, self.gen_create, "create", 0)

        # Step-4 XDCR Source -> Remote
        self._replicate_clusters(self.src_master, dest_cluster_name)
        self.merge_buckets(self.src_master, self.dest_master, bidirection=False)

        # Step-5 Wait for replication to finish 50% at destination node
        expected_items = (self.gen_create.end) * 0.5
        dest_master_buckets = self._get_cluster_buckets(self.dest_master)

        tasks = []
        for bucket in dest_master_buckets:
            tasks.append(self.cluster.async_wait_for_stats([self.dest_master], bucket, '', 'curr_items', '>=', expected_items))
        for task in tasks:
            task.result(self._timeout * 5)

        # Perform 20% delete on Source cluster.
        tasks = []
        self.gen_delete = BlobGenerator('loadOne', 'loadOne-', self._value_size, start=0, end=int((self._num_items) * (float)(self._percent_delete) / 100))
        tasks.extend(self._async_load_all_buckets(self.src_master, self.gen_delete, "delete", 0))

        # Step-6 XDCR Remote -> Source
        self._replicate_clusters(self.dest_master, src_cluster_name)
        self.merge_buckets(self.dest_master, self.src_master, bidirection=False)

        # Wait for delete tasks to be finished
        for task in tasks:
            task.result()

        # Step-7 Wait for all the items to be replicated
        # Step-8 Compare the source and destination cluster items - item count, meta data, data content.
        self.sleep(self._timeout * 5)
        self.verify_results(verify_src=True)

        # Verify if no deletion performed at source node:
        src_master_buckets = self._get_cluster_buckets(self.src_master)
        for bucket in src_master_buckets:
            # NOTE(review): the messages below say "expected to 0" but the
            # assertions are assertNotEqual (i.e. require a non-zero value);
            # also get_stats presumably returns a per-server map, not an int.
            # Confirm the intended polarity and comparison.
            src_stat_ep_num_ops_del_meta = StatsCommon.get_stats([self.src_master], bucket, '', 'ep_num_ops_del_meta')
            src_stat_ep_num_ops_set_meta = StatsCommon.get_stats([self.src_master], bucket, '', 'ep_num_ops_set_meta')
            self.assertNotEqual(src_stat_ep_num_ops_set_meta, 0, "Number of set [%s] operation occurs at bucket = %s, while expected to 0" % (src_stat_ep_num_ops_set_meta, bucket))
            self.assertNotEqual(src_stat_ep_num_ops_del_meta, 0, "Number of delete [%s] operation occurs at bucket = %s, while expected to 0" % (src_stat_ep_num_ops_del_meta, bucket))
            if self.rep_type == "xmem":
                # xmem path: failed conflict-resolution counters are compared
                # against the remote cluster's delete-meta count.
                src_stat_ep_num_ops_del_meta_res_fail = StatsCommon.get_stats([self.src_master], bucket, '', 'ep_num_ops_del_meta_res_fail')
                src_stat_ep_num_ops_set_meta_res_fail = StatsCommon.get_stats([self.src_master], bucket, '', 'ep_num_ops_set_meta_res_fail')
                dest_stat_ep_num_ops_del_meta = StatsCommon.get_stats([self.dest_master], bucket, '', 'ep_num_ops_del_meta')

                self.assertNotEqual(src_stat_ep_num_ops_del_meta_res_fail, dest_stat_ep_num_ops_del_meta, "Number of failed delete [%s] operation occurs at bucket = %s, while expected to %s" % (src_stat_ep_num_ops_del_meta_res_fail, bucket, dest_stat_ep_num_ops_del_meta))
                self.assertTrue(src_stat_ep_num_ops_set_meta_res_fail > 0, "Number of failed set [%s] operation occurs at bucket = %s, while expected greater than 0" % (src_stat_ep_num_ops_set_meta_res_fail, bucket))
            elif self.rep_type == "capi":
                # capi path: get-meta traffic is expected instead.
                src_stat_ep_num_ops_get_meta = StatsCommon.get_stats([self.src_master], bucket, '', 'ep_num_ops_get_meta')
                self.assertTrue(src_stat_ep_num_ops_get_meta > 0, "Number of get [%s] operation occurs at bucket = %s, while expected greater than 0" % (src_stat_ep_num_ops_get_meta, bucket))
Example #18
0
    def checkpoint_create_items(self):
        """Load data until a new checkpoint is created on all replicas"""

        param = 'checkpoint'
        stat_key = 'vb_0:open_checkpoint_id'

        # Shrink the checkpoint size so the coming load forces a rollover.
        self._set_checkpoint_size(self.servers[:self.num_servers],
                                  self.bucket,
                                  str(self.checkpoint_size))
        baseline = StatsCommon.get_stats([self.master],
                                         self.bucket,
                                         param,
                                         stat_key)

        doc_generator = BlobGenerator('nosql',
                                      'nosql-',
                                      self.value_size,
                                      end=self.num_items)
        self._load_all_buckets(self.master,
                               doc_generator,
                               "create",
                               0,
                               1,
                               0,
                               True,
                               batch_size=self.checkpoint_size,
                               pause_secs=5,
                               timeout_secs=180)
        self._wait_for_stats_all_buckets(self.servers[:self.num_servers])

        # The checkpoint id must have advanced past the baseline everywhere.
        self._verify_checkpoint_id(param, stat_key, baseline)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
Example #19
0
 def delete_all_buckets_or_assert(servers, test_case):
     """Delete every bucket on each server in *servers*.

     For each bucket whose deletion does not succeed immediately, the
     bucket's timing stats are logged; when *test_case* is supplied, the
     test fails if the bucket still exists after waiting up to 200 seconds.
     """
     log = logger.Logger.get_logger()
     log.info('deleting existing buckets on {0}'.format(servers))
     for serverInfo in servers:
         rest = RestConnection(serverInfo)
         buckets = []
         try:
             buckets = rest.get_buckets()
         except Exception:
             # Transient REST failures happen right after node operations;
             # pause briefly and retry once. (Was a bare ``except:`` which
             # also swallowed KeyboardInterrupt/SystemExit.)
             log.info(
                 '15 seconds sleep before calling get_buckets again...')
             time.sleep(15)
             buckets = rest.get_buckets()
         for bucket in buckets:
             status = rest.delete_bucket(bucket.name)
             if not status:
                 # Best-effort diagnostics only; never mask the deletion flow.
                 try:
                     log.info(
                         StatsCommon.get_stats([serverInfo], bucket.name,
                                               "timings"))
                 except Exception:
                     log.error("Unable to get timings for bucket")
             log.info('deleted bucket : {0} from {1}'.format(
                 bucket.name, serverInfo.ip))
             msg = 'bucket "{0}" was not deleted even after waiting for two minutes'.format(
                 bucket.name)
             if test_case:
                 if not BucketOperationHelper.wait_for_bucket_deletion(
                         bucket.name, rest, 200):
                     try:
                         log.info(
                             StatsCommon.get_stats([serverInfo],
                                                   bucket.name, "timings"))
                     except Exception:
                         log.error("Unable to get timings for bucket")
                     test_case.fail(msg)
Example #20
0
    def checkpoint_collapse(self):
        """With 3 replicas, stop replication on R2, let Master and R1 close checkpoint.
        Run load until a new checkpoint is created on Master and R1.
        Wait till checkpoints merge on R1. Restart replication of R2.
        Checkpoint should advance to the latest on R2."""

        param = 'checkpoint'
        stat_key = 'vb_0:last_closed_checkpoint_id'
        stat_chk_itms = 'vb_0:num_checkpoint_items'

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket, str(self.checkpoint_size))
        self._stop_replication(self.replica2, self.bucket)

        generate_load = BlobGenerator('nosql', 'nosql-', self.value_size, end=self.num_items)
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_load, "create", 0, 1, 0, True, self.checkpoint_size, 5, 180))
        data_load_thread.start()
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param, stat_key)

        tasks = []
        # BUG FIX: dict.keys() returns a non-indexable view on Python 3;
        # materialize the keys so the first one can be taken on either version.
        chk_pnt = int(m_stats[list(m_stats)[0]]) + 2
        tasks.append(self.cluster.async_wait_for_stats([self.master], self.bucket, param, stat_key,
                                                       '>=', chk_pnt))
        tasks.append(self.cluster.async_wait_for_stats([self.replica1], self.bucket, param, stat_key,
                                                       '>=', chk_pnt))
        tasks.append(self.cluster.async_wait_for_stats([self.replica1], self.bucket, param,
                                                       stat_chk_itms, '>=', self.num_items))
        data_load_thread.join()
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Checkpoint not collapsed")

        tasks = []
        self._start_replication(self.replica2, self.bucket)
        # Once replication resumes, R1's accumulated checkpoint items should
        # drain below num_items as the collapsed checkpoints replicate to R2.
        tasks.append(self.cluster.async_wait_for_stats([self.replica1], self.bucket, param,
                                                       stat_chk_itms, '<', self.num_items))
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Checkpoints not replicated to replica2")

        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
Example #21
0
    def checkpoint_server_down(self):
        """Load N items. Shut down server R2. Then Restart R2 and
        verify backfill happens on R1 and R2."""
        param = 'checkpoint'
        stat_key = 'vb_0:open_checkpoint_id'
        rest = RestConnection(self.master)
        cluster_nodes = self.servers[:self.num_servers]

        self._set_checkpoint_size(cluster_nodes, self.bucket,
                                  self.checkpoint_size)
        initial_gen = BlobGenerator('nosql', 'nosql-', self.value_size,
                                    end=self.num_items)
        self._load_all_buckets(self.master, initial_gen, "create", 0, 1, 0,
                               True, batch_size=self.checkpoint_size,
                               pause_secs=5, timeout_secs=180)
        self._wait_for_stats_all_buckets(cluster_nodes)
        # Record backfill timestamps before the outage so we can later prove
        # that a backfill actually took place.
        ts_before_r1 = self._get_backfill_timestamp(self.replica1,
                                                    self.replica2)
        ts_before_r2 = self._get_backfill_timestamp(self.replica2,
                                                    self.replica3)

        m_stats = StatsCommon.get_stats([self.master], self.bucket, param,
                                        stat_key)
        self._stop_server(self.replica2)
        time.sleep(5)
        loader = Thread(target=self._load_data_use_workloadgen,
                        name="load_data",
                        args=(self.master, ))
        loader.start()
        loader.join()
        self._start_server(self.replica2)
        time.sleep(5)

        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_backfill_happen(self.replica1, self.replica2,
                                     ts_before_r1, True)
        self._verify_backfill_happen(self.replica2, self.replica3,
                                     ts_before_r2, True)
Example #22
0
    def checkpoint_replication_pause_failover(self):
        """Load N items. Stop replication R3. Load N' more items.
        Failover R2. When restart replication to R3, verify backfill doesn't happen on R1."""

        param = 'checkpoint'
        stat_key = 'vb_0:open_checkpoint_id'
        rest = RestConnection(self.master)
        nodes = rest.node_statuses()
        failover_node = None
        for node in nodes:
            if node.id.find(self.replica2.ip) >= 0:
                failover_node = node

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket, self.checkpoint_size)
        generate_load_one = BlobGenerator('nosql', 'nosql-', self.value_size, end=self.num_items)
        self._load_all_buckets(self.master, generate_load_one, "create", 0, 1, 0, True, batch_size=self.checkpoint_size, pause_secs=5, timeout_secs=180)
        self._wait_for_stats_all_buckets(self.servers[:self.num_servers])
        prev_backfill_timestamp_R1 = self._get_backfill_timestamp(self.replica1, self.replica2)
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param, stat_key)
        self._stop_replication(self.replica3, self.bucket)

        generate_load_two = BlobGenerator('sqlite', 'sqlite-', self.value_size, end=self.num_items)
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_load_two, "create", 0, 1, 0, True, self.checkpoint_size, 5, 180))
        data_load_thread.start()

        failed_over = rest.fail_over(failover_node.id)
        if not failed_over:
            self.log.info("unable to failover the node the first time. try again in  60 seconds..")
            # retry once after a pause
            time.sleep(75)
            failed_over = rest.fail_over(failover_node.id)
        # BUG FIX: the original message mixed a %-style placeholder with
        # str.format() ("... node %s".format(ip)), so the IP was never
        # substituted into the assertion message.
        self.assertTrue(failed_over, "unable to failover node {0}".format(self.replica2.ip))
        self.log.info("failed over node : {0}".format(failover_node.id))
        data_load_thread.join()
        self._start_replication(self.replica3, self.bucket)

        # R2 has been failed over; verification now targets the surviving nodes.
        self.servers = [self.master, self.replica1, self.replica3]
        self.num_servers = len(self.servers)
        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
        self._verify_backfill_happen(self.replica1, self.replica2, prev_backfill_timestamp_R1)
        self.cluster.rebalance([self.master, self.replica1, self.replica2, self.replica3], [], [self.replica2])
        self.cluster.rebalance([self.master, self.replica1, self.replica3], [self.replica2], [])
Example #23
0
    def checkpoint_create_time(self):
        """Load data, but let the timeout create a new checkpoint on all replicas"""

        param = 'checkpoint'
        stat_key = 'vb_0:open_checkpoint_id'

        self._set_checkpoint_timeout(self.servers[:self.num_servers], self.bucket, str(self.timeout))

        generate_load = BlobGenerator('nosql', 'nosql-', self.value_size, end=self.num_items)
        self._load_all_buckets(self.master, generate_load, "create", 0, 1, 0, True, batch_size=self.checkpoint_size, pause_secs=5, timeout_secs=180)
        self._wait_for_stats_all_buckets(self.servers[:self.num_servers])

        chk_stats = StatsCommon.get_stats([self.master], self.bucket, param, stat_key)
        # BUG FIX: removed the stray ')' the original log message carried.
        self.log.info("Sleeping for {0} seconds".format(self.timeout + 5))
        # Sleep past the checkpoint timeout so a new checkpoint opens on its own.
        time.sleep(self.timeout + 5)
        self._verify_checkpoint_id(param, stat_key, chk_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
Example #24
0
    def checkpoint_deduplication(self):
        """Disable replication of R1. Load N items to master, then mutate some of them.
        Restart replication of R1, only N items should be in stats. In this test, we can
        only load number of items <= checkpoint_size to observe deduplication"""

        param = 'checkpoint'
        stat_key = 'vb_0:num_open_checkpoint_items'
        stat_key_id = 'vb_0:open_checkpoint_id'

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket, self.checkpoint_size)
        self._stop_replication(self.replica1, self.bucket)

        generate_load = BlobGenerator('nosql', 'nosql-', self.value_size, end=self.num_items)
        generate_update = BlobGenerator('nosql', 'sql-', self.value_size, end=self.num_items)
        self._load_all_buckets(self.master, generate_load, "create", 0, 1, 0, True, batch_size=self.checkpoint_size, pause_secs=5, timeout_secs=180)
        self._wait_for_stats_all_buckets([self.master, self.replica2, self.replica3])
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param, stat_key_id)
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_update, "update", 0, 1, 0, True, self.checkpoint_size, 5, 180))
        data_load_thread.start()
        self._start_replication(self.replica1, self.bucket)
        data_load_thread.join()

        # BUG FIX: dict.keys() returns a non-indexable view on Python 3;
        # materialize the keys so the first one can be taken on either version.
        chk_pnt = int(m_stats[list(m_stats)[0]])
        # Allow roughly 1ms per item for mutations to settle, 60s minimum.
        timeout = 60 if (self.num_items * .001) < 60 else self.num_items * .001
        time.sleep(timeout)
        tasks = []
        tasks.append(self.cluster.async_wait_for_stats([self.master], self.bucket, param,
                                                       stat_key, '==', self.num_items))
        tasks.append(self.cluster.async_wait_for_stats([self.replica1], self.bucket, param,
                                                       stat_key, '==', self.num_items))
        tasks.append(self.cluster.async_wait_for_stats([self.master], self.bucket, param,
                                                       stat_key_id, '==', chk_pnt))
        tasks.append(self.cluster.async_wait_for_stats([self.replica1], self.bucket, param,
                                                       stat_key_id, '==', chk_pnt))

        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Items weren't deduplicated")

        self._verify_stats_all_buckets(self.servers[:self.num_servers])
Example #25
0
    def checkpoint_collapse(self):
        """Stop replication to the second slave, load enough items to close
        several checkpoints, then verify they collapse on the first slave and
        replicate once the second slave's replication resumes."""
        param = 'checkpoint'
        chk_size = 5000
        num_items = 25000
        stat_key = 'vb_0:last_closed_checkpoint_id'
        stat_chk_itms = 'vb_0:num_checkpoint_items'

        master = self._get_server_by_state(self.servers[:self.num_servers], self.bucket, ACTIVE)
        slave1 = self._get_server_by_state(self.servers[:self.num_servers], self.bucket, REPLICA1)
        slave2 = self._get_server_by_state(self.servers[:self.num_servers], self.bucket, REPLICA2)
        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket, str(chk_size))
        m_stats = StatsCommon.get_stats([master], self.bucket, param, stat_key)
        self._stop_replication(slave2, self.bucket)
        load_thread = self.generate_load(master, self.bucket, num_items)
        load_thread.join()

        tasks = []
        # BUG FIX: list(...) because dict views aren't indexable on Python 3,
        # and floor division (//) because true division would make chk_pnt a
        # float-formatted string ("6.0" instead of "6") on Python 3.
        chk_pnt = str(int(m_stats[list(m_stats)[0]]) + (num_items // chk_size))
        tasks.append(self.cluster.async_wait_for_stats([master], self.bucket, param, stat_key,
                                                       '==', chk_pnt))
        tasks.append(self.cluster.async_wait_for_stats([slave1], self.bucket, param, stat_key,
                                                       '==', chk_pnt))
        tasks.append(self.cluster.async_wait_for_stats([slave1], self.bucket, param,
                                                       stat_chk_itms, '>=', str(num_items)))
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Checkpoint not collapsed")

        tasks = []
        self._start_replication(slave2, self.bucket)
        chk_pnt = str(int(m_stats[list(m_stats)[0]]) + (num_items // chk_size))
        tasks.append(self.cluster.async_wait_for_stats([slave2], self.bucket, param, stat_key,
                                                       '==', chk_pnt))
        tasks.append(self.cluster.async_wait_for_stats([slave1], self.bucket, param,
                                                       stat_chk_itms, '<', num_items))
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Checkpoints not replicated to secondary slave")
Example #26
0
    def checkpoint_create_items(self):
        """Verify that loading items advances the open checkpoint id on every node."""
        param = 'checkpoint'
        stat_key = 'vb_0:open_checkpoint_id'
        num_items = 6000
        cluster_nodes = self.servers[:self.num_servers]

        active_node = self._get_server_by_state(cluster_nodes, self.bucket, ACTIVE)
        self._set_checkpoint_size(cluster_nodes, self.bucket, '5000')
        # Per-server baseline checkpoint ids, captured before the load.
        baseline = StatsCommon.get_stats(cluster_nodes, self.bucket,
                                         param, stat_key)
        loader = self.generate_load(active_node, self.bucket, num_items)
        loader.join()

        wait_tasks = [
            self.cluster.async_wait_for_stats([server], self.bucket, param,
                                              stat_key, '>', old_value)
            for server, old_value in baseline.items()
        ]
        for task in wait_tasks:
            try:
                # Roughly 1ms per item, but never less than 30 seconds.
                task.result(30 if (num_items * .001) < 30 else num_items * .001)
            except TimeoutError:
                self.fail("New checkpoint not created")
Example #27
0
    def checkpoint_replication_pause(self):
        """With 3 replicas load data. pause replication to R2. Let checkpoints close on Master and R1.
        Restart replication of R2 and R3, backfill should not be seen on R1 and R2."""

        param = 'checkpoint'
        stat_key = 'vb_0:last_closed_checkpoint_id'

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket, str(self.checkpoint_size))
        time.sleep(5)
        prev_backfill_timestamp_R1 = self._get_backfill_timestamp(self.replica1, self.replica2)
        prev_backfill_timestamp_R2 = self._get_backfill_timestamp(self.replica2, self.replica3)

        generate_load = BlobGenerator('nosql', 'nosql-', self.value_size, end=self.num_items)
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_load, "create", 0, 1, 0, True, self.checkpoint_size, 5, 180))
        data_load_thread.start()
        self._stop_replication(self.replica2, self.bucket)

        m_stats = StatsCommon.get_stats([self.master], self.bucket, param, stat_key)
        # BUG FIX: dict.keys() returns a non-indexable view on Python 3;
        # materialize the keys so the first one can be taken on either version.
        chk_pnt = int(m_stats[list(m_stats)[0]]) + 2
        tasks = []
        tasks.append(self.cluster.async_wait_for_stats([self.master], self.bucket, param, stat_key,
                                                       '>=', chk_pnt))
        tasks.append(self.cluster.async_wait_for_stats([self.replica1], self.bucket, param, stat_key,
                                                       '>=', chk_pnt))
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Checkpoint not closed")

        data_load_thread.join()
        self._start_replication(self.replica2, self.bucket)

        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
        self._verify_backfill_happen(self.replica1, self.replica2, prev_backfill_timestamp_R1)
        self._verify_backfill_happen(self.replica2, self.replica3, prev_backfill_timestamp_R2)
Example #28
0
    def checkpoint_create_time(self):
        """Verify that the checkpoint timeout alone opens a new checkpoint
        on every node, then restore a long timeout for later tests."""
        param = 'checkpoint'
        timeout = 60
        stat_key = 'vb_0:open_checkpoint_id'

        master = self._get_server_by_state(self.servers[:self.num_servers], self.bucket, ACTIVE)
        self._set_checkpoint_timeout(self.servers[:self.num_servers], self.bucket, str(timeout))
        chk_stats = StatsCommon.get_stats(self.servers[:self.num_servers], self.bucket,
                                          param, stat_key)
        load_thread = self.generate_load(master, self.bucket, 1)
        load_thread.join()
        # BUG FIX: removed the stray ')' the original log message carried.
        # NOTE(review): module-level ``log`` is used here while similar tests
        # use self.log -- confirm a module logger exists in this file.
        log.info("Sleeping for {0} seconds".format(timeout))
        time.sleep(timeout)
        tasks = []
        for server, value in chk_stats.items():
            tasks.append(self.cluster.async_wait_for_stats([server], self.bucket, param,
                                                           stat_key, '>', value))
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("New checkpoint not created")
        # Restore a long checkpoint timeout so subsequent tests are unaffected.
        self._set_checkpoint_timeout(self.servers[:self.num_servers], self.bucket, str(600))
Example #29
0
    def _create_access_log(self):
        """Trigger the access scanner on every server/bucket pair and wait
        until each scanner completes a new run."""
        per_bucket_stats = {b.name: StatsCommon() for b in self.buckets}

        for bucket in self.buckets:
            stats = per_bucket_stats[bucket.name]
            for server in self.servers:
                runs_so_far = stats.get_stats(
                    [server], bucket, '', 'ep_num_access_scanner_runs')[server]
                self.log.info(
                    "current access scanner run for %s in bucket %s is %s times"
                    % (server.ip, bucket.name, runs_so_far))
                self.log.info(
                    "setting access scanner time %s minutes for %s in bucket %s"
                    % (self.access_log_time, server.ip, bucket.name))
                ClusterOperationHelper.flushctl_set(server, "alog_sleep_time",
                                                    self.access_log_time,
                                                    bucket.name)
                if not self._wait_for_access_run(self.access_log_time,
                                                 runs_so_far, server, bucket,
                                                 stats):
                    self.fail(
                        "Not able to create access log within %s minutes" %
                        self.access_log_time)
Example #30
0
    def checkpoint_create_items(self):
        """After loading items, the open checkpoint id must advance on every node."""
        param = 'checkpoint'
        stat_key = 'vb_0:open_checkpoint_id'
        num_items = 6000

        active_node = self._get_server_by_state(
            self.servers[:self.num_servers], self.bucket, ACTIVE)
        self._set_checkpoint_size(
            self.servers[:self.num_servers], self.bucket, '5000')
        # Per-server open-checkpoint ids before the load.
        stats_before = StatsCommon.get_stats(
            self.servers[:self.num_servers], self.bucket, param, stat_key)
        load_thread = self.generate_load(active_node, self.bucket, num_items)
        load_thread.join()

        pending = []
        for server, old_value in stats_before.items():
            pending.append(self.cluster.async_wait_for_stats(
                [server], self.bucket, param, stat_key, '>', old_value))
        for task in pending:
            try:
                # Roughly 1ms per item, with a 30 second floor.
                timeout = 30 if (num_items * .001) < 30 else num_items * .001
                task.result(timeout)
            except TimeoutError:
                self.fail("New checkpoint not created")
Example #31
0
    def test_items_append(self):
        """Append data onto a subset of keys until each value reaches
        desired_item_size, then verify memory stats, value sizes and
        (optionally) exact value contents.

        Params (from self.input): desired_item_size, append_size,
        fixed_append_size, append_ratio.
        """
        self.desired_item_size = self.input.param("desired_item_size", 2048)
        self.append_size = self.input.param("append_size", 1024)
        self.fixed_append_size = self.input.param("fixed_append_size", True)
        self.append_ratio = self.input.param("append_ratio", 0.5)
        self._load_all_buckets(self.master, self.gen_create, "create", 0,
                               batch_size=10000, pause_secs=5, timeout_secs=100)

        for bucket in self.buckets:
            verify_dict = {}
            vkeys, dkeys = bucket.kvs[1].key_set()

            # Select the first append_ratio fraction of the valid keys.
            key_count = len(vkeys)
            app_ratio = self.append_ratio * key_count
            selected_keys = []
            i = 0
            for key in vkeys:
                i += 1
                if i >= app_ratio:
                    break
                selected_keys.append(key)

            awareness = VBucketAwareMemcached(RestConnection(self.master), bucket.name)
            if self.kv_verify:
                # Remember the starting value of every selected key so the
                # final content check can reconstruct the expected result.
                for key in selected_keys:
                    value = awareness.memcached(key).get(key)[2]
                    verify_dict[key] = value

            self.log.info("Bucket: {0}".format(bucket.name))
            self.log.info("Appending to have items whose initial size was "
                            + "{0} to equal or cross a size of {1}".format(self.value_size, self.desired_item_size))
            self.log.info("Item-appending of {0} items starting ..".format(len(selected_keys)+1))

            # With fixed_append_size off, append lengths grow as powers of two
            # starting at 2**3.
            index = 3
            while self.value_size < self.desired_item_size:
                str_len = self.append_size
                if not self.fixed_append_size:
                    str_len = int(math.pow(2, index))

                for key in selected_keys:
                    random_string = self.random_str_generator(str_len)
                    awareness.memcached(key).append(key, random_string)

                    if self.kv_verify:
                        verify_dict[key] = verify_dict[key] + random_string

                self.value_size += str_len
                index += 1

            self.log.info("The appending of {0} items ended".format(len(selected_keys)+1))

            msg = "Bucket:{0}".format(bucket.name)
            self.log.info("VERIFICATION <" + msg + ">: Phase 0 - Check the gap between "
                      + "mem_used by the bucket and total_allocated_bytes")
            stats = StatsCommon()
            mem_used_stats = stats.get_stats(self.servers, bucket, 'memory', 'mem_used')
            total_allocated_bytes_stats = stats.get_stats(self.servers, bucket, 'memory', 'total_allocated_bytes')
            total_fragmentation_bytes_stats = stats.get_stats(self.servers, bucket, 'memory', 'total_fragmentation_bytes')

            for server in self.servers:
                self.log.info("In {0} bucket {1}, total_fragmentation_bytes + the total_allocated_bytes = {2}"
                              .format(server.ip, bucket.name, (int(total_fragmentation_bytes_stats[server]) + int(total_allocated_bytes_stats[server]))))
                self.log.info("In {0} bucket {1}, mem_used = {2}".format(server.ip, bucket.name, mem_used_stats[server]))
                self.log.info("In {0} bucket {1}, the difference between acutal memory used by memcached and mem_used is {2} times"
                              .format(server.ip, bucket.name, float(int(total_fragmentation_bytes_stats[server]) + int(total_allocated_bytes_stats[server]))/float(mem_used_stats[server])))

            self.log.info("VERIFICATION <" + msg + ">: Phase1 - Check if any of the "
                    + "selected keys have value less than the desired value size")
            for key in selected_keys:
                value = awareness.memcached(key).get(key)[2]
                if len(value) < self.desired_item_size:
                    self.fail("Failed to append enough to make value size surpass the "
                                + "size, for key {0}".format(key))

            if self.kv_verify:
                self.log.info("VERIFICATION <" + msg + ">: Phase2 - Check if the content "
                        + "after the appends match whats expected")
                for k in verify_dict:
                    # BUG FIX: the original routed the get through
                    # awareness.memcached(key) -- the stale loop variable from
                    # Phase 1 -- so every lookup used the wrong key's vbucket
                    # mapping. Route by the key actually being fetched.
                    if awareness.memcached(k).get(k)[2] != verify_dict[k]:
                        self.fail("Content at key {0}: not what's expected.".format(k))
                self.log.info("VERIFICATION <" + msg + ">: Successful")
Example #32
0
    def _load_dgm(self):
        """Keep loading generated data until each bucket drops into a
        disk-greater-than-memory (DGM) state, then optionally run the
        update/delete/expire operations listed in self.doc_ops.

        Every generator used for loading is recorded in self.load_gen_list so
        the doc_ops phase can replay subsets of the loaded keys.
        """
        generate_load = BlobGenerator('nosql',
                                      'nosql-',
                                      self.value_size,
                                      end=self.num_items)
        self._load_all_buckets(self.master,
                               generate_load,
                               "create",
                               0,
                               1,
                               0,
                               True,
                               batch_size=20000,
                               pause_secs=5,
                               timeout_secs=180)
        self.load_gen_list.append(generate_load)

        # One StatsCommon helper per bucket, used to poll the resident ratio.
        stats_all_buckets = {}
        for bucket in self.buckets:
            stats_all_buckets[bucket.name] = StatsCommon()

        for bucket in self.buckets:
            threshold_reached = False
            while not threshold_reached:
                for server in self.servers:
                    active_resident = stats_all_buckets[bucket.name].get_stats(
                        [server], bucket, '',
                        'vb_active_perc_mem_resident')[server]
                    if int(active_resident) > self.active_resident_threshold:
                        # Still above the target resident ratio: load another
                        # batch of items under a fresh random key prefix and
                        # re-check. NOTE(review): the inner loop only breaks
                        # once SOME server falls below the threshold, so later
                        # servers in self.servers may not be re-checked --
                        # confirm this is the intended DGM criterion.
                        self.log.info(
                            "resident ratio is %s greater than %s for %s in bucket %s. Continue loading to the cluster"
                            % (active_resident, self.active_resident_threshold,
                               server.ip, bucket.name))
                        random_key = key_generator()
                        generate_load = BlobGenerator(random_key,
                                                      '%s-' % random_key,
                                                      self.value_size,
                                                      end=self.num_items)
                        self._load_all_buckets(self.master,
                                               generate_load,
                                               "create",
                                               0,
                                               1,
                                               0,
                                               True,
                                               batch_size=20000,
                                               pause_secs=5,
                                               timeout_secs=180)
                        self.load_gen_list.append(generate_load)
                    else:
                        threshold_reached = True
                        self.log.info(
                            "DGM state achieved for %s in bucket %s!" %
                            (server.ip, bucket.name))
                        break

        # Optional post-load mutations, driven by the doc_ops test parameter.
        if (self.doc_ops is not None):
            if ("update" in self.doc_ops):
                # Re-write the first half of the loaded generators.
                for gen in self.load_gen_list[:int(
                        len(self.load_gen_list) * 0.5)]:
                    self._load_all_buckets(self.master,
                                           gen,
                                           "update",
                                           0,
                                           1,
                                           0,
                                           True,
                                           batch_size=20000,
                                           pause_secs=5,
                                           timeout_secs=180)
            if ("delete" in self.doc_ops):
                # Delete the second half of the loaded generators.
                for gen in self.load_gen_list[
                        int(len(self.load_gen_list) * 0.5):]:
                    self._load_all_buckets(self.master,
                                           gen,
                                           "delete",
                                           0,
                                           1,
                                           0,
                                           True,
                                           batch_size=20000,
                                           pause_secs=5,
                                           timeout_secs=180)
            if ("expire" in self.doc_ops):
                # Re-write the first 80% of the generators with a TTL, wait
                # for the TTL to pass, then force the expiry pager to run
                # quickly on every server so expired items are purged.
                for gen in self.load_gen_list[:int(
                        len(self.load_gen_list) * 0.8)]:
                    self._load_all_buckets(self.master,
                                           gen,
                                           "update",
                                           self.expire_time,
                                           1,
                                           0,
                                           True,
                                           batch_size=20000,
                                           pause_secs=5,
                                           timeout_secs=180)
                time.sleep(self.expire_time * 2)

                for server in self.servers:
                    shell = RemoteMachineShellConnection(server)
                    for bucket in self.buckets:
                        shell.execute_cbepctl(bucket, "", "set flush_param",
                                              "exp_pager_stime", 5)
                    shell.disconnect()
                time.sleep(30)
Example #33
0
    def checkpoint_replication_pause_failover(self):
        """Load N items. Stop replication R3. Load N' more items.
        Failover R2. When restart replication to R3, verify backfill doesn't happen on R1."""

        param = 'checkpoint'
        stat_key = 'vb_0:open_checkpoint_id'
        rest = RestConnection(self.master)
        nodes = rest.node_statuses()
        failover_node = None
        for node in nodes:
            if node.id.find(self.replica2.ip) >= 0:
                failover_node = node
        # Fail fast if replica2 is not in the cluster instead of crashing
        # later with an AttributeError on failover_node.id.
        self.assertTrue(
            failover_node is not None,
            "node {0} not found in cluster".format(self.replica2.ip))

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket,
                                  self.checkpoint_size)
        generate_load_one = BlobGenerator('nosql',
                                          'nosql-',
                                          self.value_size,
                                          end=self.num_items)
        self._load_all_buckets(self.master,
                               generate_load_one,
                               "create",
                               0,
                               1,
                               0,
                               True,
                               batch_size=self.checkpoint_size,
                               pause_secs=5,
                               timeout_secs=180)
        self._wait_for_stats_all_buckets(self.servers[:self.num_servers])
        prev_backfill_timestamp_R1 = self._get_backfill_timestamp(
            self.replica1, self.replica2)
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param,
                                        stat_key)
        self._stop_replication(self.replica3, self.bucket)

        generate_load_two = BlobGenerator('sqlite',
                                          'sqlite-',
                                          self.value_size,
                                          end=self.num_items)
        # Load in a background thread so the failover happens while data
        # is still streaming in.
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_load_two,
                                        "create", 0, 1, 0, True,
                                        self.checkpoint_size, 5, 180))
        data_load_thread.start()

        failed_over = rest.fail_over(failover_node.id)
        if not failed_over:
            self.log.info(
                "unable to failover the node the first time. try again in  60 seconds.."
            )
            # Wait before the single retry.
            time.sleep(75)
            failed_over = rest.fail_over(failover_node.id)
        # Was '"... %s".format(...)' which never substitutes -- the message
        # would literally contain '%s'. Use a {0} placeholder with .format().
        self.assertTrue(failed_over,
                        "unable to failover node {0}".format(self.replica2.ip))
        self.log.info("failed over node : {0}".format(failover_node.id))
        data_load_thread.join()
        self._start_replication(self.replica3, self.bucket)

        # R2 is failed over; the remaining topology is master, R1, R3.
        self.servers = []
        self.servers = [self.master, self.replica1, self.replica3]
        self.num_servers = len(self.servers)
        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
        self._verify_backfill_happen(self.replica1, self.replica2,
                                     prev_backfill_timestamp_R1)
        # Rebalance the failed-over node out, then add it back in.
        self.cluster.rebalance(
            [self.master, self.replica1, self.replica2, self.replica3], [],
            [self.replica2])
        self.cluster.rebalance([self.master, self.replica1, self.replica3],
                               [self.replica2], [])
Example #34
0
    def checkpoint_deduplication(self):
        """Disable replication of R1. Load N items to master, then mutate some of them.
        Restart replication of R1, only N items should be in stats. In this test, we can
        only load number of items <= checkpoint_size to observe deduplication"""

        param = 'checkpoint'
        stat_key = 'vb_0:num_open_checkpoint_items'
        stat_key_id = 'vb_0:open_checkpoint_id'

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket,
                                  self.checkpoint_size)
        self._stop_replication(self.replica1, self.bucket)

        # Same key prefix ('nosql') for both generators so the second load
        # mutates the same keys and the mutations can be deduplicated.
        generate_load = BlobGenerator('nosql',
                                      'nosql-',
                                      self.value_size,
                                      end=self.num_items)
        generate_update = BlobGenerator('nosql',
                                        'sql-',
                                        self.value_size,
                                        end=self.num_items)
        self._load_all_buckets(self.master,
                               generate_load,
                               "create",
                               0,
                               1,
                               0,
                               True,
                               batch_size=self.checkpoint_size,
                               pause_secs=5,
                               timeout_secs=180)
        self._wait_for_stats_all_buckets(
            [self.master, self.replica2, self.replica3])
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param,
                                        stat_key_id)
        # Mutate in the background while replication to R1 is restarted.
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_update, "update",
                                        0, 1, 0, True, self.checkpoint_size, 5,
                                        180))
        data_load_thread.start()
        self._start_replication(self.replica1, self.bucket)
        data_load_thread.join()

        # dict views are not indexable on Python 3 (m_stats.keys()[0] would
        # raise TypeError); list() the keys so this works on Python 2 and 3.
        chk_pnt = int(m_stats[list(m_stats)[0]])
        timeout = 60 if (self.num_items * .001) < 60 else self.num_items * .001
        time.sleep(timeout)
        tasks = []
        # If deduplication worked, the open checkpoint holds exactly
        # num_items items and the checkpoint id did not advance.
        tasks.append(
            self.cluster.async_wait_for_stats([self.master], self.bucket,
                                              param, stat_key, '==',
                                              self.num_items))
        tasks.append(
            self.cluster.async_wait_for_stats([self.replica1], self.bucket,
                                              param, stat_key, '==',
                                              self.num_items))
        tasks.append(
            self.cluster.async_wait_for_stats([self.master], self.bucket,
                                              param, stat_key_id, '==',
                                              chk_pnt))
        tasks.append(
            self.cluster.async_wait_for_stats([self.replica1], self.bucket,
                                              param, stat_key_id, '==',
                                              chk_pnt))

        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Items weren't deduplicated")

        self._verify_stats_all_buckets(self.servers[:self.num_servers])
Example #35
0
    def _warmup_check(self):
        """Poll every bucket on every server until ep-engine warmup finishes
        and item counts match the pre-warmup snapshot.

        Returns True only when, for every (bucket, server) pair, the
        'ep_warmup_thread' stat reports "complete" and both curr_items and
        curr_items_tot equal the values stored in self.pre_warmup_stats;
        returns False if any pair fails to converge within self.timeout.
        """
        # warmed_up[bucket.name][server] flips to True once that pair's
        # item counts match the pre-warmup snapshot.
        warmed_up = {}
        stats_all_buckets = {}
        for bucket in self.buckets:
            stats_all_buckets[bucket.name] = StatsCommon()
            warmed_up[bucket.name] = {}
            for server in self.servers:
                warmed_up[bucket.name][server] = False

        for bucket in self.buckets:
            for server in self.servers:
                start = time.time()
                warmup_complete = False

                # Phase 1: wait for the warmup thread to report "complete".
                # NOTE(review): this loop has no timeout and no sleep, so a
                # stuck warmup busy-spins forever -- confirm this is
                # acceptable for the harness.
                while not warmup_complete:
                    try:
                        if stats_all_buckets[bucket.name].get_stats(
                            [server], bucket, 'warmup',
                                'ep_warmup_thread')[server] == "complete":
                            self.log.info(
                                "warmup completed for %s in bucket %s" %
                                (server.ip, bucket.name))
                            warmup_complete = True
                        elif stats_all_buckets[bucket.name].get_stats(
                            [server], bucket, 'warmup',
                                'ep_warmup_thread')[server] == "running":
                            self.log.info(
                                "warming up is still running for %s in bucket %s....curr_items_tot : %s"
                                % (server.ip, bucket.name,
                                   stats_all_buckets[bucket.name].get_stats(
                                       [server], bucket, '',
                                       'curr_items_tot')[server]))

                        # NOTE(review): int(...) can never return None, so
                        # the guard below is always true; kept as-is to
                        # preserve behavior.
                        warmup_time = int(
                            stats_all_buckets[bucket.name].get_stats(
                                [server], bucket, 'warmup',
                                'ep_warmup_time')[server])
                        if warmup_time is not None:
                            self.log.info(
                                "ep_warmup_time is %s for %s in bucket %s" %
                                (warmup_time, server.ip, bucket.name))
                    except Exception as e:
                        # Stats can be briefly unavailable while the node
                        # restarts; log and keep polling.
                        self.log.error(
                            "Could not get warmup_time stats from server %s:%s, exception %s"
                            % (server.ip, server.port, e))

                # Phase 2: within self.timeout, wait for item counts to match
                # the pre-warmup snapshot, then run the access-log check.
                start = time.time()
                while time.time() - start < self.timeout and not warmed_up[
                        bucket.name][server]:
                    if stats_all_buckets[bucket.name].get_stats([server], bucket, '', 'curr_items_tot')[server] == \
                        self.pre_warmup_stats[bucket.name]["%s:%s" % (server.ip, server.port)]["curr_items_tot"]:
                        if stats_all_buckets[bucket.name].get_stats([server], bucket, '', 'curr_items')[server] == \
                           self.pre_warmup_stats[bucket.name]["%s:%s" % (server.ip, server.port)]["curr_items"]:
                            if self._warmup_check_without_access_log():
                                warmed_up[bucket.name][server] = True
                                self._stats_report(
                                    server, bucket,
                                    stats_all_buckets[bucket.name])
                        else:
                            self.log.info(
                                "curr_items is %s not equal to %s" %
                                (stats_all_buckets[bucket.name].get_stats(
                                    [server], bucket, '',
                                    'curr_items')[server],
                                 self.pre_warmup_stats[bucket.name][
                                     "%s:%s" %
                                     (server.ip, server.port)]["curr_items"]))
                    else:
                        self.log.info(
                            "curr_items_tot is %s not equal to %s" %
                            (stats_all_buckets[bucket.name].get_stats(
                                [server], bucket, '',
                                'curr_items_tot')[server],
                             self.pre_warmup_stats[bucket.name][
                                 "%s:%s" %
                                 (server.ip, server.port)]["curr_items_tot"]))

                    time.sleep(10)

        # Succeed only if every (bucket, server) pair converged.
        for bucket in self.buckets:
            for server in self.servers:
                if warmed_up[bucket.name][server] == True:
                    continue
                elif warmed_up[bucket.name][server] == False:
                    return False
        return True
Example #36
0
    def _common_test_body_failed_swap_rebalance(self):
        """Swap-rebalance scenario that repeatedly kills memcached mid-rebalance.

        Creates buckets, rebalances in the initial servers, loads data, then
        starts a swap rebalance. At roughly 20/40/60% progress it kills the
        memcached process on the master, waits for warmup, and restarts the
        rebalance; finally it verifies the rebalance completes and items are
        intact.
        """
        master = self.servers[0]
        rest = RestConnection(master)
        num_initial_servers = self.num_initial_servers
        creds = self.input.membase_settings
        initial_servers = self.servers[:num_initial_servers]

        self.log.info("CREATE BUCKET PHASE")
        SwapRebalanceBase.create_buckets(self)

        # Cluster all starting set of servers
        self.log.info("INITIAL REBALANCE PHASE")
        RebalanceHelper.rebalance_in(initial_servers, len(initial_servers) - 1)

        self.log.info("DATA LOAD PHASE")
        loaders = SwapRebalanceBase.start_load_phase(self, master)

        # Wait till load phase is over
        SwapRebalanceBase.stop_load(loaders, do_stop=False)
        self.log.info("DONE LOAD PHASE")

        # Start the swap rebalance
        current_nodes = RebalanceHelper.getOtpNodeIds(master)
        self.log.info("current nodes : {0}".format(current_nodes))
        toBeEjectedNodes = RebalanceHelper.pick_nodes(master, howmany=self.num_swap)
        optNodesIds = [node.id for node in toBeEjectedNodes]
        if self.swap_orchestrator:
            status, content = ClusterHelper.find_orchestrator(master)
            self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
            format(status, content))
            # When swapping all the nodes, eject the orchestrator as well;
            # otherwise replace the first pick with it. Compare with '==':
            # 'is' on ints checks identity and only works by accident for
            # small cached integers.
            if self.num_swap == len(current_nodes):
                optNodesIds.append(content)
            else:
                optNodesIds[0] = content

        for node in optNodesIds:
            self.log.info("removing node {0} and rebalance afterwards".format(node))

        new_swap_servers = self.servers[num_initial_servers:num_initial_servers + self.num_swap]
        for server in new_swap_servers:
            otpNode = rest.add_node(creds.rest_username, creds.rest_password, server.ip)
            msg = "unable to add node {0} to the cluster"
            self.assertTrue(otpNode, msg.format(server.ip))

        # If the orchestrator is being swapped out, talk to a new node.
        if self.swap_orchestrator:
            rest = RestConnection(new_swap_servers[0])
            master = new_swap_servers[0]

        self.log.info("DATA ACCESS PHASE")
        loaders = SwapRebalanceBase.start_access_phase(self, master)

        self.log.info("SWAP REBALANCE PHASE")
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
            ejectedNodes=optNodesIds)

        # Rebalance is failed at 20%, 40% and 60% completion
        for i in [1, 2, 3]:
            expected_progress = 20 * i
            self.log.info("FAIL SWAP REBALANCE PHASE @ {0}".format(expected_progress))
            RestHelper(rest).rebalance_reached(expected_progress)
            bucket = rest.get_buckets()[0].name
            # Kill memcached on the master to fail the rebalance mid-flight.
            pid = StatsCommon.get_stats([master], bucket, "", "pid")[master]
            command = "os:cmd(\"kill -9 {0} \")".format(pid)
            self.log.info(command)
            killed = rest.diag_eval(command)
            self.log.info("killed {0}:{1}??  {2} ".format(master.ip, master.port, killed))
            BaseTestCase._wait_warmup_completed(self, [master], bucket, wait_time=600)
            time.sleep(5)

            # Restart the rebalance after recovery.
            rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                ejectedNodes=optNodesIds)

        self.assertTrue(rest.monitorRebalance(),
            msg="rebalance operation failed after adding node {0}".format(toBeEjectedNodes))

        # Stop loaders
        SwapRebalanceBase.stop_load(loaders)

        self.log.info("DONE DATA ACCESS PHASE")
        #for bucket in rest.get_buckets():
        #    SwapRebalanceBase.verify_data(new_swap_servers[0], bucket_data[bucket.name].get('inserted_keys'),\
        #        bucket.name, self)
        #    RebalanceHelper.wait_for_persistence(master, bucket.name)

        self.log.info("VERIFICATION PHASE")
        SwapRebalanceBase.items_verification(master, self)
Example #37
0
 def _get_backfill_timestamp(self, server, replica_server):
     """Return the tap backfill_start_timestamp (as int) that `server`
     reports for its replication stream to `replica_server`, read from the
     'tap' stat group of self.bucket."""
     param = 'tap'
     stat_key = 'eq_tapq:replication_ns_1@%s:backfill_start_timestamp' % (replica_server.ip)
     m_stats = StatsCommon.get_stats([server], self.bucket, param, stat_key)
     # dict views are not indexable on Python 3 (m_stats.keys()[0] raises
     # TypeError); take the first key via list(). Also fetch the value once
     # instead of twice.
     timestamp = m_stats[list(m_stats)[0]]
     self.log.info("eq_tapq:replication_ns_1@%s:backfill_start_timestamp: %s" % (replica_server.ip, timestamp))
     return int(timestamp)
Example #38
0
    def _common_test_body_failed_swap_rebalance(self):
        """Swap-rebalance scenario that repeatedly kills memcached mid-rebalance.

        Creates buckets, rebalances in the initial servers, loads data, then
        starts a swap rebalance. At roughly 20/40/60% progress it kills the
        memcached process on the master, waits for warmup, and restarts the
        rebalance; finally it verifies the rebalance completes and items are
        intact.
        """
        master = self.servers[0]
        rest = RestConnection(master)
        num_initial_servers = self.num_initial_servers
        creds = self.input.membase_settings
        initial_servers = self.servers[:num_initial_servers]

        self.log.info("CREATE BUCKET PHASE")
        SwapRebalanceBase.create_buckets(self)

        # Cluster all starting set of servers
        self.log.info("INITIAL REBALANCE PHASE")
        RebalanceHelper.rebalance_in(initial_servers, len(initial_servers) - 1)

        self.log.info("DATA LOAD PHASE")
        loaders = SwapRebalanceBase.start_load_phase(self, master)

        # Wait till load phase is over
        SwapRebalanceBase.stop_load(loaders, do_stop=False)
        self.log.info("DONE LOAD PHASE")

        # Start the swap rebalance
        current_nodes = RebalanceHelper.getOtpNodeIds(master)
        self.log.info("current nodes : {0}".format(current_nodes))
        toBeEjectedNodes = RebalanceHelper.pick_nodes(master,
                                                      howmany=self.num_swap)
        optNodesIds = [node.id for node in toBeEjectedNodes]
        if self.swap_orchestrator:
            status, content = ClusterHelper.find_orchestrator(master)
            self.assertTrue(status, msg="Unable to find orchestrator: {0}:{1}".\
            format(status, content))
            # When swapping all the nodes, eject the orchestrator as well;
            # otherwise replace the first pick with it. Compare with '==':
            # 'is' on ints checks identity and only works by accident for
            # small cached integers.
            if self.num_swap == len(current_nodes):
                optNodesIds.append(content)
            else:
                optNodesIds[0] = content

        for node in optNodesIds:
            self.log.info(
                "removing node {0} and rebalance afterwards".format(node))

        new_swap_servers = self.servers[
            num_initial_servers:num_initial_servers + self.num_swap]
        for server in new_swap_servers:
            otpNode = rest.add_node(creds.rest_username, creds.rest_password,
                                    server.ip)
            msg = "unable to add node {0} to the cluster"
            self.assertTrue(otpNode, msg.format(server.ip))

        # If the orchestrator is being swapped out, talk to a new node.
        if self.swap_orchestrator:
            rest = RestConnection(new_swap_servers[0])
            master = new_swap_servers[0]

        self.log.info("DATA ACCESS PHASE")
        loaders = SwapRebalanceBase.start_access_phase(self, master)

        self.log.info("SWAP REBALANCE PHASE")
        rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                       ejectedNodes=optNodesIds)

        # Rebalance is failed at 20%, 40% and 60% completion
        for i in [1, 2, 3]:
            expected_progress = 20 * i
            self.log.info(
                "FAIL SWAP REBALANCE PHASE @ {0}".format(expected_progress))
            RestHelper(rest).rebalance_reached(expected_progress)
            bucket = rest.get_buckets()[0].name
            # Kill memcached on the master to fail the rebalance mid-flight.
            pid = StatsCommon.get_stats([master], bucket, "", "pid")[master]
            command = "os:cmd(\"kill -9 {0} \")".format(pid)
            self.log.info(command)
            killed = rest.diag_eval(command)
            self.log.info("killed {0}:{1}??  {2} ".format(
                master.ip, master.port, killed))
            BaseTestCase._wait_warmup_completed(self, [master],
                                                bucket,
                                                wait_time=600)
            time.sleep(5)

            # Restart the rebalance after recovery.
            rest.rebalance(otpNodes=[node.id for node in rest.node_statuses()],
                           ejectedNodes=optNodesIds)

        self.assertTrue(
            rest.monitorRebalance(),
            msg="rebalance operation failed after adding node {0}".format(
                toBeEjectedNodes))

        # Stop loaders
        SwapRebalanceBase.stop_load(loaders)

        self.log.info("DONE DATA ACCESS PHASE")
        #for bucket in rest.get_buckets():
        #    SwapRebalanceBase.verify_data(new_swap_servers[0], bucket_data[bucket.name].get('inserted_keys'),\
        #        bucket.name, self)
        #    RebalanceHelper.wait_for_persistence(master, bucket.name)

        self.log.info("VERIFICATION PHASE")
        SwapRebalanceBase.items_verification(master, self)
Example #39
0
 def stat(self, key):
     """Return the memcached stat `key` from the 'default' bucket on
     self.master, converted to int when the value is all digits."""
     # dict views are not indexable on Python 3 (stats.values()[0] raises
     # TypeError); materialize with list() so this works on Python 2 and 3.
     stats = StatsCommon.get_stats([self.master], 'default', "", key)
     val = list(stats.values())[0]
     if val.isdigit():
         val = int(val)
     return val