Exemple #1
0
    def delete_bucket_or_assert(serverInfo, bucket='default', test_case=None):
        """Delete ``bucket`` on ``serverInfo`` if it exists.

        If ``test_case`` is given and the bucket is still present after the
        deletion wait (200 polls), the test case is failed.
        """
        log = logger.Logger.get_logger()
        log.info('deleting existing bucket {0} on {1}'.format(
            bucket, serverInfo))

        rest = RestConnection(serverInfo)
        if RestHelper(rest).bucket_exists(bucket):
            status = rest.delete_bucket(bucket)
            if not status:
                try:
                    BucketOperationHelper.print_dataStorage_content(
                        [serverInfo])
                    log.info(
                        StatsCommon.get_stats([serverInfo], bucket, "timings"))
                # was a bare except:, which also swallows SystemExit and
                # KeyboardInterrupt -- diagnostics are best-effort only
                except Exception:
                    log.error("Unable to get timings for bucket")
            log.info('deleted bucket : {0} from {1}'.format(
                bucket, serverInfo.ip))
        msg = 'bucket "{0}" was not deleted even after waiting for two minutes'.format(
            bucket)
        if test_case:
            if not BucketOperationHelper.wait_for_bucket_deletion(
                    bucket, rest, 200):
                try:
                    BucketOperationHelper.print_dataStorage_content(
                        [serverInfo])
                    log.info(
                        StatsCommon.get_stats([serverInfo], bucket, "timings"))
                except Exception:
                    log.error("Unable to get timings for bucket")
                test_case.fail(msg)
Exemple #2
0
 def delete_all_buckets_or_assert(servers, test_case):
     """Delete every bucket on each server in ``servers``.

     Transient REST failures are retried once after a short sleep.  If a
     bucket is still present after the deletion wait and ``test_case`` is
     given, the test case is failed.
     """
     log = logger.Logger.get_logger()
     for serverInfo in servers:
         rest = RestConnection(serverInfo)
         buckets = []
         try:
             buckets = rest.get_buckets()
         except Exception as e:
             log.error(e)
             log.error(
                 '15 seconds sleep before calling get_buckets again...')
             time.sleep(15)
             buckets = rest.get_buckets()
         if len(buckets) > 0:
             log.info('deleting existing buckets {0} on {1}'.format(
                 [b.name for b in buckets], serverInfo.ip))
             for bucket in buckets:
                 log.info("remove bucket {0} ...".format(bucket.name))
                 try:
                     status = rest.delete_bucket(bucket.name)
                 except ServerUnavailableException as e:
                     log.error(e)
                     log.error(
                         '5 seconds sleep before calling delete_bucket again...'
                     )
                     time.sleep(5)
                     status = rest.delete_bucket(bucket.name)
                 if not status:
                     try:
                         BucketOperationHelper.print_dataStorage_content(
                             servers)
                         log.info(
                             StatsCommon.get_stats([serverInfo],
                                                   bucket.name, "timings"))
                     # was a bare except: -- diagnostics only, never mask
                     # the deletion outcome
                     except Exception:
                         log.error("Unable to get timings for bucket")
                 log.info('deleted bucket : {0} from {1}'.format(
                     bucket.name, serverInfo.ip))
                 msg = 'bucket "{0}" was not deleted even after waiting for two minutes'.format(
                     bucket.name)
                 if test_case:
                     if not BucketOperationHelper.wait_for_bucket_deletion(
                             bucket.name, rest, 200):
                         try:
                             BucketOperationHelper.print_dataStorage_content(
                                 servers)
                             log.info(
                                 StatsCommon.get_stats([serverInfo],
                                                       bucket.name,
                                                       "timings"))
                         except Exception:
                             log.error("Unable to get timings for bucket")
                         test_case.fail(msg)
             log.info(
                 "sleep 2 seconds to make sure all buckets ({}) were deleted completely."
                 .format([b.name for b in buckets]))
             time.sleep(2)
Exemple #3
0
 def delete_all_buckets_or_assert(servers, test_case, timeout=200):
     """Delete every bucket on each non-dummy server in ``servers``.

     REST calls retry internally (num_retries/poll_interval).  If a bucket
     survives ``timeout`` seconds and ``test_case`` is given, the test
     case is failed.
     """
     log = logger.Logger.get_logger()
     for serverInfo in servers:
         if serverInfo.dummy:
             continue
         rest = RestConnection(serverInfo)
         # retrying to get buckets with poll_interval and limit of retries
         buckets = rest.get_buckets(num_retries=3, poll_interval=5)
         if len(buckets) > 0:
             log.info('deleting existing buckets {0} on {1}'.format(
                 [b.name for b in buckets], serverInfo.ip))
             for bucket in buckets:
                 # trying to send rest call to delete bucket with poll_interval and limit of retries
                 status = rest.delete_bucket(bucket.name,
                                             num_retries=3,
                                             poll_interval=5)
                 if not status:
                     try:
                         BucketOperationHelper.print_dataStorage_content(
                             servers)
                         log.info(
                             StatsCommon.get_stats([serverInfo],
                                                   bucket.name, "timings"))
                     # was a bare except: -- diagnostics only, never mask
                     # the deletion outcome
                     except Exception:
                         log.error("Unable to get timings for bucket")
                 # trying to check if bucket already deleted? poll_interval=0.1, timeout=200
                 is_bucket_deleted = BucketOperationHelper.wait_for_bucket_deletion(
                     bucket.name, rest, timeout)
                 if not is_bucket_deleted:
                     try:
                         BucketOperationHelper.print_dataStorage_content(
                             servers)
                         log.info(
                             StatsCommon.get_stats([serverInfo],
                                                   bucket.name, "timings"))
                     except Exception:
                         log.error("Unable to get timings for bucket")
                     if test_case:
                         msg = 'bucket "{0}" was not deleted even after waiting for {1} seconds.'.format(
                             bucket.name, timeout)
                         test_case.fail(msg)
                 else:
                     log.info('deleted bucket : {0} from {1}'.format(
                         bucket.name, serverInfo.ip))
         else:
             log.info(
                 "Could not find any buckets for node {0}, nothing to delete"
                 .format(serverInfo.ip))
Exemple #4
0
    def checkpoint_collapse(self):
        """With 3 replicas, stop replication on R2, let Master and R1 close checkpoint.
        Run load until a new checkpoint is created on Master and R1.
        Wait till checkpoints merge on R1. Restart replication of R2.
        Checkpoint should advance to the latest on R2."""

        param = 'checkpoint'
        stat_key = 'vb_0:last_closed_checkpoint_id'
        stat_chk_itms = 'vb_0:num_checkpoint_items'

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket,
                                  str(self.checkpoint_size))
        self._stop_replication(self.replica2, self.bucket)

        generate_load = BlobGenerator('nosql',
                                      'nosql-',
                                      self.value_size,
                                      end=self.num_items)
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_load, "create",
                                        0, 1, 0, True, self.checkpoint_size, 5,
                                        180))
        data_load_thread.start()
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param,
                                        stat_key)

        tasks = []
        # next(iter(...)) instead of .keys()[0]: dict views cannot be
        # indexed on Python 3 (works on Python 2 as well)
        chk_pnt = int(m_stats[next(iter(m_stats))]) + 2
        tasks.append(
            self.cluster.async_wait_for_stats([self.master], self.bucket,
                                              param, stat_key, '>=', chk_pnt))
        tasks.append(
            self.cluster.async_wait_for_stats([self.replica1], self.bucket,
                                              param, stat_key, '>=', chk_pnt))
        tasks.append(
            self.cluster.async_wait_for_stats([self.replica1], self.bucket,
                                              param, stat_chk_itms, '>=',
                                              self.num_items))
        data_load_thread.join()
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Checkpoint not collapsed")

        tasks = []
        self._start_replication(self.replica2, self.bucket)
        tasks.append(
            self.cluster.async_wait_for_stats([self.replica1], self.bucket,
                                              param, stat_chk_itms, '<',
                                              self.num_items))
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Checkpoints not replicated to replica2")

        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
Exemple #5
0
    def checkpoint_create_items(self):
        """Load data until a new checkpoint is created on all replicas"""

        param = 'checkpoint'
        stat_key = 'vb_0:open_checkpoint_id'

        # Snapshot the open-checkpoint id before the load so the verify
        # helper can confirm a new checkpoint appeared afterwards.
        nodes = self.servers[:self.num_servers]
        self._set_checkpoint_size(nodes, self.bucket,
                                  str(self.checkpoint_size))
        baseline_stats = StatsCommon.get_stats([self.master], self.bucket,
                                               param, stat_key)

        load_gen = BlobGenerator('nosql', 'nosql-', self.value_size,
                                 end=self.num_items)
        self._load_all_buckets(self.master, load_gen, "create", 0, 1, 0, True,
                               batch_size=self.checkpoint_size,
                               pause_secs=5,
                               timeout_secs=180)
        self._wait_for_stats_all_buckets(nodes)

        self._verify_checkpoint_id(param, stat_key, baseline_stats)
        self._verify_stats_all_buckets(nodes)
Exemple #6
0
    def _verify_checkpoint_id(self, param, stat_key, m_stats):
        """Verify the checkpoint id advanced on the master, then wait for
        every node to converge to the master's checkpoint id.

        ``m_stats`` is the stats snapshot (dict) taken before the load and
        serves as the baseline checkpoint id.
        """
        # scale the wait with the data set size, but never below a minute
        timeout = max(60, self.num_items * .001)

        # verify checkpoint id increases on master node
        # next(iter(...)) instead of .keys()[0]: dict views cannot be
        # indexed on Python 3 (works on Python 2 as well)
        chk_pnt = int(m_stats[next(iter(m_stats))])
        tasks = []
        tasks.append(
            self.cluster.async_wait_for_stats([self.master], self.bucket,
                                              param, stat_key, '>', chk_pnt))
        for task in tasks:
            try:
                task.result(timeout)
            except TimeoutError:
                self.fail("New checkpoint not created")

        time.sleep(timeout / 10)
        # verify Master and all replicas are in sync with checkpoint ids
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param,
                                        stat_key)
        chk_pnt = int(m_stats[next(iter(m_stats))])
        tasks = []
        for server in self.servers:
            tasks.append(
                self.cluster.async_wait_for_stats([server], self.bucket, param,
                                                  stat_key, '==', chk_pnt))
        for task in tasks:
            try:
                task.result(timeout)
            except TimeoutError:
                self.fail(
                    "Master and all replicas are NOT in sync with checkpoint ids"
                )
Exemple #7
0
    def checkpoint_create_time(self):
        """Load data, but let the timeout create a new checkpoint on all replicas"""

        param = 'checkpoint'
        stat_key = 'vb_0:open_checkpoint_id'

        self._set_checkpoint_timeout(self.servers[:self.num_servers],
                                     self.bucket, str(self.timeout))

        generate_load = BlobGenerator('nosql',
                                      'nosql-',
                                      self.value_size,
                                      end=self.num_items)
        self._load_all_buckets(self.master,
                               generate_load,
                               "create",
                               0,
                               1,
                               0,
                               True,
                               batch_size=self.checkpoint_size,
                               pause_secs=5,
                               timeout_secs=180)
        self._wait_for_stats_all_buckets(self.servers[:self.num_servers])

        chk_stats = StatsCommon.get_stats([self.master], self.bucket, param,
                                          stat_key)
        # fixed: the original message had an unmatched ')' in the format
        # string ("... seconds)")
        self.log.info("Sleeping for {0} seconds".format(self.timeout + 5))
        time.sleep(self.timeout + 5)
        self._verify_checkpoint_id(param, stat_key, chk_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
    def _verify_checkpoint_id(self, param, stat_key, m_stats):
        """Verify the checkpoint id advanced on the master, then wait for
        every node to converge to the master's checkpoint id."""
        # scale the wait with the data set size, but never below a minute
        timeout = max(60, self.num_items * .001)

        # verify checkpoint id increases on master node
        # next(iter(...)) instead of .keys()[0]: dict views cannot be
        # indexed on Python 3 (works on Python 2 as well)
        chk_pnt = int(m_stats[next(iter(m_stats))])
        tasks = []
        tasks.append(self.cluster.async_wait_for_stats([self.master], self.bucket, param, stat_key, '>', chk_pnt))
        for task in tasks:
            try:
                task.result(timeout)
            except TimeoutError:
                self.fail("New checkpoint not created")

        time.sleep(timeout / 10)
        # verify Master and all replicas are in sync with checkpoint ids
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param, stat_key)
        chk_pnt = int(m_stats[next(iter(m_stats))])
        tasks = []
        for server in self.servers:
            tasks.append(self.cluster.async_wait_for_stats([server], self.bucket, param, stat_key, '==', chk_pnt))
        for task in tasks:
            try:
                task.result(timeout)
            except TimeoutError:
                self.fail("Master and all replicas are NOT in sync with checkpoint ids")
    def checkpoint_server_down(self):
        """Load N items. Shut down server R2. Then Restart R2 and
        verify backfill happens on R1 and R2."""

        param = 'checkpoint'
        stat_key = 'vb_0:open_checkpoint_id'
        rest = RestConnection(self.master)

        cluster_nodes = self.servers[:self.num_servers]
        self._set_checkpoint_size(cluster_nodes, self.bucket, self.checkpoint_size)
        initial_load = BlobGenerator('nosql', 'nosql-', self.value_size,
                                     end=self.num_items)
        self._load_all_buckets(self.master, initial_load, "create", 0, 1, 0,
                               True, batch_size=self.checkpoint_size,
                               pause_secs=5, timeout_secs=180)
        self._wait_for_stats_all_buckets(cluster_nodes)
        # snapshot the backfill timestamps so a later backfill is detectable
        backfill_ts_r1 = self._get_backfill_timestamp(self.replica1, self.replica2)
        backfill_ts_r2 = self._get_backfill_timestamp(self.replica2, self.replica3)

        m_stats = StatsCommon.get_stats([self.master], self.bucket, param, stat_key)
        self._stop_server(self.replica2)
        time.sleep(5)
        loader = Thread(target=self._load_data_use_workloadgen,
                        name="load_data", args=(self.master,))
        loader.start()
        loader.join()
        self._start_server(self.replica2)
        time.sleep(5)

        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_backfill_happen(self.replica1, self.replica2, backfill_ts_r1, True)
        self._verify_backfill_happen(self.replica2, self.replica3, backfill_ts_r2, True)
Exemple #10
0
    def _warmup_check(self, timeout=1800):
        """Return True when every server has finished warmup for every
        bucket and its item counts match the pre-warmup snapshot.

        ``timeout``: seconds to wait for the warmup thread to report
        "complete" on each server; the item-count phase uses
        ``self.timeout`` (as the original code did).
        """
        warmed_up = {}
        stats_all_buckets = {}
        for bucket in self.buckets:
            stats_all_buckets[bucket.name] = StatsCommon()
            warmed_up[bucket.name] = {}
            for server in self.servers:
                warmed_up[bucket.name][server] = False

        for bucket in self.buckets:
            stats = stats_all_buckets[bucket.name]
            for server in self.servers:
                end_time = time.time() + timeout
                warmup_complete = False

                while not warmup_complete and time.time() < end_time:
                    try:
                        # fetch the warmup state once per iteration instead
                        # of issuing the same stats call for every branch
                        warmup_state = stats.get_stats([server], bucket, 'warmup', 'ep_warmup_thread')[server]
                        if warmup_state == "complete":
                            self.log.info("warmup completed for %s in bucket %s" % (server.ip, bucket.name))
                            warmup_complete = True
                        elif warmup_state == "running":
                            self.log.info("warming up is still running for %s in bucket %s....curr_items_tot : %s" %
                                          (server.ip, bucket.name, stats.get_stats([server], bucket, '', 'curr_items_tot')[server]))

                        warmup_time = int(stats.get_stats([server], bucket, 'warmup', 'ep_warmup_time')[server])
                        # NOTE(review): int() never returns None, so this
                        # guard is always True; kept for flow parity
                        if warmup_time is not None:
                            self.log.info("ep_warmup_time is %s for %s in bucket %s" % (warmup_time, server.ip, bucket.name))
                        self.sleep(5, "waiting for warmup...")
                    except Exception as e:
                        self.log.error("Could not get warmup_time stats from server %s:%s, exception %s" % (server.ip, server.port, e))

                self.assertTrue(warmup_complete, "Warm up wasn't complete in %s sec" % timeout)

                node_key = "%s:%s" % (server.ip, server.port)
                start = time.time()
                while time.time() - start < self.timeout and not warmed_up[bucket.name][server]:
                    items_tot = stats.get_stats([server], bucket, '', 'curr_items_tot')[server]
                    if items_tot == self.pre_warmup_stats[bucket.name][node_key]["curr_items_tot"]:
                        items = stats.get_stats([server], bucket, '', 'curr_items')[server]
                        if items == self.pre_warmup_stats[bucket.name][node_key]["curr_items"]:
                            if self._warmup_check_without_access_log():
                                warmed_up[bucket.name][server] = True
                                self._stats_report(server, bucket, stats)
                        else:
                            self.log.info("curr_items is %s not equal to %s" % (items, self.pre_warmup_stats[bucket.name][node_key]["curr_items"]))
                    else:
                        self.log.info("curr_items_tot is %s not equal to %s" % (items_tot, self.pre_warmup_stats[bucket.name][node_key]["curr_items_tot"]))

                    self.sleep(10)

        # warmed up only if every (bucket, server) pair succeeded
        return all(warmed_up[bucket.name][server]
                   for bucket in self.buckets for server in self.servers)
    def _warmup_check_without_access_log(self):
        """Return True when every server/bucket pair has loaded enough
        items or memory to satisfy the warmup thresholds.

        Only meaningful when ``self.without_access_log`` is set; otherwise
        the check is vacuously True.
        """
        if not self.without_access_log:
            return True

        warmed_up = {}
        stats_all_buckets = {}
        for bucket in self.buckets:
            stats_all_buckets[bucket.name] = StatsCommon()
            warmed_up[bucket.name] = {}
            for server in self.servers:
                warmed_up[bucket.name][server] = False

        for bucket in self.buckets:
            stats = stats_all_buckets[bucket.name]
            for server in self.servers:
                start = time.time()
                while time.time() - start < self.timeout and not warmed_up[
                        bucket.name][server]:
                    # fetch each stat once per iteration instead of repeating
                    # identical REST calls inside one large condition (the
                    # original fetched mem_used twice and re-fetched the
                    # thresholds in the logging branch)
                    key_count = stats.get_stats([server], bucket, 'warmup', 'ep_warmup_key_count')[server]
                    item_threshold = stats.get_stats([server], bucket, 'warmup', 'ep_warmup_min_item_threshold')[server]
                    mem_used = stats.get_stats([server], bucket, '', 'mem_used')[server]
                    memory_threshold = stats.get_stats([server], bucket, 'warmup', 'ep_warmup_min_memory_threshold')[server]
                    mem_low_wat = stats.get_stats([server], bucket, '', 'ep_mem_low_wat')[server]
                    if (key_count >= item_threshold
                            or mem_used >= memory_threshold
                            or mem_used >= mem_low_wat):
                        warmed_up[bucket.name][server] = True
                    else:
                        self.log.info(
                            "curr_items is %s and ep_warmup_min_item_threshold is %s"
                            % (stats.get_stats(
                                [server], bucket, '', 'curr_items')[server],
                               item_threshold))
                        self.log.info(
                            "vb_active_perc_mem_resident is %s and ep_warmup_min_memory_threshold is %s"
                            % (stats.get_stats(
                                [server], bucket, '',
                                'vb_active_perc_mem_resident')[server],
                               memory_threshold))
                        self.log.info(
                            "mem_used is %s and ep_mem_low_wat is %s" %
                            (mem_used, mem_low_wat))

                    self.sleep(5)

        # warmed up only if every (bucket, server) pair succeeded
        return all(warmed_up[bucket.name][server]
                   for bucket in self.buckets for server in self.servers)
Exemple #12
0
 def _get_backfill_timestamp(self, server, replica_server):
     """Return the backfill start timestamp (as int) of the tap stream
     replicating from ``server`` to ``replica_server``."""
     param = 'tap'
     stat_key = 'eq_tapq:replication_ns_1@%s:backfill_start_timestamp' % (
         replica_server.ip)
     m_stats = StatsCommon.get_stats([server], self.bucket, param, stat_key)
     # next(iter(...)) instead of .keys()[0]: dict views cannot be indexed
     # on Python 3; also avoids computing the first key twice
     timestamp = m_stats[next(iter(m_stats))]
     self.log.info(
         "eq_tapq:replication_ns_1@%s:backfill_start_timestamp: %s" %
         (replica_server.ip, timestamp))
     return int(timestamp)
Exemple #13
0
 def delete_all_buckets_or_assert(servers, test_case):
     """Delete every bucket on each server in ``servers``.

     Transient REST failures are retried once after a short sleep.  If a
     bucket is still present after the deletion wait and ``test_case`` is
     given, the test case is failed.
     """
     log = logger.Logger.get_logger()
     for serverInfo in servers:
         rest = RestConnection(serverInfo)
         buckets = []
         try:
             buckets = rest.get_buckets()
         except Exception as e:
             log.error(e)
             log.error('15 seconds sleep before calling get_buckets again...')
             time.sleep(15)
             buckets = rest.get_buckets()
         log.info('deleting existing buckets {0} on {1}'.format([b.name for b in buckets], serverInfo.ip))
         for bucket in buckets:
             log.info("remove bucket {0} ...".format(bucket.name))
             try:
                 status = rest.delete_bucket(bucket.name)
             except ServerUnavailableException as e:
                 log.error(e)
                 log.error('5 seconds sleep before calling delete_bucket again...')
                 time.sleep(5)
                 status = rest.delete_bucket(bucket.name)
             if not status:
                 try:
                     BucketOperationHelper.print_dataStorage_content(servers)
                     log.info(StatsCommon.get_stats([serverInfo], bucket.name, "timings"))
                 # was a bare except: -- diagnostics only, never mask the
                 # deletion outcome
                 except Exception:
                     log.error("Unable to get timings for bucket")
             log.info('deleted bucket : {0} from {1}'.format(bucket.name, serverInfo.ip))
             msg = 'bucket "{0}" was not deleted even after waiting for two minutes'.format(bucket.name)
             if test_case:
                 if not BucketOperationHelper.wait_for_bucket_deletion(bucket.name, rest, 200):
                     try:
                         BucketOperationHelper.print_dataStorage_content(servers)
                         log.info(StatsCommon.get_stats([serverInfo], bucket.name, "timings"))
                     except Exception:
                         log.error("Unable to get timings for bucket")
                     test_case.fail(msg)
         log.info("sleep 2 seconds to make sure all buckets were deleted completely.")
         time.sleep(2)
Exemple #14
0
    def checkpoint_replication_pause(self):
        """With 3 replicas load data. pause replication to R2. Let checkpoints close on Master and R1.
        Restart replication of R2 and R3, backfill should not be seen on R1 and R2."""

        param = 'checkpoint'
        stat_key = 'vb_0:last_closed_checkpoint_id'

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket,
                                  str(self.checkpoint_size))
        time.sleep(5)
        prev_backfill_timestamp_R1 = self._get_backfill_timestamp(
            self.replica1, self.replica2)
        prev_backfill_timestamp_R2 = self._get_backfill_timestamp(
            self.replica2, self.replica3)

        generate_load = BlobGenerator('nosql',
                                      'nosql-',
                                      self.value_size,
                                      end=self.num_items)
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_load, "create",
                                        0, 1, 0, True, self.checkpoint_size, 5,
                                        180))
        data_load_thread.start()
        self._stop_replication(self.replica2, self.bucket)

        m_stats = StatsCommon.get_stats([self.master], self.bucket, param,
                                        stat_key)
        # next(iter(...)) instead of .keys()[0]: dict views cannot be
        # indexed on Python 3 (works on Python 2 as well)
        chk_pnt = int(m_stats[next(iter(m_stats))]) + 2
        tasks = []
        tasks.append(
            self.cluster.async_wait_for_stats([self.master], self.bucket,
                                              param, stat_key, '>=', chk_pnt))
        tasks.append(
            self.cluster.async_wait_for_stats([self.replica1], self.bucket,
                                              param, stat_key, '>=', chk_pnt))
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Checkpoint not closed")

        data_load_thread.join()
        self._start_replication(self.replica2, self.bucket)

        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
        self._verify_backfill_happen(self.replica1, self.replica2,
                                     prev_backfill_timestamp_R1)
        self._verify_backfill_happen(self.replica2, self.replica3,
                                     prev_backfill_timestamp_R2)
Exemple #15
0
    def _create_access_log(self):
        """Trigger the access scanner on every server/bucket pair and wait
        until a new scanner run is recorded, failing the test otherwise."""
        stats_all_buckets = {bucket.name: StatsCommon()
                             for bucket in self.buckets}

        for bucket in self.buckets:
            bucket_stats = stats_all_buckets[bucket.name]
            for server in self.servers:
                scanner_runs = bucket_stats.get_stats([server], bucket, '', 'ep_num_access_scanner_runs')[server]
                self.log.info("current access scanner run for %s in bucket %s is %s times" % (server.ip, bucket.name, scanner_runs))
                self.log.info("setting access scanner time %s minutes for %s in bucket %s" % (self.access_log_time, server.ip, bucket.name))
                ClusterOperationHelper.flushctl_set(server, "alog_sleep_time", self.access_log_time, bucket.name)
                if not self._wait_for_access_run(self.access_log_time, scanner_runs, server, bucket, bucket_stats):
                    self.fail("Not able to create access log within %s minutes" % self.access_log_time)
Exemple #16
0
    def delete_bucket_or_assert(serverInfo, bucket='default', test_case=None):
        """Delete ``bucket`` on ``serverInfo`` if it exists; fail
        ``test_case`` when the bucket is still there after the wait."""
        log = logger.Logger.get_logger()
        log.info('deleting existing bucket {0} on {1}'.format(bucket, serverInfo))

        rest = RestConnection(serverInfo)
        if RestHelper(rest).bucket_exists(bucket):
            status = rest.delete_bucket(bucket)
            if not status:
                try:
                    BucketOperationHelper.print_dataStorage_content([serverInfo])
                    log.info(StatsCommon.get_stats([serverInfo], bucket, "timings"))
                # was a bare except: -- diagnostics only, never mask the
                # deletion outcome
                except Exception:
                    log.error("Unable to get timings for bucket")
            log.info('deleted bucket : {0} from {1}'.format(bucket, serverInfo.ip))
        msg = 'bucket "{0}" was not deleted even after waiting for two minutes'.format(bucket)
        if test_case:
            if not BucketOperationHelper.wait_for_bucket_deletion(bucket, rest, 200):
                try:
                    BucketOperationHelper.print_dataStorage_content([serverInfo])
                    log.info(StatsCommon.get_stats([serverInfo], bucket, "timings"))
                except Exception:
                    log.error("Unable to get timings for bucket")
                test_case.fail(msg)
Exemple #17
0
    def checkpoint_create_items(self):
        """Load data until a new checkpoint is created on all replicas"""

        param = 'checkpoint'
        stat_key = 'vb_0:open_checkpoint_id'

        # Snapshot the open-checkpoint id before the load so the verify
        # helper can confirm a new checkpoint appeared afterwards.
        nodes = self.servers[:self.num_servers]
        self._set_checkpoint_size(nodes, self.bucket, str(self.checkpoint_size))
        chk_stats = StatsCommon.get_stats([self.master], self.bucket, param, stat_key)

        load_gen = BlobGenerator('nosql', 'nosql-', self.value_size, end=self.num_items)
        self._load_all_buckets(self.master, load_gen, "create", 0, 1, 0, True,
                               batch_size=self.checkpoint_size, pause_secs=5,
                               timeout_secs=180)
        self._wait_for_stats_all_buckets(nodes)

        self._verify_checkpoint_id(param, stat_key, chk_stats)
        self._verify_stats_all_buckets(nodes)
Exemple #18
0
    def checkpoint_collapse(self):
        """With 3 replicas, stop replication on R2, let Master and R1 close checkpoint.
        Run load until a new checkpoint is created on Master and R1.
        Wait till checkpoints merge on R1. Restart replication of R2.
        Checkpoint should advance to the latest on R2."""

        param = 'checkpoint'
        stat_key = 'vb_0:last_closed_checkpoint_id'
        stat_chk_itms = 'vb_0:num_checkpoint_items'

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket, str(self.checkpoint_size))
        self._stop_replication(self.replica2, self.bucket)

        generate_load = BlobGenerator('nosql', 'nosql-', self.value_size, end=self.num_items)
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_load, "create", 0, 1, 0, True, self.checkpoint_size, 5, 180))
        data_load_thread.start()
        # Baseline last-closed-checkpoint id on the master, sampled while the
        # load runs; verification below expects it to advance by >= 2.
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param, stat_key)

        tasks = []
        # BUGFIX: dict.keys()[0] is Python-2-only (keys() is a view on
        # Python 3); next(iter(...)) is portable and equivalent here.
        chk_pnt = int(next(iter(m_stats.values()))) + 2
        tasks.append(self.cluster.async_wait_for_stats([self.master], self.bucket, param, stat_key,
                                                       '>=', chk_pnt))
        tasks.append(self.cluster.async_wait_for_stats([self.replica1], self.bucket, param, stat_key,
                                                       '>=', chk_pnt))
        tasks.append(self.cluster.async_wait_for_stats([self.replica1], self.bucket, param,
                                                       stat_chk_itms, '>=', self.num_items))
        data_load_thread.join()
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Checkpoint not collapsed")

        tasks = []
        self._start_replication(self.replica2, self.bucket)
        # After replication restarts, the accumulated checkpoint items on R1
        # should drain back below num_items.
        tasks.append(self.cluster.async_wait_for_stats([self.replica1], self.bucket, param,
                                                       stat_chk_itms, '<', self.num_items))
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Checkpoints not replicated to replica2")

        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
Exemple #19
0
    def checkpoint_server_down(self):
        """Load N items. Shut down server R2. Then Restart R2 and
        verify backfill happens on R1 and R2."""

        param = 'checkpoint'
        stat_key = 'vb_0:open_checkpoint_id'
        # NOTE: removed the original's unused 'rest = RestConnection(self.master)';
        # nothing in this test referenced it.

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket,
                                  self.checkpoint_size)
        generate_load_one = BlobGenerator('nosql',
                                          'nosql-',
                                          self.value_size,
                                          end=self.num_items)
        self._load_all_buckets(self.master,
                               generate_load_one,
                               "create",
                               0,
                               1,
                               0,
                               True,
                               batch_size=self.checkpoint_size,
                               pause_secs=5,
                               timeout_secs=180)
        self._wait_for_stats_all_buckets(self.servers[:self.num_servers])
        # Baseline backfill timestamps so the final step can detect that a
        # backfill DID occur after R2 was restarted.
        prev_backfill_timestamp_R1 = self._get_backfill_timestamp(
            self.replica1, self.replica2)
        prev_backfill_timestamp_R2 = self._get_backfill_timestamp(
            self.replica2, self.replica3)

        m_stats = StatsCommon.get_stats([self.master], self.bucket, param,
                                        stat_key)
        self._stop_server(self.replica2)
        time.sleep(5)
        data_load_thread = Thread(target=self._load_data_use_workloadgen,
                                  name="load_data",
                                  args=(self.master, ))
        data_load_thread.start()
        data_load_thread.join()
        self._start_server(self.replica2)
        time.sleep(5)

        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_backfill_happen(self.replica1, self.replica2,
                                     prev_backfill_timestamp_R1, True)
        self._verify_backfill_happen(self.replica2, self.replica3,
                                     prev_backfill_timestamp_R2, True)
Exemple #20
0
    def checkpoint_replication_pause_failover(self):
        """Load N items. Stop replication R3. Load N' more items.
        Failover R2. When restart replication to R3, verify backfill doesn't happen on R1."""

        param = 'checkpoint'
        stat_key = 'vb_0:open_checkpoint_id'
        rest = RestConnection(self.master)
        nodes = rest.node_statuses()
        failover_node = None
        for node in nodes:
            if node.id.find(self.replica2.ip) >= 0:
                failover_node = node
        # Fail fast with a clear message instead of an AttributeError on
        # failover_node.id below.
        self.assertTrue(failover_node is not None,
                        "replica2 {0} not found in node statuses".format(self.replica2.ip))

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket, self.checkpoint_size)
        generate_load_one = BlobGenerator('nosql', 'nosql-', self.value_size, end=self.num_items)
        self._load_all_buckets(self.master, generate_load_one, "create", 0, 1, 0, True, batch_size=self.checkpoint_size, pause_secs=5, timeout_secs=180)
        self._wait_for_stats_all_buckets(self.servers[:self.num_servers])
        # Baseline backfill timestamp: later we assert backfill did NOT happen on R1.
        prev_backfill_timestamp_R1 = self._get_backfill_timestamp(self.replica1, self.replica2)
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param, stat_key)
        self._stop_replication(self.replica3, self.bucket)

        generate_load_two = BlobGenerator('sqlite', 'sqlite-', self.value_size, end=self.num_items)
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_load_two, "create", 0, 1, 0, True, self.checkpoint_size, 5, 180))
        data_load_thread.start()

        failed_over = rest.fail_over(failover_node.id)
        if not failed_over:
            self.log.info("unable to failover the node the first time. try again in  60 seconds..")
            # retry once after a grace period
            time.sleep(75)
            failed_over = rest.fail_over(failover_node.id)
        # BUGFIX: the original message was "...node %s".format(ip) -- a
        # %-placeholder passed to str.format(), so the ip was never substituted.
        self.assertTrue(failed_over, "unable to failover node {0}".format(self.replica2.ip))
        self.log.info("failed over node : {0}".format(failover_node.id))
        data_load_thread.join()
        self._start_replication(self.replica3, self.bucket)

        # R2 has been failed over; continue verification against the survivors.
        self.servers = []
        self.servers = [self.master, self.replica1, self.replica3]
        self.num_servers = len(self.servers)
        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
        self._verify_backfill_happen(self.replica1, self.replica2, prev_backfill_timestamp_R1)
        # Rebalance the failed-over node out, then add it back in.
        self.cluster.rebalance([self.master, self.replica1, self.replica2, self.replica3], [], [self.replica2])
        self.cluster.rebalance([self.master, self.replica1, self.replica3], [self.replica2], [])
Exemple #21
0
    def checkpoint_create_time(self):
        """Load data, but let the timeout create a new checkpoint on all replicas"""

        param = 'checkpoint'
        stat_key = 'vb_0:open_checkpoint_id'

        self._set_checkpoint_timeout(self.servers[:self.num_servers], self.bucket, str(self.timeout))

        generate_load = BlobGenerator('nosql', 'nosql-', self.value_size, end=self.num_items)
        self._load_all_buckets(self.master, generate_load, "create", 0, 1, 0, True, batch_size=self.checkpoint_size, pause_secs=5, timeout_secs=180)
        self._wait_for_stats_all_buckets(self.servers[:self.num_servers])

        chk_stats = StatsCommon.get_stats([self.master], self.bucket, param, stat_key)
        # Sleep past the checkpoint timeout so a new checkpoint is created by
        # time alone.  BUGFIX: removed the stray ')' from the log message.
        self.log.info("Sleeping for {0} seconds".format(self.timeout + 5))
        time.sleep(self.timeout + 5)
        self._verify_checkpoint_id(param, stat_key, chk_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
Exemple #22
0
    def checkpoint_deduplication(self):
        """Disable replication of R1. Load N items to master, then mutate some of them.
        Restart replication of R1, only N items should be in stats. In this test, we can
        only load number of items <= checkpoint_size to observe deduplication"""

        param = 'checkpoint'
        stat_key = 'vb_0:num_open_checkpoint_items'
        stat_key_id = 'vb_0:open_checkpoint_id'

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket, self.checkpoint_size)
        self._stop_replication(self.replica1, self.bucket)

        generate_load = BlobGenerator('nosql', 'nosql-', self.value_size, end=self.num_items)
        generate_update = BlobGenerator('nosql', 'sql-', self.value_size, end=self.num_items)
        self._load_all_buckets(self.master, generate_load, "create", 0, 1, 0, True, batch_size=self.checkpoint_size, pause_secs=5, timeout_secs=180)
        self._wait_for_stats_all_buckets([self.master, self.replica2, self.replica3])
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param, stat_key_id)
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_update, "update", 0, 1, 0, True, self.checkpoint_size, 5, 180))
        data_load_thread.start()
        self._start_replication(self.replica1, self.bucket)
        data_load_thread.join()

        # BUGFIX: dict.keys()[0] is Python-2-only (keys() is a view on
        # Python 3); next(iter(...)) is portable and equivalent here.
        chk_pnt = int(next(iter(m_stats.values())))
        # Settle time scales with item count but never drops below 60 seconds.
        timeout = max(60, self.num_items * .001)
        time.sleep(timeout)
        tasks = []
        tasks.append(self.cluster.async_wait_for_stats([self.master], self.bucket, param,
                                                       stat_key, '==', self.num_items))
        tasks.append(self.cluster.async_wait_for_stats([self.replica1], self.bucket, param,
                                                       stat_key, '==', self.num_items))
        tasks.append(self.cluster.async_wait_for_stats([self.master], self.bucket, param,
                                                       stat_key_id, '==', chk_pnt))
        tasks.append(self.cluster.async_wait_for_stats([self.replica1], self.bucket, param,
                                                       stat_key_id, '==', chk_pnt))

        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Items weren't deduplicated")

        self._verify_stats_all_buckets(self.servers[:self.num_servers])
Exemple #23
0
    def checkpoint_replication_pause(self):
        """With 3 replicas load data. pause replication to R2. Let checkpoints close on Master and R1.
        Restart replication of R2 and R3, backfill should not be seen on R1 and R2."""

        param = 'checkpoint'
        stat_key = 'vb_0:last_closed_checkpoint_id'

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket, str(self.checkpoint_size))
        time.sleep(5)
        # Baseline backfill timestamps: the final step asserts backfill did
        # NOT happen on R1 and R2.
        prev_backfill_timestamp_R1 = self._get_backfill_timestamp(self.replica1, self.replica2)
        prev_backfill_timestamp_R2 = self._get_backfill_timestamp(self.replica2, self.replica3)

        generate_load = BlobGenerator('nosql', 'nosql-', self.value_size, end=self.num_items)
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_load, "create", 0, 1, 0, True, self.checkpoint_size, 5, 180))
        data_load_thread.start()
        self._stop_replication(self.replica2, self.bucket)

        m_stats = StatsCommon.get_stats([self.master], self.bucket, param, stat_key)
        # BUGFIX: dict.keys()[0] is Python-2-only (keys() is a view on
        # Python 3); next(iter(...)) is portable and equivalent here.
        chk_pnt = int(next(iter(m_stats.values()))) + 2
        tasks = []
        tasks.append(self.cluster.async_wait_for_stats([self.master], self.bucket, param, stat_key,
                                                       '>=', chk_pnt))
        tasks.append(self.cluster.async_wait_for_stats([self.replica1], self.bucket, param, stat_key,
                                                       '>=', chk_pnt))
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Checkpoint not closed")

        data_load_thread.join()
        self._start_replication(self.replica2, self.bucket)

        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
        self._verify_backfill_happen(self.replica1, self.replica2, prev_backfill_timestamp_R1)
        self._verify_backfill_happen(self.replica2, self.replica3, prev_backfill_timestamp_R2)
Exemple #24
0
    def load_document_until_ram_percentage(self):
        """Load documents in fixed-size batches until the bucket's mem_used
        stat reaches the configured fraction of the bucket RAM quota."""
        self.start = 0
        self.num_items = 30000
        self.end = self.num_items
        while True:
            self.log.info("Add documents to bucket")
            self.perform_doc_ops_in_all_cb_buckets(self.num_items, "create",
                                                   self.start, self.end)

            self.log.info("Calculate available free memory")
            stats_all_buckets = {self.cb_bucket_name: StatsCommon()}
            mem_stats = stats_all_buckets[self.cb_bucket_name].get_stats(
                [self.master], self.cb_bucket_name, '', 'mem_used')
            memory_used = int(mem_stats[self.servers[0]])

            # Stop once memory usage reaches the target fraction of the quota.
            ram_limit = (self.document_ram_percentage * self.bucket_ram *
                         1000000)
            if memory_used >= ram_limit:
                break
            self.log.info("Continue loading we have more free memory")
            # Advance the key window for the next batch.
            self.start = self.end
            self.end = self.end + self.num_items
    def test_verify_mb8825(self):
        """Regression test for MB-8825: after a reverse XDCR link feeds the
        destination's deletes back to the source, the source must reject the
        mutations (no set/del meta ops applied at source nodes)."""

        def _meta_stat(node, bucket, stat_name):
            # Fetch one ep_* meta-op counter for *bucket* from a single node.
            return int(StatsCommon.get_stats([node], bucket, '', stat_name)[node])

        # Setting up replication clusters.
        src_cluster_name, dest_cluster_name = "remote-dest-src", "remote-src-dest"
        self.__setup_replication_clusters(self.src_master, self.dest_master, src_cluster_name, dest_cluster_name)

        # Step-3 Load 10k items ( sets=80, deletes=20) on source cluster.
        self._load_all_buckets(self.src_master, self.gen_create, "create", 0)

        # Step-4 XDCR Source -> Remote
        self._replicate_clusters(self.src_master, dest_cluster_name)
        self.merge_buckets(self.src_master, self.dest_master, bidirection=False)

        # Step-5 Wait for replication to finish 50% at destination node
        expected_items = (self.gen_create.end) * 0.5
        dest_master_buckets = self._get_cluster_buckets(self.dest_master)

        tasks = []
        for bucket in dest_master_buckets:
            tasks.append(self.cluster.async_wait_for_stats([self.dest_master], bucket, '', 'curr_items', '>=', expected_items))
        for task in tasks:
            task.result(self.wait_timeout * 5)

        # Perform 20% delete on Source cluster.
        # IDIOM: float(...) replaces the original's C-style '(float)(...)' cast.
        tasks = []
        self.gen_delete = BlobGenerator('loadOne', 'loadOne-', self._value_size, start=0,
                                        end=int(self.num_items * float(self._percent_delete) / 100))
        tasks.extend(self._async_load_all_buckets(self.src_master, self.gen_delete, "delete", 0))

        # Step-6 XDCR Remote -> Source
        self._replicate_clusters(self.dest_master, src_cluster_name)
        self.merge_buckets(self.dest_master, self.src_master, bidirection=False)

        # Wait for delete tasks to be finished
        for task in tasks:
            task.result()

        # Step-8 Compare the source and destination cluster items - item count, meta data, data content.
        self.verify_results()

        # Verify if no deletion performed at source node:
        src_master_buckets = self._get_cluster_buckets(self.src_master)
        for bucket in src_master_buckets:
            # Aggregate each meta-op counter across all source nodes.
            src_stat_ep_num_ops_del_meta = sum(_meta_stat(n, bucket, 'ep_num_ops_del_meta') for n in self.src_nodes)
            src_stat_ep_num_ops_set_meta = sum(_meta_stat(n, bucket, 'ep_num_ops_set_meta') for n in self.src_nodes)
            src_stat_ep_num_ops_get_meta = sum(_meta_stat(n, bucket, 'ep_num_ops_get_meta') for n in self.src_nodes)
            src_stat_ep_num_ops_del_meta_res_fail = sum(_meta_stat(n, bucket, 'ep_num_ops_del_meta_res_fail') for n in self.src_nodes)
            src_stat_ep_num_ops_set_meta_res_fail = sum(_meta_stat(n, bucket, 'ep_num_ops_set_meta_res_fail') for n in self.src_nodes)

            self.assertEqual(src_stat_ep_num_ops_set_meta, 0, "Number of set [%s] operation occurs at bucket = %s, while expected to 0" % (src_stat_ep_num_ops_set_meta, bucket))
            self.assertEqual(src_stat_ep_num_ops_del_meta, 0, "Number of delete [%s] operation occurs at bucket = %s, while expected to 0" % (src_stat_ep_num_ops_del_meta, bucket))

            dest_stat_ep_num_ops_del_meta = sum(_meta_stat(n, bucket, 'ep_num_ops_del_meta') for n in self.dest_nodes)

            if self.rep_type == "xmem":
                self.assertEqual(src_stat_ep_num_ops_del_meta_res_fail, dest_stat_ep_num_ops_del_meta, "Number of failed delete [%s] operation occurs at bucket = %s, while expected to %s" % (src_stat_ep_num_ops_del_meta_res_fail, bucket, dest_stat_ep_num_ops_del_meta))
                self.assertTrue(src_stat_ep_num_ops_set_meta_res_fail > 0, "Number of failed set [%s] operation occurs at bucket = %s, while expected greater than 0" % (src_stat_ep_num_ops_set_meta_res_fail, bucket))

            elif self.rep_type == "capi":
                self.assertTrue(src_stat_ep_num_ops_get_meta > 0, "Number of get [%s] operation occurs at bucket = %s, while expected greater than 0" % (src_stat_ep_num_ops_get_meta, bucket))
    def test_items_append(self):
        """Append random data to a subset of loaded keys until each value
        reaches ``desired_item_size``, then verify the resulting sizes and
        (optionally) the content, and log memory/fragmentation stats.

        Input params: desired_item_size, append_size, fixed_append_size
        (fixed vs. doubling append length), append_ratio (fraction of keys
        to append to), value_size (initial value size).
        """
        self.desired_item_size = self.input.param("desired_item_size", 2048)
        self.append_size = self.input.param("append_size", 1024)
        self.fixed_append_size = self.input.param("fixed_append_size", True)
        self.append_ratio = self.input.param("append_ratio", 0.5)
        # Seed the buckets with the initial dataset before appending.
        self._load_all_buckets(self.master, self.gen_create, "create", 0,
                               batch_size=1000, pause_secs=5, timeout_secs=100)

        for bucket in self.buckets:
            self.value_size = self.input.param("value_size", 512)
            verify_dict = {}
            # presumably (valid keys, deleted keys) of the test kv-store -- confirm
            vkeys, dkeys = bucket.kvs[1].key_set()

            # Select roughly the first append_ratio fraction of the keys.
            key_count = len(vkeys)
            app_ratio = self.append_ratio * key_count
            selected_keys = []
            i = 0
            for key in vkeys:
                i += 1
                if i >= app_ratio:
                    break
                selected_keys.append(key)

            awareness = VBucketAwareMemcached(RestConnection(self.master), bucket.name)
            if self.kv_verify:
                # Remember each starting value so appended content can be
                # verified after the loop.
                for key in selected_keys:
                    value = awareness.memcached(key).get(key)[2]
                    verify_dict[key] = value

            self.log.info("Bucket: {0}".format(bucket.name))
            self.log.info("Appending to have items whose initial size was "
                            + "{0} to equal or cross a size of {1}".format(self.value_size, self.desired_item_size))
            self.log.info("Item-appending of {0} items starting ..".format(len(selected_keys) + 1))

            # Append until the tracked value size crosses desired_item_size.
            # With fixed_append_size=False the append length grows as 2**index
            # each pass, starting at 2**3.
            index = 3
            while self.value_size < self.desired_item_size:
                str_len = self.append_size
                if not self.fixed_append_size:
                    str_len = int(math.pow(2, index))

                for key in selected_keys:
                    random_string = self.random_str_generator(str_len)
                    awareness.memcached(key).append(key, random_string)
                    if self.kv_verify:
                        verify_dict[key] = verify_dict[key] + random_string
                self.log.info("for {0} items size was increased to {1} Bytes".format(len(selected_keys) + 1, self.value_size))
                self.value_size += str_len
                index += 1

            self.log.info("The appending of {0} items ended".format(len(selected_keys) + 1))

        for bucket in self.buckets:
            msg = "Bucket:{0}".format(bucket.name)
            self.log.info("VERIFICATION <" + msg + ">: Phase 0 - Check the gap between "
                      + "mem_used by the bucket and total_allocated_bytes")
            stats = StatsCommon()
            mem_used_stats = stats.get_stats(self.servers, bucket, 'memory', 'mem_used')
            total_allocated_bytes_stats = stats.get_stats(self.servers, bucket, 'memory', 'total_allocated_bytes')
            total_fragmentation_bytes_stats = stats.get_stats(self.servers, bucket, 'memory', 'total_fragmentation_bytes')

            for server in self.servers:
                self.log.info("In {0} bucket {1}, total_fragmentation_bytes + the total_allocated_bytes = {2}"
                              .format(server.ip, bucket.name, (int(total_fragmentation_bytes_stats[server]) + int(total_allocated_bytes_stats[server]))))
                self.log.info("In {0} bucket {1}, mem_used = {2}".format(server.ip, bucket.name, mem_used_stats[server]))
                self.log.info("In {0} bucket {1}, the difference between actual memory used by memcached and mem_used is {2} times"
                              .format(server.ip, bucket.name, float(int(total_fragmentation_bytes_stats[server]) + int(total_allocated_bytes_stats[server])) / float(mem_used_stats[server])))


            # NOTE(review): 'awareness', 'selected_keys' and 'verify_dict' here
            # are leftovers from the LAST bucket of the loop above, so Phase1/2
            # only verify the final bucket -- confirm this is intended.
            self.log.info("VERIFICATION <" + msg + ">: Phase1 - Check if any of the "
                    + "selected keys have value less than the desired value size")
            for key in selected_keys:
                value = awareness.memcached(key).get(key)[2]
                if len(value) < self.desired_item_size:
                    self.fail("Failed to append enough to make value size surpass the "
                                + "size {0}, key {1} has size {2}".format(self.desired_item_size, key, len(value)))

            if self.kv_verify:
                self.log.info("VERIFICATION <" + msg + ">: Phase2 - Check if the content "
                        + "after the appends match what's expected")
                for k in verify_dict:
                    if awareness.memcached(k).get(k)[2] != verify_dict[k]:
                        self.fail("Content at key {0}: not what's expected.".format(k))
                self.log.info("VERIFICATION <" + msg + ">: Successful")

        shell = RemoteMachineShellConnection(self.master)
        # Dump allocator stats via cbstats for post-mortem inspection.
        shell.execute_cbstats("", "raw", keyname="allocator", vbid="")
        shell.disconnect()
 def stat(self, key):
     """Fetch stat *key* for the 'default' bucket from the master node.

     Returns an int when the stat value is all digits, otherwise the raw
     string value.
     """
     stats = StatsCommon.get_stats([self.master], 'default', "", key)
     # BUGFIX: dict.values()[0] is Python-2-only (values() is a view on
     # Python 3); next(iter(...)) is portable and equivalent here.
     val = next(iter(stats.values()))
     if val.isdigit():
         val = int(val)
     return val
 def stat(self, key):
     """Fetch stat *key* for the 'default' bucket from the master node.

     Returns an int when the stat value is all digits, otherwise the raw
     string value.
     """
     stats = StatsCommon.get_stats([self.master], 'default', "", key)
     # BUGFIX: dict.values()[0] is Python-2-only (values() is a view on
     # Python 3); next(iter(...)) is portable and equivalent here.
     val = next(iter(stats.values()))
     if val.isdigit():
         val = int(val)
     return val
Exemple #29
0
    def test_items_append(self):
        """Append random data to a subset of loaded keys until each value
        reaches ``desired_item_size``, then verify the resulting sizes and
        (optionally) the content, and log memory/fragmentation stats.

        Input params: desired_item_size, append_size, fixed_append_size
        (fixed vs. doubling append length), append_ratio (fraction of keys
        to append to), value_size (initial value size).
        """
        self.desired_item_size = self.input.param("desired_item_size", 2048)
        self.append_size = self.input.param("append_size", 1024)
        self.fixed_append_size = self.input.param("fixed_append_size", True)
        self.append_ratio = self.input.param("append_ratio", 0.5)
        # Seed the buckets with the initial dataset before appending.
        self._load_all_buckets(self.master,
                               self.gen_create,
                               "create",
                               0,
                               batch_size=1000,
                               pause_secs=5,
                               timeout_secs=100)

        for bucket in self.buckets:
            self.value_size = self.input.param("value_size", 512)
            verify_dict = {}
            # presumably (valid keys, deleted keys) of the test kv-store -- confirm
            vkeys, dkeys = bucket.kvs[1].key_set()

            # Select roughly the first append_ratio fraction of the keys.
            key_count = len(vkeys)
            app_ratio = self.append_ratio * key_count
            selected_keys = []
            i = 0
            for key in vkeys:
                i += 1
                if i >= app_ratio:
                    break
                selected_keys.append(key)

            awareness = VBucketAwareMemcached(RestConnection(self.master),
                                              bucket.name)
            if self.kv_verify:
                # Remember each starting value so appended content can be
                # verified after the loop.
                for key in selected_keys:
                    value = awareness.memcached(key).get(key)[2]
                    verify_dict[key] = value

            self.log.info("Bucket: {0}".format(bucket.name))
            self.log.info("Appending to have items whose initial size was " +
                          "{0} to equal or cross a size of {1}".format(
                              self.value_size, self.desired_item_size))
            self.log.info("Item-appending of {0} items starting ..".format(
                len(selected_keys) + 1))

            # Append until the tracked value size crosses desired_item_size.
            # With fixed_append_size=False the append length grows as 2**index
            # each pass, starting at 2**3.
            index = 3
            while self.value_size < self.desired_item_size:
                str_len = self.append_size
                if not self.fixed_append_size:
                    str_len = int(math.pow(2, index))

                for key in selected_keys:
                    random_string = self.random_str_generator(str_len)
                    awareness.memcached(key).append(key, random_string)
                    if self.kv_verify:
                        verify_dict[key] = verify_dict[key] + random_string
                self.log.info(
                    "for {0} items size was increased to {1} Bytes".format(
                        len(selected_keys) + 1, self.value_size))
                self.value_size += str_len
                index += 1

            self.log.info("The appending of {0} items ended".format(
                len(selected_keys) + 1))

        for bucket in self.buckets:
            msg = "Bucket:{0}".format(bucket.name)
            self.log.info("VERIFICATION <" + msg +
                          ">: Phase 0 - Check the gap between " +
                          "mem_used by the bucket and total_allocated_bytes")
            stats = StatsCommon()
            mem_used_stats = stats.get_stats(self.servers, bucket, 'memory',
                                             'mem_used')
            total_allocated_bytes_stats = stats.get_stats(
                self.servers, bucket, 'memory', 'total_allocated_bytes')
            total_fragmentation_bytes_stats = stats.get_stats(
                self.servers, bucket, 'memory', 'total_fragmentation_bytes')

            for server in self.servers:
                self.log.info(
                    "In {0} bucket {1}, total_fragmentation_bytes + the total_allocated_bytes = {2}"
                    .format(server.ip, bucket.name,
                            (int(total_fragmentation_bytes_stats[server]) +
                             int(total_allocated_bytes_stats[server]))))
                self.log.info("In {0} bucket {1}, mem_used = {2}".format(
                    server.ip, bucket.name, mem_used_stats[server]))
                self.log.info(
                    "In {0} bucket {1}, the difference between actual memory used by memcached and mem_used is {2} times"
                    .format(
                        server.ip, bucket.name,
                        float(
                            int(total_fragmentation_bytes_stats[server]) +
                            int(total_allocated_bytes_stats[server])) /
                        float(mem_used_stats[server])))

            # NOTE(review): 'awareness', 'selected_keys' and 'verify_dict' here
            # are leftovers from the LAST bucket of the loop above, so Phase1/2
            # only verify the final bucket -- confirm this is intended.
            self.log.info(
                "VERIFICATION <" + msg + ">: Phase1 - Check if any of the " +
                "selected keys have value less than the desired value size")
            for key in selected_keys:
                value = awareness.memcached(key).get(key)[2]
                if len(value) < self.desired_item_size:
                    self.fail(
                        "Failed to append enough to make value size surpass the "
                        + "size {0}, key {1} has size {2}".format(
                            self.desired_item_size, key, len(value)))

            if self.kv_verify:
                self.log.info("VERIFICATION <" + msg +
                              ">: Phase2 - Check if the content " +
                              "after the appends match what's expected")
                for k in verify_dict:
                    if awareness.memcached(k).get(k)[2] != verify_dict[k]:
                        self.fail(
                            "Content at key {0}: not what's expected.".format(
                                k))
                self.log.info("VERIFICATION <" + msg + ">: Successful")

        shell = RemoteMachineShellConnection(self.master)
        # Dump allocator stats via cbstats for post-mortem inspection.
        shell.execute_cbstats("", "raw", keyname="allocator", vbid="")
        shell.disconnect()
Exemple #30
0
    def test_verify_mb8825(self):
        # Setting up replication clusters.
        src_cluster_name, dest_cluster_name = "remote-dest-src", "remote-src-dest"
        self.__setup_replication_clusters(self.src_master, self.dest_master,
                                          src_cluster_name, dest_cluster_name)

        # Step-3 Load 10k items ( sets=80, deletes=20) on source cluster.
        self._load_all_buckets(self.src_master, self.gen_create, "create", 0)

        # Step-4 XDCR Source -> Remote
        self._replicate_clusters(self.src_master, dest_cluster_name)
        self.merge_buckets(self.src_master,
                           self.dest_master,
                           bidirection=False)

        # Step-5 Wait for replication to finish 50% at destination node
        expected_items = (self.gen_create.end) * 0.5
        dest_master_buckets = self._get_cluster_buckets(self.dest_master)

        tasks = []
        for bucket in dest_master_buckets:
            tasks.append(
                self.cluster.async_wait_for_stats([self.dest_master], bucket,
                                                  '', 'curr_items', '>=',
                                                  expected_items))
        for task in tasks:
            task.result(self.wait_timeout * 5)

        # Perform 20% delete on Source cluster.
        tasks = []
        self.gen_delete = BlobGenerator(
            'loadOne',
            'loadOne-',
            self._value_size,
            start=0,
            end=int((self.num_items) * (float)(self._percent_delete) / 100))
        tasks.extend(
            self._async_load_all_buckets(self.src_master, self.gen_delete,
                                         "delete", 0))

        # Step-6 XDCR Remote -> Source
        self._replicate_clusters(self.dest_master, src_cluster_name)
        self.merge_buckets(self.dest_master,
                           self.src_master,
                           bidirection=False)

        # Wait for delete tasks to be finished
        for task in tasks:
            task.result()

        # Step-8 Compare the source and destination cluster items - item count, meta data, data content.
        self.verify_results()

        # Verify if no deletion performed at source node:
        src_master_buckets = self._get_cluster_buckets(self.src_master)
        for bucket in src_master_buckets:
            src_stat_ep_num_ops_del_meta = 0
            src_stat_ep_num_ops_set_meta = 0
            src_stat_ep_num_ops_get_meta = 0
            src_stat_ep_num_ops_del_meta_res_fail = 0
            src_stat_ep_num_ops_set_meta_res_fail = 0
            for src_node in self.src_nodes:
                src_stat_ep_num_ops_del_meta += int(
                    StatsCommon.get_stats([src_node], bucket, '',
                                          'ep_num_ops_del_meta')[src_node])
                src_stat_ep_num_ops_set_meta += int(
                    StatsCommon.get_stats([src_node], bucket, '',
                                          'ep_num_ops_set_meta')[src_node])
                src_stat_ep_num_ops_get_meta += int(
                    StatsCommon.get_stats([src_node], bucket, '',
                                          'ep_num_ops_get_meta')[src_node])
                src_stat_ep_num_ops_del_meta_res_fail += int(
                    StatsCommon.get_stats(
                        [src_node], bucket, '',
                        'ep_num_ops_del_meta_res_fail')[src_node])
                src_stat_ep_num_ops_set_meta_res_fail += int(
                    StatsCommon.get_stats(
                        [src_node], bucket, '',
                        'ep_num_ops_set_meta_res_fail')[src_node])

            self.assertEqual(
                src_stat_ep_num_ops_set_meta, 0,
                "Number of set [%s] operation occurs at bucket = %s, while expected to 0"
                % (src_stat_ep_num_ops_set_meta, bucket))
            self.assertEqual(
                src_stat_ep_num_ops_del_meta, 0,
                "Number of delete [%s] operation occurs at bucket = %s, while expected to 0"
                % (src_stat_ep_num_ops_del_meta, bucket))

            dest_stat_ep_num_ops_del_meta = 0
            for dest_node in self.dest_nodes:
                dest_stat_ep_num_ops_del_meta += int(
                    StatsCommon.get_stats([dest_node], bucket, '',
                                          'ep_num_ops_del_meta')[dest_node])

            if self.rep_type == "xmem":
                self.assertEqual(
                    src_stat_ep_num_ops_del_meta_res_fail,
                    dest_stat_ep_num_ops_del_meta,
                    "Number of failed delete [%s] operation occurs at bucket = %s, while expected to %s"
                    % (src_stat_ep_num_ops_del_meta_res_fail, bucket,
                       dest_stat_ep_num_ops_del_meta))
                self.assertTrue(
                    src_stat_ep_num_ops_set_meta_res_fail == 0,
                    "Number of failed set [%s] operation occurs at bucket = %s, while expected 0"
                    % (src_stat_ep_num_ops_set_meta_res_fail, bucket))

            elif self.rep_type == "capi":
                self.assertTrue(
                    src_stat_ep_num_ops_get_meta > 0,
                    "Number of get [%s] operation occurs at bucket = %s, while expected greater than 0"
                    % (src_stat_ep_num_ops_get_meta, bucket))
Exemple #31
0
 def _get_backfill_timestamp(self, server, replica_server):
     """Return the TAP backfill start timestamp recorded on *server*.

     Reads the 'tap' stat group on *server* for the replication queue that
     feeds *replica_server* and returns the backfill start timestamp as an
     int.
     """
     param = 'tap'
     stat_key = 'eq_tapq:replication_ns_1@%s:backfill_start_timestamp' % (replica_server.ip)
     m_stats = StatsCommon.get_stats([server], self.bucket, param, stat_key)
     # Fix: dict.keys() returns a non-indexable view on Python 3, so the old
     # m_stats.keys()[0] raised TypeError; take the first key via iter().
     first_key = next(iter(m_stats))
     self.log.info("eq_tapq:replication_ns_1@%s:backfill_start_timestamp: %s" % (replica_server.ip, m_stats[first_key]))
     return int(m_stats[first_key])
Exemple #32
0
    def checkpoint_replication_pause_failover(self):
        """Load N items. Stop replication R3. Load N' more items.
        Failover R2. When restart replication to R3, verify backfill doesn't happen on R1."""

        param = 'checkpoint'
        stat_key = 'vb_0:open_checkpoint_id'
        rest = RestConnection(self.master)
        nodes = rest.node_statuses()
        # Locate the cluster node object corresponding to replica2 so it can
        # be failed over through the REST API.
        failover_node = None
        for node in nodes:
            if node.id.find(self.replica2.ip) >= 0:
                failover_node = node

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket,
                                  self.checkpoint_size)
        generate_load_one = BlobGenerator('nosql',
                                          'nosql-',
                                          self.value_size,
                                          end=self.num_items)
        self._load_all_buckets(self.master,
                               generate_load_one,
                               "create",
                               0,
                               1,
                               0,
                               True,
                               batch_size=self.checkpoint_size,
                               pause_secs=5,
                               timeout_secs=180)
        self._wait_for_stats_all_buckets(self.servers[:self.num_servers])
        # Remember R1's backfill timestamp so we can later assert that no new
        # backfill started on it after replication to R3 is restarted.
        prev_backfill_timestamp_R1 = self._get_backfill_timestamp(
            self.replica1, self.replica2)
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param,
                                        stat_key)
        self._stop_replication(self.replica3, self.bucket)

        # Load a second batch in the background while replication to R3 is
        # paused and R2 is failed over.
        generate_load_two = BlobGenerator('sqlite',
                                          'sqlite-',
                                          self.value_size,
                                          end=self.num_items)
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_load_two,
                                        "create", 0, 1, 0, True,
                                        self.checkpoint_size, 5, 180))
        data_load_thread.start()

        failed_over = rest.fail_over(failover_node.id)
        if not failed_over:
            self.log.info(
                "unable to failover the node the first time. try again in  60 seconds.."
            )
            # retry once after a grace period
            time.sleep(75)
            failed_over = rest.fail_over(failover_node.id)
        # Fix: the original message mixed a %-style placeholder with
        # str.format(), which logged a literal "%s" instead of the IP.
        self.assertTrue(failed_over,
                        "unable to failover node {0}".format(self.replica2.ip))
        self.log.info("failed over node : {0}".format(failover_node.id))
        data_load_thread.join()
        self._start_replication(self.replica3, self.bucket)

        # Drop the failed-over replica2 from the active server list before
        # verification (the dead `self.servers = []` pre-assignment removed).
        self.servers = [self.master, self.replica1, self.replica3]
        self.num_servers = len(self.servers)
        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
        self._verify_backfill_happen(self.replica1, self.replica2,
                                     prev_backfill_timestamp_R1)
        # Rebalance replica2 out of the cluster, then add it back in to
        # restore the original topology.
        self.cluster.rebalance(
            [self.master, self.replica1, self.replica2, self.replica3], [],
            [self.replica2])
        self.cluster.rebalance([self.master, self.replica1, self.replica3],
                               [self.replica2], [])
Exemple #33
0
    def checkpoint_deduplication(self):
        """Disable replication of R1. Load N items to master, then mutate some of them.
        Restart replication of R1, only N items should be in stats. In this test, we can
        only load number of items <= checkpoint_size to observe deduplication"""

        param = 'checkpoint'
        stat_key = 'vb_0:num_open_checkpoint_items'
        stat_key_id = 'vb_0:open_checkpoint_id'

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket,
                                  self.checkpoint_size)
        self._stop_replication(self.replica1, self.bucket)

        # Same key prefix ('nosql') with different value prefixes, so the
        # second load mutates the documents created by the first one.
        generate_load = BlobGenerator('nosql',
                                      'nosql-',
                                      self.value_size,
                                      end=self.num_items)
        generate_update = BlobGenerator('nosql',
                                        'sql-',
                                        self.value_size,
                                        end=self.num_items)
        self._load_all_buckets(self.master,
                               generate_load,
                               "create",
                               0,
                               1,
                               0,
                               True,
                               batch_size=self.checkpoint_size,
                               pause_secs=5,
                               timeout_secs=180)
        self._wait_for_stats_all_buckets(
            [self.master, self.replica2, self.replica3])
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param,
                                        stat_key_id)
        # Run the mutations in the background while replication to R1 is
        # being restarted, so dedup happens inside the open checkpoint.
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_update, "update",
                                        0, 1, 0, True, self.checkpoint_size, 5,
                                        180))
        data_load_thread.start()
        self._start_replication(self.replica1, self.bucket)
        data_load_thread.join()

        # Fix: dict.keys() is a non-indexable view on Python 3, so the old
        # m_stats.keys()[0] raised TypeError; take the first key via iter().
        chk_pnt = int(m_stats[next(iter(m_stats))])
        timeout = 60 if (self.num_items * .001) < 60 else self.num_items * .001
        time.sleep(timeout)
        # After deduplication the open checkpoint must hold exactly num_items
        # items (not num_items + mutations), and the open checkpoint id must
        # be unchanged on both master and R1.
        tasks = []
        tasks.append(
            self.cluster.async_wait_for_stats([self.master], self.bucket,
                                              param, stat_key, '==',
                                              self.num_items))
        tasks.append(
            self.cluster.async_wait_for_stats([self.replica1], self.bucket,
                                              param, stat_key, '==',
                                              self.num_items))
        tasks.append(
            self.cluster.async_wait_for_stats([self.master], self.bucket,
                                              param, stat_key_id, '==',
                                              chk_pnt))
        tasks.append(
            self.cluster.async_wait_for_stats([self.replica1], self.bucket,
                                              param, stat_key_id, '==',
                                              chk_pnt))

        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Items weren't deduplicated")

        self._verify_stats_all_buckets(self.servers[:self.num_servers])
Exemple #34
0
    def test_gsi_on_ephemeral_with_eviction_policy(self):
        """Exercise GSI indexes on an ephemeral bucket while filling its memory quota.

        Builds a regular index and a meta().id index, loads documents until the
        bucket's memory is (nearly) full, then pushes more documents and checks
        the eviction policy's effect on the indexed document set: 'noEviction'
        must keep the doc set unchanged, any other policy must have evicted or
        replaced documents.
        """
        num_of_docs = self.num_of_docs_per_collection
        self.prepare_collection_for_indexing(num_of_docs_per_collection=self.num_of_docs_per_collection)
        collection_namespace = self.namespaces[0]
        # namespace is of the form 'default:bucket.scope.collection'
        _, keyspace = collection_namespace.split(':')
        bucket, scope, collection = keyspace.split('.')
        index_gen = QueryDefinition(index_name='idx', index_fields=['age', 'country', 'city'])
        meta_index_gen = QueryDefinition(index_name='meta_idx', index_fields=['meta().id'])

        # Create (and, for deferred builds, explicitly build) both indexes.
        query = index_gen.generate_index_create_query(namespace=collection_namespace, defer_build=self.defer_build)
        self.run_cbq_query(query)
        if self.defer_build:
            build_query = index_gen.generate_build_query(namespace=collection_namespace)
            self.run_cbq_query(build_query)
        self.wait_until_indexes_online()
        query = meta_index_gen.generate_index_create_query(namespace=collection_namespace, defer_build=self.defer_build)
        self.run_cbq_query(query)
        if self.defer_build:
            build_query = meta_index_gen.generate_build_query(namespace=collection_namespace)
            self.run_cbq_query(build_query)
        self.wait_until_indexes_online()

        select_query = f'Select * from {collection_namespace} where age >10 and country like "A%";'
        select_meta_id_query = f'Select meta().id from {collection} where meta().id like "doc_%";'
        count_query = f'Select count(*) from {collection_namespace} where age >= 0;'
        named_collection_query_context = f'default:{bucket}.{scope}'

        # Sanity-check the indexes against the initial load.
        select_result = self.run_cbq_query(query=select_query)['results']
        meta_result = self.run_cbq_query(query=select_meta_id_query,
                                         query_context=named_collection_query_context)['results']
        count_result = self.run_cbq_query(query=count_query)['results'][0]['$1']
        self.assertTrue(len(select_result) > 0)
        self.assertEqual(len(meta_result), self.num_of_docs_per_collection)
        self.assertEqual(count_result, self.num_of_docs_per_collection)

        new_inserts = 10 ** 4
        is_memory_full = False
        stats_all_buckets = {}
        # NOTE(review): this loop rebinds 'bucket' (previously the bucket name
        # parsed from the keyspace) to a bucket object; the loading code below
        # then operates on the LAST bucket from this loop — confirm intended.
        for bucket in self.buckets:
            stats_all_buckets[bucket.name] = StatsCommon()

        # Memory is treated as "full" once mem_used crosses this fraction of
        # the bucket quota (bucket_size appears to be in MB, hence the
        # 1000000 factor — TODO confirm units).
        threshold = 0.93
        last_memory_used_val = 0
        while not is_memory_full:
            gen_create = SDKDataLoader(num_ops=new_inserts, percent_create=100,
                                       percent_update=0, percent_delete=0, scope=scope,
                                       collection=collection, output=True, start_seq_num=num_of_docs+1)
            task = self.cluster.async_load_gen_docs(self.master, bucket, gen_create)
            task.result()
            # Updating the doc counts
            num_of_docs = num_of_docs + new_inserts
            self.sleep(30)
            memory_used = int(stats_all_buckets[bucket.name].get_stats([self.master], bucket, '',
                                                                       'mem_used')[self.master])
            self.log.info(f"Current memory usage: {memory_used}")
            if self.eviction_policy == 'noEviction':
                # memory is considered full if mem_used is at say 90% of the available memory
                if memory_used > threshold * self.bucket_size * 1000000:
                    # Just filling the leftover memory to be double sure
                    gen_create = SDKDataLoader(num_ops=new_inserts, percent_create=100,
                                               percent_update=0, percent_delete=0, scope=scope,
                                               collection=collection, output=True, start_seq_num=num_of_docs + 1)
                    task = self.cluster.async_load_gen_docs(self.master, bucket, gen_create)
                    task.result()
                    num_of_docs = num_of_docs + new_inserts
                    memory_used = int(stats_all_buckets[bucket.name].get_stats([self.master], bucket, '',
                                                                               'mem_used')[self.master])
                    self.log.info(f"Current memory usage: {memory_used}")
                    is_memory_full = True
            else:
                # With an eviction policy active, a drop in mem_used signals
                # that eviction has kicked in — stop loading at that point.
                if memory_used < last_memory_used_val:
                    break
                last_memory_used_val = memory_used

        # Snapshot of the indexed doc ids at the moment memory filled up.
        meta_ids = self.run_cbq_query(query=select_meta_id_query,
                                      query_context=named_collection_query_context)['results']

        ids_at_threshold = sorted([item['id'] for item in meta_ids])

        # Pushing new docs to check the eviction policy
        new_inserts = 10 ** 4
        gen_create = SDKDataLoader(num_ops=new_inserts, percent_create=100, json_template="Employee",
                                   percent_update=0, percent_delete=0, scope=scope,
                                   collection=collection, output=True, start_seq_num=num_of_docs+1)
        tasks = self.data_ops_javasdk_loader_in_batches(sdk_data_loader=gen_create, batch_size=10000)
        for task in tasks:
            out = task.result()
            self.log.info(out)

        meta_ids_with_eviction_enforced = self.run_cbq_query(query=select_meta_id_query,
                                                             query_context=named_collection_query_context)['results']
        ids_after_threshold = sorted([item['id'] for item in meta_ids_with_eviction_enforced])

        if self.eviction_policy == 'noEviction':
            # Full memory + noEviction: the extra load must have been rejected,
            # so the indexed doc set is unchanged.
            self.assertEqual(len(meta_ids_with_eviction_enforced), len(meta_ids))
            self.assertEqual(ids_at_threshold, ids_after_threshold)
        else:
            # Eviction enabled: older docs were replaced, so the set differs.
            self.assertTrue(len(meta_ids_with_eviction_enforced) != len(meta_ids))
            self.assertTrue(ids_after_threshold != ids_at_threshold)