Example #1
0
    def delete_bucket_or_assert(serverInfo, bucket='default', test_case=None):
        """Delete ``bucket`` on ``serverInfo`` and optionally assert it is gone.

        The bucket is deleted through the REST API only when it currently
        exists on the node. When ``test_case`` is given, the helper then waits
        for the bucket to disappear and calls ``test_case.fail`` if it is still
        present once the wait is over. Without a ``test_case`` the deletion is
        not verified.

        :param serverInfo: server object handed to ``RestConnection``; its
            ``ip`` attribute is used in log messages.
        :param bucket: name of the bucket to delete.
        :param test_case: optional object exposing ``fail(msg)``.
        """
        # Value handed to wait_for_bucket_deletion. Assumed to be seconds (the
        # previous "two minutes" wording did not match it) -- TODO confirm.
        timeout = 200
        log = logger.Logger.get_logger()
        log.info('deleting existing bucket {0} on {1}'.format(
            bucket, serverInfo))

        def dump_diagnostics():
            # Best effort only: failing to collect diagnostics must not mask
            # the deletion problem being reported. Catching ``Exception``
            # instead of using a bare ``except`` lets KeyboardInterrupt and
            # SystemExit propagate.
            try:
                BucketOperationHelper.print_dataStorage_content([serverInfo])
                log.info(
                    StatsCommon.get_stats([serverInfo], bucket, "timings"))
            except Exception:
                log.error("Unable to get timings for bucket")

        rest = RestConnection(serverInfo)
        if RestHelper(rest).bucket_exists(bucket):
            if rest.delete_bucket(bucket):
                log.info('deleted bucket : {0} from {1}'.format(
                    bucket, serverInfo.ip))
            else:
                # Do not claim success when the REST call reported failure.
                dump_diagnostics()
                log.error(
                    'delete request for bucket {0} on {1} did not succeed'.
                    format(bucket, serverInfo.ip))

        if test_case and not BucketOperationHelper.wait_for_bucket_deletion(
                bucket, rest, timeout):
            dump_diagnostics()
            test_case.fail(
                'bucket "{0}" was not deleted even after waiting for {1} seconds'
                .format(bucket, timeout))
Example #2
0
 def delete_all_buckets_or_assert(servers, test_case):
     """Delete every bucket found on each server and optionally assert removal.

     For each server the bucket list is fetched (retried once after 15
     seconds on any error) and every bucket is deleted through the REST API
     (retried once after 5 seconds on ``ServerUnavailableException``). When
     ``test_case`` is given, the helper waits for each bucket to disappear and
     calls ``test_case.fail`` if it is still present once the wait is over.

     :param servers: iterable of server objects accepted by ``RestConnection``;
         their ``ip`` attribute is used in log messages.
     :param test_case: object exposing ``fail(msg)``, or a falsy value to skip
         the verification step.
     """
     # Value handed to wait_for_bucket_deletion. Assumed to be seconds (the
     # previous "two minutes" wording did not match it) -- TODO confirm.
     timeout = 200
     log = logger.Logger.get_logger()

     def dump_diagnostics(server, bucket_name):
         # Best effort only: failing to collect diagnostics must not mask the
         # deletion problem. ``Exception`` (not a bare ``except``) lets
         # KeyboardInterrupt and SystemExit propagate.
         try:
             BucketOperationHelper.print_dataStorage_content(servers)
             log.info(
                 StatsCommon.get_stats([server], bucket_name, "timings"))
         except Exception:
             log.error("Unable to get timings for bucket")

     for serverInfo in servers:
         rest = RestConnection(serverInfo)
         try:
             buckets = rest.get_buckets()
         except Exception as e:
             log.error(e)
             log.error(
                 '15 seconds sleep before calling get_buckets again...')
             time.sleep(15)
             buckets = rest.get_buckets()
         if not buckets:
             # Nothing to delete on this node.
             continue

         bucket_names = [b.name for b in buckets]
         log.info('deleting existing buckets {0} on {1}'.format(
             bucket_names, serverInfo.ip))
         for bucket in buckets:
             log.info("remove bucket {0} ...".format(bucket.name))
             try:
                 status = rest.delete_bucket(bucket.name)
             except ServerUnavailableException as e:
                 log.error(e)
                 log.error(
                     '5 seconds sleep before calling delete_bucket again...')
                 time.sleep(5)
                 status = rest.delete_bucket(bucket.name)
             if status:
                 log.info('deleted bucket : {0} from {1}'.format(
                     bucket.name, serverInfo.ip))
             else:
                 # Do not claim success when the REST call reported failure.
                 dump_diagnostics(serverInfo, bucket.name)
                 log.error(
                     'delete request for bucket {0} on {1} did not succeed'.
                     format(bucket.name, serverInfo.ip))
             if test_case and not BucketOperationHelper.wait_for_bucket_deletion(
                     bucket.name, rest, timeout):
                 dump_diagnostics(serverInfo, bucket.name)
                 test_case.fail(
                     'bucket "{0}" was not deleted even after waiting for {1} seconds'
                     .format(bucket.name, timeout))
         log.info(
             "sleep 2 seconds to make sure all buckets ({}) were deleted completely."
             .format(bucket_names))
         time.sleep(2)
Example #3
0
 def delete_all_buckets_or_assert(servers, test_case, timeout=200):
     """Delete every bucket on each non-dummy server and verify the removal.

     Servers whose ``dummy`` attribute is truthy are skipped. For the others
     the bucket list is fetched and each bucket is deleted through the REST
     API (both calls use 3 retries with a poll interval of 5). The helper then
     waits up to ``timeout`` for each bucket to disappear; if it does not, the
     failure is logged and, when ``test_case`` is given, ``test_case.fail`` is
     called.

     :param servers: iterable of server objects accepted by ``RestConnection``;
         their ``ip`` and ``dummy`` attributes are read.
     :param test_case: object exposing ``fail(msg)``, or a falsy value to only
         log failures.
     :param timeout: value passed to ``wait_for_bucket_deletion`` and reported
         as seconds in the failure message.
     """
     log = logger.Logger.get_logger()

     def dump_diagnostics(server, bucket_name):
         # Best effort only: failing to collect diagnostics must not mask the
         # deletion problem. ``Exception`` (not a bare ``except``) lets
         # KeyboardInterrupt and SystemExit propagate.
         try:
             BucketOperationHelper.print_dataStorage_content(servers)
             log.info(
                 StatsCommon.get_stats([server], bucket_name, "timings"))
         except Exception:
             log.error("Unable to get timings for bucket")

     for serverInfo in servers:
         if serverInfo.dummy:
             continue
         rest = RestConnection(serverInfo)
         # retrying to get buckets with poll_interval and limit of retries
         buckets = rest.get_buckets(num_retries=3, poll_interval=5)
         if not buckets:
             log.info(
                 "Could not find any buckets for node {0}, nothing to delete"
                 .format(serverInfo.ip))
             continue

         log.info('deleting existing buckets {0} on {1}'.format(
             [b.name for b in buckets], serverInfo.ip))
         for bucket in buckets:
             # trying to send rest call to delete bucket with poll_interval
             # and limit of retries
             status = rest.delete_bucket(bucket.name,
                                         num_retries=3,
                                         poll_interval=5)
             if not status:
                 dump_diagnostics(serverInfo, bucket.name)

             # The delete call's status is not final: poll until the bucket
             # is really gone or the timeout expires.
             is_bucket_deleted = BucketOperationHelper.wait_for_bucket_deletion(
                 bucket.name, rest, timeout)
             if is_bucket_deleted:
                 log.info('deleted bucket : {0} from {1}'.format(
                     bucket.name, serverInfo.ip))
                 continue

             dump_diagnostics(serverInfo, bucket.name)
             msg = 'bucket "{0}" was not deleted even after waiting for {1} seconds.'.format(
                 bucket.name, timeout)
             # Always leave a trace in the log, even without a test case.
             log.error(msg)
             if test_case:
                 test_case.fail(msg)
Example #4
0
    def checkpoint_create_items(self):
        """Load items and check that a new checkpoint opens on every node.

        Sets the checkpoint size on the active servers, records the master's
        open checkpoint id, loads ``num_items`` documents and finally verifies
        both the checkpoint id progression and the bucket stats.
        """
        stats_group = 'checkpoint'
        open_chk_key = 'vb_0:open_checkpoint_id'

        def cluster_nodes():
            # Sliced afresh on every use, exactly like the inline expression.
            return self.servers[:self.num_servers]

        self._set_checkpoint_size(cluster_nodes(), self.bucket,
                                  str(self.checkpoint_size))
        # Baseline taken before the load so the later check can compare.
        baseline_stats = StatsCommon.get_stats([self.master], self.bucket,
                                               stats_group, open_chk_key)

        loader = BlobGenerator('nosql', 'nosql-', self.value_size,
                               end=self.num_items)
        load_options = {
            'batch_size': self.checkpoint_size,
            'pause_secs': 5,
            'timeout_secs': 180,
        }
        self._load_all_buckets(self.master, loader, "create", 0, 1, 0, True,
                               **load_options)
        self._wait_for_stats_all_buckets(cluster_nodes())

        self._verify_checkpoint_id(stats_group, open_chk_key, baseline_stats)
        self._verify_stats_all_buckets(cluster_nodes())
Example #5
0
    def _verify_checkpoint_id(self, param, stat_key, m_stats):
        """Check that the checkpoint id advanced and that all nodes agree on it.

        First waits until ``stat_key`` on the master exceeds the value recorded
        in ``m_stats``. Then re-reads the master's value and waits until every
        server in ``self.servers`` reports that same value. A timeout in either
        wait fails the test.

        :param param: stats group passed to the stats calls.
        :param stat_key: name of the stat holding the checkpoint id.
        :param m_stats: stats mapping captured earlier from the master; the
            value of its first entry is used as the baseline.
        """

        def first_value(stats):
            # list(...) keeps the first-key lookup working on Python 3, where
            # dict.keys() returns a view that cannot be indexed.
            return int(stats[list(stats.keys())[0]])

        # At least 60, otherwise 0.001 per loaded item.
        timeout = max(self.num_items * .001, 60)

        # verify checkpoint id increases on master node
        chk_pnt = first_value(m_stats)
        task = self.cluster.async_wait_for_stats([self.master], self.bucket,
                                                 param, stat_key, '>', chk_pnt)
        try:
            task.result(timeout)
        except TimeoutError:
            self.fail("New checkpoint not created")

        time.sleep(timeout / 10)
        # verify Master and all replicas are in sync with checkpoint ids
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param,
                                        stat_key)
        chk_pnt = first_value(m_stats)
        # Start every wait before blocking on any of them.
        tasks = [
            self.cluster.async_wait_for_stats([server], self.bucket, param,
                                              stat_key, '==', chk_pnt)
            for server in self.servers
        ]
        for task in tasks:
            try:
                task.result(timeout)
            except TimeoutError:
                self.fail(
                    "Master and all replicas are NOT in sync with checkpoint ids"
                )
Example #6
0
    def checkpoint_create_time(self):
        """Load items, then rely on the checkpoint timeout to open a new one.

        Sets the checkpoint timeout on the active servers, loads ``num_items``
        documents, records the master's open checkpoint id, sleeps slightly
        longer than the timeout and then verifies the checkpoint id
        progression and the bucket stats.
        """
        stats_group = 'checkpoint'
        open_chk_key = 'vb_0:open_checkpoint_id'

        def cluster_nodes():
            # Sliced afresh on every use, exactly like the inline expression.
            return self.servers[:self.num_servers]

        self._set_checkpoint_timeout(cluster_nodes(), self.bucket,
                                     str(self.timeout))

        loader = BlobGenerator('nosql', 'nosql-', self.value_size,
                               end=self.num_items)
        load_options = {
            'batch_size': self.checkpoint_size,
            'pause_secs': 5,
            'timeout_secs': 180,
        }
        self._load_all_buckets(self.master, loader, "create", 0, 1, 0, True,
                               **load_options)
        self._wait_for_stats_all_buckets(cluster_nodes())

        # Baseline is taken after the load: only the timeout should move it.
        baseline_stats = StatsCommon.get_stats([self.master], self.bucket,
                                               stats_group, open_chk_key)
        nap_secs = self.timeout + 5
        self.log.info("Sleeping for {0} seconds)".format(nap_secs))
        time.sleep(nap_secs)
        self._verify_checkpoint_id(stats_group, open_chk_key, baseline_stats)
        self._verify_stats_all_buckets(cluster_nodes())
Example #7
0
    def _verify_checkpoint_id(self, param, stat_key, m_stats):
        """Verify the checkpoint id grew on the master and matches everywhere.

        Step one waits for ``stat_key`` on the master to become greater than
        the baseline found in ``m_stats``. Step two re-reads the master's
        current value and waits for each server in ``self.servers`` to report
        exactly that value. The test fails if either wait times out.

        :param param: stats group passed to the stats calls.
        :param stat_key: name of the stat holding the checkpoint id.
        :param m_stats: stats mapping captured earlier from the master; the
            value of its first entry is the baseline.
        """
        # At least 60, otherwise 0.001 per loaded item.
        timeout = max(self.num_items * .001, 60)

        # verify checkpoint id increases on master node
        # list(...) is required on Python 3, where dict.keys() is a view that
        # does not support indexing.
        chk_pnt = int(m_stats[list(m_stats.keys())[0]])
        master_task = self.cluster.async_wait_for_stats(
            [self.master], self.bucket, param, stat_key, '>', chk_pnt)
        try:
            master_task.result(timeout)
        except TimeoutError:
            self.fail("New checkpoint not created")

        time.sleep(timeout / 10)
        # verify Master and all replicas are in sync with checkpoint ids
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param, stat_key)
        chk_pnt = int(m_stats[list(m_stats.keys())[0]])
        # Start every wait before blocking on any of them.
        tasks = [
            self.cluster.async_wait_for_stats([server], self.bucket, param, stat_key, '==', chk_pnt)
            for server in self.servers
        ]
        for task in tasks:
            try:
                task.result(timeout)
            except TimeoutError:
                self.fail("Master and all replicas are NOT in sync with checkpoint ids")
Example #8
0
    def checkpoint_server_down(self):
        """Stop R2 during a load, restart it, and expect backfill on R1 and R2.

        Loads ``num_items`` documents, records the backfill timestamps and the
        master's open checkpoint id, stops replica2, runs the workload
        generator against the master, restarts replica2 and then verifies the
        checkpoint id and that backfill took place.
        """
        stats_group = 'checkpoint'
        open_chk_key = 'vb_0:open_checkpoint_id'
        # NOTE(review): this connection is never used below; it is kept
        # because constructing it may have side effects -- confirm before
        # removing.
        rest = RestConnection(self.master)

        def cluster_nodes():
            # Sliced afresh on every use, exactly like the inline expression.
            return self.servers[:self.num_servers]

        self._set_checkpoint_size(cluster_nodes(), self.bucket,
                                  self.checkpoint_size)
        initial_load = BlobGenerator('nosql', 'nosql-', self.value_size,
                                     end=self.num_items)
        load_options = {
            'batch_size': self.checkpoint_size,
            'pause_secs': 5,
            'timeout_secs': 180,
        }
        self._load_all_buckets(self.master, initial_load, "create", 0, 1, 0,
                               True, **load_options)
        self._wait_for_stats_all_buckets(cluster_nodes())
        backfill_before_r1 = self._get_backfill_timestamp(self.replica1,
                                                          self.replica2)
        backfill_before_r2 = self._get_backfill_timestamp(self.replica2,
                                                          self.replica3)

        master_stats = StatsCommon.get_stats([self.master], self.bucket,
                                             stats_group, open_chk_key)
        self._stop_server(self.replica2)
        time.sleep(5)
        # The loader thread is joined right away, so the load finishes
        # before replica2 is started again.
        loader_thread = Thread(target=self._load_data_use_workloadgen,
                               name="load_data",
                               args=(self.master,))
        loader_thread.start()
        loader_thread.join()
        self._start_server(self.replica2)
        time.sleep(5)

        self._verify_checkpoint_id(stats_group, open_chk_key, master_stats)
        self._verify_backfill_happen(self.replica1, self.replica2,
                                     backfill_before_r1, True)
        self._verify_backfill_happen(self.replica2, self.replica3,
                                     backfill_before_r2, True)
Example #9
0
    def checkpoint_collapse(self):
        """With 3 replicas, stop replication on R2, let Master and R1 close checkpoint.
        Run load until a new checkpoint is created on Master and R1.
        Wait till checkpoints merge on R1. Restart replication of R2.
        Checkpoint should advance to the latest on R2.

        The test fails if the closed-checkpoint id on the master or R1 does
        not advance by at least two, if R1 never holds ``num_items``
        checkpoint items, or if R1's checkpoint item count does not drop below
        ``num_items`` after replication to R2 is restarted.
        """

        param = 'checkpoint'
        stat_key = 'vb_0:last_closed_checkpoint_id'
        stat_chk_itms = 'vb_0:num_checkpoint_items'

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket,
                                  str(self.checkpoint_size))
        self._stop_replication(self.replica2, self.bucket)

        generate_load = BlobGenerator('nosql',
                                      'nosql-',
                                      self.value_size,
                                      end=self.num_items)
        # Load in a background thread; the stat waits below are registered
        # while it is running.
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_load, "create",
                                        0, 1, 0, True, self.checkpoint_size, 5,
                                        180))
        data_load_thread.start()
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param,
                                        stat_key)

        # list(...) keeps the first-key lookup working on Python 3, where
        # dict.keys() returns a view that cannot be indexed.
        # Target: the last closed checkpoint id must advance by at least two.
        chk_pnt = int(m_stats[list(m_stats.keys())[0]]) + 2
        tasks = [
            self.cluster.async_wait_for_stats([self.master], self.bucket,
                                              param, stat_key, '>=', chk_pnt),
            self.cluster.async_wait_for_stats([self.replica1], self.bucket,
                                              param, stat_key, '>=', chk_pnt),
            self.cluster.async_wait_for_stats([self.replica1], self.bucket,
                                              param, stat_chk_itms, '>=',
                                              self.num_items),
        ]
        data_load_thread.join()
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Checkpoint not collapsed")

        self._start_replication(self.replica2, self.bucket)
        replicated = self.cluster.async_wait_for_stats([self.replica1],
                                                       self.bucket, param,
                                                       stat_chk_itms, '<',
                                                       self.num_items)
        try:
            replicated.result(60)
        except TimeoutError:
            self.fail("Checkpoints not replicated to replica2")

        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
Example #10
0
 def _get_backfill_timestamp(self, server, replica_server):
     """Return the TAP backfill start timestamp ``server`` reports for a replica.

     Reads the ``backfill_start_timestamp`` stat of the replication stream
     named after ``replica_server.ip`` from the 'tap' stats group of
     ``server``, logs it, and returns it as an int.

     :param server: node whose stats are queried.
     :param replica_server: node whose ``ip`` identifies the replication
         stream in the stat key.
     :returns: value of the first entry of the returned stats mapping,
         converted to ``int``.
     """
     param = 'tap'
     stat_key = 'eq_tapq:replication_ns_1@%s:backfill_start_timestamp' % (
         replica_server.ip)
     m_stats = StatsCommon.get_stats([server], self.bucket, param, stat_key)
     # list(...) keeps the first-key lookup working on Python 3, where
     # dict.keys() returns a view that cannot be indexed.
     timestamp = m_stats[list(m_stats.keys())[0]]
     # Same text as before: the stat key followed by its value.
     self.log.info("%s: %s" % (stat_key, timestamp))
     return int(timestamp)
Example #11
0
 def delete_all_buckets_or_assert(servers, test_case):
     """Delete every bucket found on each server and optionally assert removal.

     For each server the bucket list is fetched (retried once after 15
     seconds on any error) and every bucket is deleted through the REST API
     (retried once after 5 seconds on ``ServerUnavailableException``). When
     ``test_case`` is given, the helper waits for each bucket to disappear and
     calls ``test_case.fail`` if it is still present once the wait is over.
     A 2 second pause follows each server, even when it had no buckets.

     :param servers: iterable of server objects accepted by ``RestConnection``;
         their ``ip`` attribute is used in log messages.
     :param test_case: object exposing ``fail(msg)``, or a falsy value to skip
         the verification step.
     """
     # Value handed to wait_for_bucket_deletion. Assumed to be seconds (the
     # previous "two minutes" wording did not match it) -- TODO confirm.
     timeout = 200
     log = logger.Logger.get_logger()

     def dump_diagnostics(server, bucket_name):
         # Best effort only: failing to collect diagnostics must not mask the
         # deletion problem. ``Exception`` (not a bare ``except``) lets
         # KeyboardInterrupt and SystemExit propagate.
         try:
             BucketOperationHelper.print_dataStorage_content(servers)
             log.info(StatsCommon.get_stats([server], bucket_name, "timings"))
         except Exception:
             log.error("Unable to get timings for bucket")

     for serverInfo in servers:
         rest = RestConnection(serverInfo)
         try:
             buckets = rest.get_buckets()
         except Exception as e:
             log.error(e)
             log.error('15 seconds sleep before calling get_buckets again...')
             time.sleep(15)
             buckets = rest.get_buckets()
         log.info('deleting existing buckets {0} on {1}'.format([b.name for b in buckets], serverInfo.ip))
         for bucket in buckets:
             log.info("remove bucket {0} ...".format(bucket.name))
             try:
                 status = rest.delete_bucket(bucket.name)
             except ServerUnavailableException as e:
                 log.error(e)
                 log.error('5 seconds sleep before calling delete_bucket again...')
                 time.sleep(5)
                 status = rest.delete_bucket(bucket.name)
             if status:
                 log.info('deleted bucket : {0} from {1}'.format(bucket.name, serverInfo.ip))
             else:
                 # Do not claim success when the REST call reported failure.
                 dump_diagnostics(serverInfo, bucket.name)
                 log.error('delete request for bucket {0} on {1} did not succeed'.format(bucket.name, serverInfo.ip))
             if test_case and not BucketOperationHelper.wait_for_bucket_deletion(bucket.name, rest, timeout):
                 dump_diagnostics(serverInfo, bucket.name)
                 test_case.fail('bucket "{0}" was not deleted even after waiting for {1} seconds'.format(bucket.name, timeout))
         log.info("sleep 2 seconds to make sure all buckets were deleted completely.")
         time.sleep(2)
Example #12
0
    def checkpoint_replication_pause(self):
        """With 3 replicas load data. pause replication to R2. Let checkpoints close on Master and R1.
        Restart replication of R2 and R3, backfill should not be seen on R1 and R2.

        The test fails if the last closed checkpoint id on the master or R1
        does not advance by at least two within 60 seconds; the checkpoint id,
        bucket stats and backfill timestamps are then verified.
        """

        param = 'checkpoint'
        stat_key = 'vb_0:last_closed_checkpoint_id'

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket,
                                  str(self.checkpoint_size))
        time.sleep(5)
        # Backfill timestamps before the load, used for comparison at the end.
        prev_backfill_timestamp_R1 = self._get_backfill_timestamp(
            self.replica1, self.replica2)
        prev_backfill_timestamp_R2 = self._get_backfill_timestamp(
            self.replica2, self.replica3)

        generate_load = BlobGenerator('nosql',
                                      'nosql-',
                                      self.value_size,
                                      end=self.num_items)
        # Load in a background thread; replication to R2 is stopped while the
        # load is running.
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_load, "create",
                                        0, 1, 0, True, self.checkpoint_size, 5,
                                        180))
        data_load_thread.start()
        self._stop_replication(self.replica2, self.bucket)

        m_stats = StatsCommon.get_stats([self.master], self.bucket, param,
                                        stat_key)
        # list(...) keeps the first-key lookup working on Python 3, where
        # dict.keys() returns a view that cannot be indexed.
        # Target: the last closed checkpoint id must advance by at least two.
        chk_pnt = int(m_stats[list(m_stats.keys())[0]]) + 2
        tasks = [
            self.cluster.async_wait_for_stats([node], self.bucket, param,
                                              stat_key, '>=', chk_pnt)
            for node in (self.master, self.replica1)
        ]
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Checkpoint not closed")

        data_load_thread.join()
        self._start_replication(self.replica2, self.bucket)

        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
        self._verify_backfill_happen(self.replica1, self.replica2,
                                     prev_backfill_timestamp_R1)
        self._verify_backfill_happen(self.replica2, self.replica3,
                                     prev_backfill_timestamp_R2)
Example #13
0
    def delete_bucket_or_assert(serverInfo, bucket='default', test_case=None):
        """Delete ``bucket`` on ``serverInfo``; fail ``test_case`` if it remains.

        The REST delete is issued only when the bucket currently exists on the
        node. If a ``test_case`` is supplied, the helper afterwards waits for
        the bucket to disappear and calls ``test_case.fail`` when it is still
        there once the wait is over. With no ``test_case`` the outcome is not
        verified.

        :param serverInfo: server object handed to ``RestConnection``; its
            ``ip`` attribute is used in log messages.
        :param bucket: name of the bucket to delete.
        :param test_case: optional object exposing ``fail(msg)``.
        """
        # Value handed to wait_for_bucket_deletion. Assumed to be seconds (the
        # previous "two minutes" wording did not match it) -- TODO confirm.
        timeout = 200
        log = logger.Logger.get_logger()
        log.info('deleting existing bucket {0} on {1}'.format(bucket, serverInfo))

        def dump_diagnostics():
            # Best effort only: failing to collect diagnostics must not mask
            # the deletion problem. ``Exception`` (not a bare ``except``) lets
            # KeyboardInterrupt and SystemExit propagate.
            try:
                BucketOperationHelper.print_dataStorage_content([serverInfo])
                log.info(StatsCommon.get_stats([serverInfo], bucket, "timings"))
            except Exception:
                log.error("Unable to get timings for bucket")

        rest = RestConnection(serverInfo)
        if RestHelper(rest).bucket_exists(bucket):
            if rest.delete_bucket(bucket):
                log.info('deleted bucket : {0} from {1}'.format(bucket, serverInfo.ip))
            else:
                # Do not claim success when the REST call reported failure.
                dump_diagnostics()
                log.error('delete request for bucket {0} on {1} did not succeed'.format(bucket, serverInfo.ip))

        if test_case and not BucketOperationHelper.wait_for_bucket_deletion(bucket, rest, timeout):
            dump_diagnostics()
            test_case.fail('bucket "{0}" was not deleted even after waiting for {1} seconds'.format(bucket, timeout))
Example #14
0
    def checkpoint_create_items(self):
        """Load items and confirm a new checkpoint was opened on all nodes.

        Configures the checkpoint size, remembers the master's open checkpoint
        id, loads ``num_items`` documents, and then verifies the checkpoint id
        progression followed by the bucket stats.
        """
        stats_group = 'checkpoint'
        open_chk_key = 'vb_0:open_checkpoint_id'

        def cluster_nodes():
            # Sliced afresh on every use, exactly like the inline expression.
            return self.servers[:self.num_servers]

        self._set_checkpoint_size(cluster_nodes(), self.bucket,
                                  str(self.checkpoint_size))
        # Baseline taken before the load so the later check can compare.
        baseline_stats = StatsCommon.get_stats([self.master], self.bucket,
                                               stats_group, open_chk_key)

        loader = BlobGenerator('nosql', 'nosql-', self.value_size,
                               end=self.num_items)
        self._load_all_buckets(
            self.master, loader, "create", 0, 1, 0, True,
            batch_size=self.checkpoint_size,
            pause_secs=5,
            timeout_secs=180,
        )
        self._wait_for_stats_all_buckets(cluster_nodes())

        self._verify_checkpoint_id(stats_group, open_chk_key, baseline_stats)
        self._verify_stats_all_buckets(cluster_nodes())
Example #15
0
    def checkpoint_collapse(self):
        """With 3 replicas, stop replication on R2, let Master and R1 close checkpoint.
        Run load until a new checkpoint is created on Master and R1.
        Wait till checkpoints merge on R1. Restart replication of R2.
        Checkpoint should advance to the latest on R2.

        The test fails if the closed-checkpoint id on the master or R1 does
        not advance by at least two, if R1 never holds ``num_items``
        checkpoint items, or if R1's checkpoint item count does not drop below
        ``num_items`` after replication to R2 is restarted.
        """

        param = 'checkpoint'
        stat_key = 'vb_0:last_closed_checkpoint_id'
        stat_chk_itms = 'vb_0:num_checkpoint_items'

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket, str(self.checkpoint_size))
        self._stop_replication(self.replica2, self.bucket)

        generate_load = BlobGenerator('nosql', 'nosql-', self.value_size, end=self.num_items)
        # Load in a background thread; the stat waits below are registered
        # while it is running.
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_load, "create", 0, 1, 0, True, self.checkpoint_size, 5, 180))
        data_load_thread.start()
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param, stat_key)

        # list(...) is required on Python 3, where dict.keys() is a view that
        # does not support indexing.
        # Target: the last closed checkpoint id must advance by at least two.
        chk_pnt = int(m_stats[list(m_stats.keys())[0]]) + 2
        tasks = [
            self.cluster.async_wait_for_stats([self.master], self.bucket, param, stat_key, '>=', chk_pnt),
            self.cluster.async_wait_for_stats([self.replica1], self.bucket, param, stat_key, '>=', chk_pnt),
            self.cluster.async_wait_for_stats([self.replica1], self.bucket, param, stat_chk_itms, '>=', self.num_items),
        ]
        data_load_thread.join()
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Checkpoint not collapsed")

        self._start_replication(self.replica2, self.bucket)
        replicated = self.cluster.async_wait_for_stats([self.replica1], self.bucket, param,
                                                       stat_chk_itms, '<', self.num_items)
        try:
            replicated.result(60)
        except TimeoutError:
            self.fail("Checkpoints not replicated to replica2")

        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
Example #16
0
    def checkpoint_server_down(self):
        """Take R2 down during a load, bring it back, expect backfill on R1/R2.

        Loads ``num_items`` documents, records the backfill timestamps and the
        master's open checkpoint id, stops replica2, runs the workload
        generator against the master, restarts replica2 and then verifies the
        checkpoint id and that backfill took place.
        """
        stats_group = 'checkpoint'
        open_chk_key = 'vb_0:open_checkpoint_id'
        # NOTE(review): this connection is never used below; it is kept
        # because constructing it may have side effects -- confirm before
        # removing.
        rest = RestConnection(self.master)

        def cluster_nodes():
            # Sliced afresh on every use, exactly like the inline expression.
            return self.servers[:self.num_servers]

        self._set_checkpoint_size(cluster_nodes(), self.bucket, self.checkpoint_size)
        initial_load = BlobGenerator('nosql', 'nosql-', self.value_size, end=self.num_items)
        self._load_all_buckets(
            self.master, initial_load, "create", 0, 1, 0, True,
            batch_size=self.checkpoint_size,
            pause_secs=5,
            timeout_secs=180,
        )
        self._wait_for_stats_all_buckets(cluster_nodes())
        backfill_before_r1 = self._get_backfill_timestamp(self.replica1, self.replica2)
        backfill_before_r2 = self._get_backfill_timestamp(self.replica2, self.replica3)

        master_stats = StatsCommon.get_stats([self.master], self.bucket, stats_group, open_chk_key)
        self._stop_server(self.replica2)
        time.sleep(5)
        # The loader thread is joined right away, so the load finishes
        # before replica2 is started again.
        loader_thread = Thread(target=self._load_data_use_workloadgen, name="load_data", args=(self.master,))
        loader_thread.start()
        loader_thread.join()
        self._start_server(self.replica2)
        time.sleep(5)

        self._verify_checkpoint_id(stats_group, open_chk_key, master_stats)
        self._verify_backfill_happen(self.replica1, self.replica2, backfill_before_r1, True)
        self._verify_backfill_happen(self.replica2, self.replica3, backfill_before_r2, True)
Example #17
0
    def checkpoint_replication_pause_failover(self):
        """Load N items. Stop replication R3. Load N' more items.
        Failover R2. When restart replication to R3, verify backfill doesn't happen on R1.

        The test fails early if R2 cannot be found among the cluster nodes,
        and fails if the failover does not succeed after one retry.
        Afterwards ``self.servers``/``self.num_servers`` are narrowed to
        master, R1 and R3 before the verification steps run.
        """

        param = 'checkpoint'
        stat_key = 'vb_0:open_checkpoint_id'
        # Delay before the single failover retry.
        retry_delay = 75
        rest = RestConnection(self.master)

        # Locate the cluster node whose id contains R2's ip (the last match
        # wins, as before).
        failover_node = None
        for node in rest.node_statuses():
            if self.replica2.ip in node.id:
                failover_node = node
        if failover_node is None:
            # Without this guard the test would crash later with an
            # AttributeError on ``failover_node.id``.
            self.fail("unable to find node {0} among the cluster nodes".format(self.replica2.ip))

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket, self.checkpoint_size)
        generate_load_one = BlobGenerator('nosql', 'nosql-', self.value_size, end=self.num_items)
        self._load_all_buckets(self.master, generate_load_one, "create", 0, 1, 0, True,
                               batch_size=self.checkpoint_size, pause_secs=5, timeout_secs=180)
        self._wait_for_stats_all_buckets(self.servers[:self.num_servers])
        prev_backfill_timestamp_R1 = self._get_backfill_timestamp(self.replica1, self.replica2)
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param, stat_key)
        self._stop_replication(self.replica3, self.bucket)

        # The second load runs in the background while R2 is failed over.
        generate_load_two = BlobGenerator('sqlite', 'sqlite-', self.value_size, end=self.num_items)
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_load_two, "create", 0, 1, 0, True,
                                        self.checkpoint_size, 5, 180))
        data_load_thread.start()

        failed_over = rest.fail_over(failover_node.id)
        if not failed_over:
            self.log.info("unable to failover the node the first time. try again in {0} seconds..".format(retry_delay))
            time.sleep(retry_delay)
            failed_over = rest.fail_over(failover_node.id)
        # The old message mixed "%s" with str.format, so the ip never showed.
        self.assertTrue(failed_over, "unable to failover node {0}".format(self.replica2.ip))
        self.log.info("failed over node : {0}".format(failover_node.id))
        data_load_thread.join()
        self._start_replication(self.replica3, self.bucket)

        # R2 has been failed over: the remaining checks cover the other nodes.
        self.servers = [self.master, self.replica1, self.replica3]
        self.num_servers = len(self.servers)
        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
        self._verify_backfill_happen(self.replica1, self.replica2, prev_backfill_timestamp_R1)
        # NOTE(review): presumably rebalance(servers, to_add, to_remove), i.e.
        # R2 is rebalanced out and then added back -- confirm against the
        # cluster helper.
        self.cluster.rebalance([self.master, self.replica1, self.replica2, self.replica3], [], [self.replica2])
        self.cluster.rebalance([self.master, self.replica1, self.replica3], [self.replica2], [])
Example #18
0
    def checkpoint_create_time(self):
        """Load items and let the checkpoint timeout open a new checkpoint.

        Configures the checkpoint timeout, loads ``num_items`` documents,
        remembers the master's open checkpoint id, sleeps slightly longer than
        the timeout and then verifies the checkpoint id progression and the
        bucket stats.
        """
        stats_group = 'checkpoint'
        open_chk_key = 'vb_0:open_checkpoint_id'

        def cluster_nodes():
            # Sliced afresh on every use, exactly like the inline expression.
            return self.servers[:self.num_servers]

        self._set_checkpoint_timeout(cluster_nodes(), self.bucket, str(self.timeout))

        loader = BlobGenerator('nosql', 'nosql-', self.value_size, end=self.num_items)
        self._load_all_buckets(
            self.master, loader, "create", 0, 1, 0, True,
            batch_size=self.checkpoint_size,
            pause_secs=5,
            timeout_secs=180,
        )
        self._wait_for_stats_all_buckets(cluster_nodes())

        # Baseline is taken after the load: only the timeout should move it.
        baseline_stats = StatsCommon.get_stats([self.master], self.bucket, stats_group, open_chk_key)
        nap_secs = self.timeout + 5
        self.log.info("Sleeping for {0} seconds)".format(nap_secs))
        time.sleep(nap_secs)
        self._verify_checkpoint_id(stats_group, open_chk_key, baseline_stats)
        self._verify_stats_all_buckets(cluster_nodes())
Example #19
0
    def checkpoint_deduplication(self):
        """Disable replication of R1. Load N items to master, then update them.
        Restart replication of R1, only N items should be in stats. In this test, we can
        only load number of items <= checkpoint_size to observe deduplication.

        Fails the test if, within 60 seconds per wait task, master and R1 do
        not both report ``num_items`` open-checkpoint items and the same open
        checkpoint id the master had before the updates."""

        param = 'checkpoint'
        stat_key = 'vb_0:num_open_checkpoint_items'
        stat_key_id = 'vb_0:open_checkpoint_id'

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket, self.checkpoint_size)
        self._stop_replication(self.replica1, self.bucket)

        generate_load = BlobGenerator('nosql', 'nosql-', self.value_size, end=self.num_items)
        generate_update = BlobGenerator('nosql', 'sql-', self.value_size, end=self.num_items)
        self._load_all_buckets(self.master, generate_load, "create", 0, 1, 0, True,
                               batch_size=self.checkpoint_size, pause_secs=5, timeout_secs=180)
        # R1 is paused, so only wait on the nodes that are still replicating.
        self._wait_for_stats_all_buckets([self.master, self.replica2, self.replica3])
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param, stat_key_id)

        # Run the updates in the background while R1 replication is resumed.
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_update, "update", 0, 1, 0, True,
                                        self.checkpoint_size, 5, 180))
        data_load_thread.start()
        self._start_replication(self.replica1, self.bucket)
        data_load_thread.join()

        # list(...) keeps this working on Python 3, where dict views cannot
        # be subscripted; an empty result still raises IndexError.
        chk_pnt = int(list(m_stats.values())[0])

        # Settle time: 1 ms per item, but never less than 60 seconds.
        timeout = max(60, self.num_items * .001)
        time.sleep(timeout)

        # Same order as before: item count on master/R1, then checkpoint id
        # on master/R1.
        tasks = [self.cluster.async_wait_for_stats([node], self.bucket, param,
                                                   key, '==', expected)
                 for key, expected in ((stat_key, self.num_items),
                                       (stat_key_id, chk_pnt))
                 for node in (self.master, self.replica1)]

        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Items weren't deduplicated")

        self._verify_stats_all_buckets(self.servers[:self.num_servers])
Example #20
0
    def checkpoint_replication_pause(self):
        """With 3 replicas load data. Pause replication to R2. Let checkpoints close on Master and R1.
        Restart replication of R2, then run the backfill checks for R1->R2 and R2->R3.

        Fails the test if master and R1 do not each close at least two more
        checkpoints (relative to the master's value sampled after the pause)
        within 60 seconds per wait task."""

        param = 'checkpoint'
        stat_key = 'vb_0:last_closed_checkpoint_id'

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket, str(self.checkpoint_size))
        time.sleep(5)
        # Baseline backfill timestamps, compared again at the end of the test.
        prev_backfill_timestamp_R1 = self._get_backfill_timestamp(self.replica1, self.replica2)
        prev_backfill_timestamp_R2 = self._get_backfill_timestamp(self.replica2, self.replica3)

        generate_load = BlobGenerator('nosql', 'nosql-', self.value_size, end=self.num_items)
        # Load in the background so replication can be paused mid-load.
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_load, "create", 0, 1, 0, True,
                                        self.checkpoint_size, 5, 180))
        data_load_thread.start()
        self._stop_replication(self.replica2, self.bucket)

        m_stats = StatsCommon.get_stats([self.master], self.bucket, param, stat_key)
        # list(...) keeps this working on Python 3, where dict views cannot
        # be subscripted; an empty result still raises IndexError.
        chk_pnt = int(list(m_stats.values())[0]) + 2
        tasks = [self.cluster.async_wait_for_stats([node], self.bucket, param, stat_key,
                                                   '>=', chk_pnt)
                 for node in (self.master, self.replica1)]
        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Checkpoint not closed")

        data_load_thread.join()
        self._start_replication(self.replica2, self.bucket)

        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
        self._verify_backfill_happen(self.replica1, self.replica2, prev_backfill_timestamp_R1)
        self._verify_backfill_happen(self.replica2, self.replica3, prev_backfill_timestamp_R2)
 def stat(self, key):
     """Fetch one stat of the 'default' bucket from the master node.

     Args:
         key: stat name forwarded to ``StatsCommon.get_stats``.

     Returns:
         The first value of the mapping returned by ``get_stats``, converted
         to ``int`` when it consists only of digits, otherwise unchanged.

     Raises:
         IndexError: if ``get_stats`` returned no values.
     """
     stats = StatsCommon.get_stats([self.master], 'default', "", key)
     # list(...) is required on Python 3, where dict views cannot be indexed.
     val = list(stats.values())[0]
     if val.isdigit():
         val = int(val)
     return val
Example #22
0
 def stat(self, key):
     """Return a single 'default'-bucket stat read from the master node.

     Args:
         key: stat name forwarded to ``StatsCommon.get_stats``.

     Returns:
         The first value of the mapping returned by ``get_stats``; all-digit
         values are converted to ``int``, anything else is returned as-is.

     Raises:
         IndexError: if ``get_stats`` returned no values.
     """
     stats = StatsCommon.get_stats([self.master], 'default', "", key)
     # Dict views cannot be indexed on Python 3, so materialize them first.
     val = list(stats.values())[0]
     if val.isdigit():
         val = int(val)
     return val
Example #23
0
    def test_items_append(self):
        """Grow a subset of each bucket's items via append and verify the result.

        Loads the initial data set, then for every bucket repeatedly appends
        random strings to a selection of keys until the tracked value size
        reaches ``desired_item_size``. Afterwards, per bucket, it logs memory
        stats, checks that every selected key holds at least
        ``desired_item_size`` bytes and, when ``self.kv_verify`` is set, that
        the stored content equals the locally tracked expected value.

        Test params read from ``self.input`` (defaults in parentheses):
        desired_item_size (2048), append_size (1024), fixed_append_size
        (True), append_ratio (0.5), value_size (512).
        """
        self.desired_item_size = self.input.param("desired_item_size", 2048)
        self.append_size = self.input.param("append_size", 1024)
        self.fixed_append_size = self.input.param("fixed_append_size", True)
        self.append_ratio = self.input.param("append_ratio", 0.5)
        self._load_all_buckets(self.master,
                               self.gen_create,
                               "create",
                               0,
                               batch_size=1000,
                               pause_secs=5,
                               timeout_secs=100)

        # Client, selected keys and expected values are remembered per bucket
        # so that the verification phase checks each bucket against its own
        # data instead of whatever the last loop iteration left behind.
        per_bucket = []

        for bucket in self.buckets:
            self.value_size = self.input.param("value_size", 512)
            verify_dict = {}
            vkeys, _ = bucket.kvs[1].key_set()

            # Take keys while the 1-based position stays below the ratio cut.
            app_ratio = self.append_ratio * len(vkeys)
            selected_keys = []
            i = 0
            for key in vkeys:
                i += 1
                if i >= app_ratio:
                    break
                selected_keys.append(key)

            awareness = VBucketAwareMemcached(RestConnection(self.master),
                                              bucket.name)
            if self.kv_verify:
                for key in selected_keys:
                    verify_dict[key] = awareness.memcached(key).get(key)[2]

            self.log.info("Bucket: {0}".format(bucket.name))
            self.log.info("Appending to have items whose initial size was " +
                          "{0} to equal or cross a size of {1}".format(
                              self.value_size, self.desired_item_size))
            self.log.info("Item-appending of {0} items starting ..".format(
                len(selected_keys) + 1))

            index = 3
            while self.value_size < self.desired_item_size:
                str_len = self.append_size
                if not self.fixed_append_size:
                    # Growing appends: 8, 16, 32, ... bytes per round.
                    str_len = 2 ** index

                for key in selected_keys:
                    random_string = self.random_str_generator(str_len)
                    awareness.memcached(key).append(key, random_string)
                    if self.kv_verify:
                        verify_dict[key] = verify_dict[key] + random_string

                # Update the size first so the log reports the size reached
                # by this round rather than the size before it.
                self.value_size += str_len
                index += 1
                self.log.info(
                    "for {0} items size was increased to {1} Bytes".format(
                        len(selected_keys) + 1, self.value_size))

            self.log.info("The appending of {0} items ended".format(
                len(selected_keys) + 1))
            per_bucket.append((bucket, awareness, selected_keys, verify_dict))

        for bucket, awareness, selected_keys, verify_dict in per_bucket:
            msg = "Bucket:{0}".format(bucket.name)
            self.log.info("VERIFICATION <" + msg +
                          ">: Phase 0 - Check the gap between " +
                          "mem_used by the bucket and total_allocated_bytes")
            stats = StatsCommon()
            mem_used_stats = stats.get_stats(self.servers, bucket, 'memory',
                                             'mem_used')
            total_allocated_bytes_stats = stats.get_stats(
                self.servers, bucket, 'memory', 'total_allocated_bytes')
            total_fragmentation_bytes_stats = stats.get_stats(
                self.servers, bucket, 'memory', 'total_fragmentation_bytes')

            for server in self.servers:
                actual_bytes = (int(total_fragmentation_bytes_stats[server]) +
                                int(total_allocated_bytes_stats[server]))
                self.log.info(
                    "In {0} bucket {1}, total_fragmentation_bytes + the total_allocated_bytes = {2}"
                    .format(server.ip, bucket.name, actual_bytes))
                self.log.info("In {0} bucket {1}, mem_used = {2}".format(
                    server.ip, bucket.name, mem_used_stats[server]))
                self.log.info(
                    "In {0} bucket {1}, the difference between actual memory used by memcached and mem_used is {2} times"
                    .format(server.ip, bucket.name,
                            float(actual_bytes) /
                            float(mem_used_stats[server])))

            self.log.info(
                "VERIFICATION <" + msg + ">: Phase1 - Check if any of the " +
                "selected keys have value less than the desired value size")
            for key in selected_keys:
                value = awareness.memcached(key).get(key)[2]
                if len(value) < self.desired_item_size:
                    self.fail(
                        "Failed to append enough to make value size surpass the "
                        + "size {0}, key {1} has size {2}".format(
                            self.desired_item_size, key, len(value)))

            if self.kv_verify:
                self.log.info("VERIFICATION <" + msg +
                              ">: Phase2 - Check if the content " +
                              "after the appends match what's expected")
                for k in verify_dict:
                    if awareness.memcached(k).get(k)[2] != verify_dict[k]:
                        self.fail(
                            "Content at key {0}: not what's expected.".format(
                                k))
                self.log.info("VERIFICATION <" + msg + ">: Successful")

        shell = RemoteMachineShellConnection(self.master)
        try:
            shell.execute_cbstats("", "raw", keyname="allocator", vbid="")
        finally:
            # Release the remote connection even if cbstats fails.
            shell.disconnect()
Example #24
0
 def _get_backfill_timestamp(self, server, replica_server):
     """Read and log the TAP backfill start timestamp for a replication stream.

     Args:
         server: node whose 'tap' stats are queried.
         replica_server: node whose ``ip`` names the replication stream in
             the stat key.

     Returns:
         The first value returned by ``StatsCommon.get_stats`` for that stat,
         converted to ``int``.

     Raises:
         IndexError: if ``get_stats`` returned no values.
     """
     param = 'tap'
     stat_key = 'eq_tapq:replication_ns_1@%s:backfill_start_timestamp' % (replica_server.ip)
     m_stats = StatsCommon.get_stats([server], self.bucket, param, stat_key)
     # list(...) is required on Python 3, where dict views cannot be indexed.
     timestamp = list(m_stats.values())[0]
     self.log.info("%s: %s" % (stat_key, timestamp))
     return int(timestamp)
Example #25
0
    def test_verify_mb8825(self):
        """Exercise the MB-8825 scenario and check meta-operation counters.

        Replicates source -> destination, waits until the destination holds
        half of the created items, deletes a share of the items on the source
        while setting up replication in the reverse direction, then verifies
        results and asserts on the summed ``ep_num_ops_*_meta`` stats of the
        source and destination nodes according to ``self.rep_type``.
        """
        # Setting up replication clusters.
        src_name = "remote-dest-src"
        dest_name = "remote-src-dest"
        self.__setup_replication_clusters(self.src_master, self.dest_master,
                                          src_name, dest_name)

        # Step-3 Load 10k items ( sets=80, deletes=20) on source cluster.
        self._load_all_buckets(self.src_master, self.gen_create, "create", 0)

        # Step-4 XDCR Source -> Remote
        self._replicate_clusters(self.src_master, dest_name)
        self.merge_buckets(self.src_master, self.dest_master,
                           bidirection=False)

        # Step-5 Wait for replication to finish 50% at destination node
        half_of_items = self.gen_create.end * 0.5
        wait_tasks = [
            self.cluster.async_wait_for_stats([self.dest_master], dest_bucket,
                                              '', 'curr_items', '>=',
                                              half_of_items)
            for dest_bucket in self._get_cluster_buckets(self.dest_master)
        ]
        for pending in wait_tasks:
            pending.result(self.wait_timeout * 5)

        # Delete the first `_percent_delete` percent of items on the source.
        delete_upto = int(self.num_items * float(self._percent_delete) / 100)
        self.gen_delete = BlobGenerator('loadOne', 'loadOne-',
                                        self._value_size,
                                        start=0, end=delete_upto)
        delete_tasks = list(
            self._async_load_all_buckets(self.src_master, self.gen_delete,
                                         "delete", 0))

        # Step-6 XDCR Remote -> Source
        self._replicate_clusters(self.dest_master, src_name)
        self.merge_buckets(self.dest_master, self.src_master,
                           bidirection=False)

        # Wait for delete tasks to be finished
        for pending in delete_tasks:
            pending.result()

        # Step-8 Compare the source and destination cluster items - item count, meta data, data content.
        self.verify_results()

        # Verify if no deletion performed at source node:
        # Stats are summed over all source nodes, fetched per node in this order.
        src_stat_names = ('ep_num_ops_del_meta',
                          'ep_num_ops_set_meta',
                          'ep_num_ops_get_meta',
                          'ep_num_ops_del_meta_res_fail',
                          'ep_num_ops_set_meta_res_fail')
        for bucket in self._get_cluster_buckets(self.src_master):
            src_totals = dict.fromkeys(src_stat_names, 0)
            for src_node in self.src_nodes:
                for stat_name in src_stat_names:
                    node_stats = StatsCommon.get_stats([src_node], bucket, '',
                                                       stat_name)
                    src_totals[stat_name] += int(node_stats[src_node])

            set_meta = src_totals['ep_num_ops_set_meta']
            del_meta = src_totals['ep_num_ops_del_meta']
            self.assertEqual(
                set_meta, 0,
                "Number of set [%s] operation occurs at bucket = %s, while expected to 0"
                % (set_meta, bucket))
            self.assertEqual(
                del_meta, 0,
                "Number of delete [%s] operation occurs at bucket = %s, while expected to 0"
                % (del_meta, bucket))

            dest_del_meta = sum(
                int(StatsCommon.get_stats([dest_node], bucket, '',
                                          'ep_num_ops_del_meta')[dest_node])
                for dest_node in self.dest_nodes)

            if self.rep_type == "xmem":
                del_meta_res_fail = src_totals['ep_num_ops_del_meta_res_fail']
                set_meta_res_fail = src_totals['ep_num_ops_set_meta_res_fail']
                self.assertEqual(
                    del_meta_res_fail,
                    dest_del_meta,
                    "Number of failed delete [%s] operation occurs at bucket = %s, while expected to %s"
                    % (del_meta_res_fail, bucket, dest_del_meta))
                self.assertTrue(
                    set_meta_res_fail == 0,
                    "Number of failed set [%s] operation occurs at bucket = %s, while expected 0"
                    % (set_meta_res_fail, bucket))

            elif self.rep_type == "capi":
                get_meta = src_totals['ep_num_ops_get_meta']
                self.assertTrue(
                    get_meta > 0,
                    "Number of get [%s] operation occurs at bucket = %s, while expected greater than 0"
                    % (get_meta, bucket))
Example #26
0
    def test_verify_mb8825(self):
        """Exercise the MB-8825 scenario and check meta-operation counters.

        Replicates source -> destination, waits until the destination holds
        half of the created items, deletes a share of the items on the source
        while setting up replication in the reverse direction, then verifies
        results and asserts on the summed ``ep_num_ops_*_meta`` stats of the
        source and destination nodes according to ``self.rep_type``. In this
        variant the xmem branch expects at least one failed set-meta on the
        source.
        """
        # Setting up replication clusters.
        src_name = "remote-dest-src"
        dest_name = "remote-src-dest"
        self.__setup_replication_clusters(self.src_master, self.dest_master,
                                          src_name, dest_name)

        # Step-3 Load 10k items ( sets=80, deletes=20) on source cluster.
        self._load_all_buckets(self.src_master, self.gen_create, "create", 0)

        # Step-4 XDCR Source -> Remote
        self._replicate_clusters(self.src_master, dest_name)
        self.merge_buckets(self.src_master, self.dest_master,
                           bidirection=False)

        # Step-5 Wait for replication to finish 50% at destination node
        half_of_items = self.gen_create.end * 0.5
        wait_tasks = [
            self.cluster.async_wait_for_stats([self.dest_master], dest_bucket,
                                              '', 'curr_items', '>=',
                                              half_of_items)
            for dest_bucket in self._get_cluster_buckets(self.dest_master)
        ]
        for pending in wait_tasks:
            pending.result(self.wait_timeout * 5)

        # Delete the first `_percent_delete` percent of items on the source.
        delete_upto = int(self.num_items * float(self._percent_delete) / 100)
        self.gen_delete = BlobGenerator('loadOne', 'loadOne-',
                                        self._value_size,
                                        start=0, end=delete_upto)
        delete_tasks = list(
            self._async_load_all_buckets(self.src_master, self.gen_delete,
                                         "delete", 0))

        # Step-6 XDCR Remote -> Source
        self._replicate_clusters(self.dest_master, src_name)
        self.merge_buckets(self.dest_master, self.src_master,
                           bidirection=False)

        # Wait for delete tasks to be finished
        for pending in delete_tasks:
            pending.result()

        # Step-8 Compare the source and destination cluster items - item count, meta data, data content.
        self.verify_results()

        # Verify if no deletion performed at source node:
        # Stats are summed over all source nodes, fetched per node in this order.
        src_stat_names = ('ep_num_ops_del_meta',
                          'ep_num_ops_set_meta',
                          'ep_num_ops_get_meta',
                          'ep_num_ops_del_meta_res_fail',
                          'ep_num_ops_set_meta_res_fail')
        for bucket in self._get_cluster_buckets(self.src_master):
            src_totals = dict.fromkeys(src_stat_names, 0)
            for src_node in self.src_nodes:
                for stat_name in src_stat_names:
                    node_stats = StatsCommon.get_stats([src_node], bucket, '',
                                                       stat_name)
                    src_totals[stat_name] += int(node_stats[src_node])

            set_meta = src_totals['ep_num_ops_set_meta']
            del_meta = src_totals['ep_num_ops_del_meta']
            self.assertEqual(
                set_meta, 0,
                "Number of set [%s] operation occurs at bucket = %s, while expected to 0"
                % (set_meta, bucket))
            self.assertEqual(
                del_meta, 0,
                "Number of delete [%s] operation occurs at bucket = %s, while expected to 0"
                % (del_meta, bucket))

            dest_del_meta = sum(
                int(StatsCommon.get_stats([dest_node], bucket, '',
                                          'ep_num_ops_del_meta')[dest_node])
                for dest_node in self.dest_nodes)

            if self.rep_type == "xmem":
                del_meta_res_fail = src_totals['ep_num_ops_del_meta_res_fail']
                set_meta_res_fail = src_totals['ep_num_ops_set_meta_res_fail']
                self.assertEqual(
                    del_meta_res_fail,
                    dest_del_meta,
                    "Number of failed delete [%s] operation occurs at bucket = %s, while expected to %s"
                    % (del_meta_res_fail, bucket, dest_del_meta))
                self.assertTrue(
                    set_meta_res_fail > 0,
                    "Number of failed set [%s] operation occurs at bucket = %s, while expected greater than 0"
                    % (set_meta_res_fail, bucket))

            elif self.rep_type == "capi":
                get_meta = src_totals['ep_num_ops_get_meta']
                self.assertTrue(
                    get_meta > 0,
                    "Number of get [%s] operation occurs at bucket = %s, while expected greater than 0"
                    % (get_meta, bucket))
Example #27
0
    def test_items_append(self):
        """Grow a subset of each bucket's items via append and verify the result.

        Loads the initial data set, then for every bucket repeatedly appends
        random strings to a selection of keys until the tracked value size
        reaches ``desired_item_size``. Afterwards, per bucket, it logs memory
        stats, checks that every selected key holds at least
        ``desired_item_size`` bytes and, when ``self.kv_verify`` is set, that
        the stored content equals the locally tracked expected value.

        Test params read from ``self.input`` (defaults in parentheses):
        desired_item_size (2048), append_size (1024), fixed_append_size
        (True), append_ratio (0.5), value_size (512).
        """
        self.desired_item_size = self.input.param("desired_item_size", 2048)
        self.append_size = self.input.param("append_size", 1024)
        self.fixed_append_size = self.input.param("fixed_append_size", True)
        self.append_ratio = self.input.param("append_ratio", 0.5)
        self._load_all_buckets(self.master, self.gen_create, "create", 0,
                               batch_size=1000, pause_secs=5, timeout_secs=100)

        # Client, selected keys and expected values are remembered per bucket
        # so that the verification phase checks each bucket against its own
        # data instead of whatever the last loop iteration left behind.
        per_bucket = []

        for bucket in self.buckets:
            self.value_size = self.input.param("value_size", 512)
            verify_dict = {}
            vkeys, _ = bucket.kvs[1].key_set()

            # Take keys while the 1-based position stays below the ratio cut.
            app_ratio = self.append_ratio * len(vkeys)
            selected_keys = []
            i = 0
            for key in vkeys:
                i += 1
                if i >= app_ratio:
                    break
                selected_keys.append(key)

            awareness = VBucketAwareMemcached(RestConnection(self.master), bucket.name)
            if self.kv_verify:
                for key in selected_keys:
                    verify_dict[key] = awareness.memcached(key).get(key)[2]

            self.log.info("Bucket: {0}".format(bucket.name))
            self.log.info("Appending to have items whose initial size was "
                            + "{0} to equal or cross a size of {1}".format(self.value_size, self.desired_item_size))
            self.log.info("Item-appending of {0} items starting ..".format(len(selected_keys) + 1))

            index = 3
            while self.value_size < self.desired_item_size:
                str_len = self.append_size
                if not self.fixed_append_size:
                    # Growing appends: 8, 16, 32, ... bytes per round.
                    str_len = 2 ** index

                for key in selected_keys:
                    random_string = self.random_str_generator(str_len)
                    awareness.memcached(key).append(key, random_string)
                    if self.kv_verify:
                        verify_dict[key] = verify_dict[key] + random_string

                # Update the size first so the log reports the size reached
                # by this round rather than the size before it.
                self.value_size += str_len
                index += 1
                self.log.info("for {0} items size was increased to {1} Bytes".format(len(selected_keys) + 1, self.value_size))

            self.log.info("The appending of {0} items ended".format(len(selected_keys) + 1))
            per_bucket.append((bucket, awareness, selected_keys, verify_dict))

        for bucket, awareness, selected_keys, verify_dict in per_bucket:
            msg = "Bucket:{0}".format(bucket.name)
            self.log.info("VERIFICATION <" + msg + ">: Phase 0 - Check the gap between "
                      + "mem_used by the bucket and total_allocated_bytes")
            stats = StatsCommon()
            mem_used_stats = stats.get_stats(self.servers, bucket, 'memory', 'mem_used')
            total_allocated_bytes_stats = stats.get_stats(self.servers, bucket, 'memory', 'total_allocated_bytes')
            total_fragmentation_bytes_stats = stats.get_stats(self.servers, bucket, 'memory', 'total_fragmentation_bytes')

            for server in self.servers:
                actual_bytes = (int(total_fragmentation_bytes_stats[server])
                                + int(total_allocated_bytes_stats[server]))
                self.log.info("In {0} bucket {1}, total_fragmentation_bytes + the total_allocated_bytes = {2}"
                              .format(server.ip, bucket.name, actual_bytes))
                self.log.info("In {0} bucket {1}, mem_used = {2}".format(server.ip, bucket.name, mem_used_stats[server]))
                self.log.info("In {0} bucket {1}, the difference between actual memory used by memcached and mem_used is {2} times"
                              .format(server.ip, bucket.name, float(actual_bytes) / float(mem_used_stats[server])))

            self.log.info("VERIFICATION <" + msg + ">: Phase1 - Check if any of the "
                    + "selected keys have value less than the desired value size")
            for key in selected_keys:
                value = awareness.memcached(key).get(key)[2]
                if len(value) < self.desired_item_size:
                    self.fail("Failed to append enough to make value size surpass the "
                                + "size {0}, key {1} has size {2}".format(self.desired_item_size, key, len(value)))

            if self.kv_verify:
                self.log.info("VERIFICATION <" + msg + ">: Phase2 - Check if the content "
                        + "after the appends match what's expected")
                for k in verify_dict:
                    if awareness.memcached(k).get(k)[2] != verify_dict[k]:
                        self.fail("Content at key {0}: not what's expected.".format(k))
                self.log.info("VERIFICATION <" + msg + ">: Successful")

        shell = RemoteMachineShellConnection(self.master)
        try:
            shell.execute_cbstats("", "raw", keyname="allocator", vbid="")
        finally:
            # Release the remote connection even if cbstats fails.
            shell.disconnect()
Example #28
0
    def checkpoint_replication_pause_failover(self):
        """Load N items. Stop replication R3. Load N' more items.
        Failover R2. When restart replication to R3, run the backfill check for R1.

        Fails the test if R2 cannot be found among the cluster's node statuses
        or if failing it over does not succeed after one retry. On completion
        ``self.servers`` / ``self.num_servers`` no longer include R2, which is
        rebalanced out and then back in."""

        param = 'checkpoint'
        stat_key = 'vb_0:open_checkpoint_id'
        rest = RestConnection(self.master)

        # Locate R2's entry by IP; the last matching node wins, as before.
        failover_node = None
        for node in rest.node_statuses():
            if self.replica2.ip in node.id:
                failover_node = node
        if failover_node is None:
            # Fail with a readable message rather than an AttributeError on
            # `failover_node.id` further down.
            self.fail("node {0} not found in cluster node statuses".format(
                self.replica2.ip))

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket,
                                  self.checkpoint_size)
        generate_load_one = BlobGenerator('nosql',
                                          'nosql-',
                                          self.value_size,
                                          end=self.num_items)
        self._load_all_buckets(self.master,
                               generate_load_one,
                               "create",
                               0,
                               1,
                               0,
                               True,
                               batch_size=self.checkpoint_size,
                               pause_secs=5,
                               timeout_secs=180)
        self._wait_for_stats_all_buckets(self.servers[:self.num_servers])
        prev_backfill_timestamp_R1 = self._get_backfill_timestamp(
            self.replica1, self.replica2)
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param,
                                        stat_key)
        self._stop_replication(self.replica3, self.bucket)

        generate_load_two = BlobGenerator('sqlite',
                                          'sqlite-',
                                          self.value_size,
                                          end=self.num_items)
        # Second load runs in the background while R2 is failed over.
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_load_two,
                                        "create", 0, 1, 0, True,
                                        self.checkpoint_size, 5, 180))
        data_load_thread.start()

        failed_over = rest.fail_over(failover_node.id)
        if not failed_over:
            # One retry; the message now reports the delay actually used.
            retry_delay_secs = 75
            self.log.info(
                "unable to failover the node the first time. "
                "try again in {0} seconds..".format(retry_delay_secs))
            time.sleep(retry_delay_secs)
            failed_over = rest.fail_over(failover_node.id)
        # The placeholder used to be "%s", which str.format never fills in.
        self.assertTrue(failed_over,
                        "unable to failover node {0}".format(self.replica2.ip))
        self.log.info("failed over node : {0}".format(failover_node.id))
        data_load_thread.join()
        self._start_replication(self.replica3, self.bucket)

        # R2 is failed over, so verification only covers the remaining nodes.
        self.servers = [self.master, self.replica1, self.replica3]
        self.num_servers = len(self.servers)
        self._verify_checkpoint_id(param, stat_key, m_stats)
        self._verify_stats_all_buckets(self.servers[:self.num_servers])
        self._verify_backfill_happen(self.replica1, self.replica2,
                                     prev_backfill_timestamp_R1)
        # NOTE(review): presumably rebalance(servers, to_add, to_remove) --
        # first eject the failed-over R2, then add it back; confirm against
        # the cluster helper.
        self.cluster.rebalance(
            [self.master, self.replica1, self.replica2, self.replica3], [],
            [self.replica2])
        self.cluster.rebalance([self.master, self.replica1, self.replica3],
                               [self.replica2], [])
Example #29
0
    def checkpoint_deduplication(self):
        """Disable replication of R1. Load N items to master, then update them.
        Restart replication of R1, only N items should be in stats. In this test, we can
        only load number of items <= checkpoint_size to observe deduplication.

        Fails the test if, within 60 seconds per wait task, master and R1 do
        not both report ``num_items`` open-checkpoint items and the same open
        checkpoint id the master had before the updates."""

        param = 'checkpoint'
        stat_key = 'vb_0:num_open_checkpoint_items'
        stat_key_id = 'vb_0:open_checkpoint_id'

        self._set_checkpoint_size(self.servers[:self.num_servers], self.bucket,
                                  self.checkpoint_size)
        self._stop_replication(self.replica1, self.bucket)

        generate_load = BlobGenerator('nosql',
                                      'nosql-',
                                      self.value_size,
                                      end=self.num_items)
        generate_update = BlobGenerator('nosql',
                                        'sql-',
                                        self.value_size,
                                        end=self.num_items)
        self._load_all_buckets(self.master,
                               generate_load,
                               "create",
                               0,
                               1,
                               0,
                               True,
                               batch_size=self.checkpoint_size,
                               pause_secs=5,
                               timeout_secs=180)
        # R1 is paused, so only wait on the nodes that are still replicating.
        self._wait_for_stats_all_buckets(
            [self.master, self.replica2, self.replica3])
        m_stats = StatsCommon.get_stats([self.master], self.bucket, param,
                                        stat_key_id)

        # Run the updates in the background while R1 replication is resumed.
        data_load_thread = Thread(target=self._load_all_buckets,
                                  name="load_data",
                                  args=(self.master, generate_update, "update",
                                        0, 1, 0, True, self.checkpoint_size, 5,
                                        180))
        data_load_thread.start()
        self._start_replication(self.replica1, self.bucket)
        data_load_thread.join()

        # list(...) keeps this working on Python 3, where dict views cannot
        # be subscripted; an empty result still raises IndexError.
        chk_pnt = int(list(m_stats.values())[0])

        # Settle time: 1 ms per item, but never less than 60 seconds.
        timeout = max(60, self.num_items * .001)
        time.sleep(timeout)

        # Same order as before: item count on master/R1, then checkpoint id
        # on master/R1.
        tasks = [
            self.cluster.async_wait_for_stats([node], self.bucket, param, key,
                                              '==', expected)
            for key, expected in ((stat_key, self.num_items),
                                  (stat_key_id, chk_pnt))
            for node in (self.master, self.replica1)
        ]

        for task in tasks:
            try:
                task.result(60)
            except TimeoutError:
                self.fail("Items weren't deduplicated")

        self._verify_stats_all_buckets(self.servers[:self.num_servers])