Example 1
class StorageBase(BaseTestCase):
    def setUp(self):
        super(StorageBase, self).setUp()
        self.rest = RestConnection(self.cluster.master)
        self.data_path = self.fetch_data_path()

        # Bucket Params
        self.vbuckets = self.input.param("vbuckets", self.cluster.vbuckets)
        self.bucket_ram_quota = self.input.param("bucket_ram_quota", None)
        self.fragmentation = int(self.input.param("fragmentation", 50))
        self.bucket_storage = self.input.param("bucket_storage",
                                               Bucket.StorageBackend.magma)
        self.bucket_eviction_policy = self.input.param(
            "bucket_eviction_policy", Bucket.EvictionPolicy.FULL_EVICTION)
        self.bucket_util.add_rbac_user(self.cluster.master)
        self.bucket_name = self.input.param("bucket_name", None)
        self.magma_buckets = self.input.param("magma_buckets", 0)

        # SDK Exceptions
        self.check_temporary_failure_exception = False
        self.retry_exceptions = [
            SDKException.TimeoutException,
            SDKException.AmbiguousTimeoutException,
            SDKException.RequestCanceledException,
            SDKException.UnambiguousTimeoutException,
            SDKException.ServerOutOfMemoryException,
            SDKException.DurabilityAmbiguousException
        ]
        self.ignore_exceptions = []

        # Sets autocompaction at bucket level
        self.autoCompactionDefined = str(
            self.input.param("autoCompactionDefined", "false")).lower()

        # Create Cluster
        self.rest.init_cluster(username=self.cluster.master.rest_username,
                               password=self.cluster.master.rest_password)

        nodes_init = self.cluster.servers[1:self.nodes_init]
        self.services = ["kv"] * self.nodes_init

        self.dcp_services = self.input.param("dcp_services", None)
        self.dcp_servers = []
        if self.dcp_services:
            server = self.rest.get_nodes_self()
            self.rest.set_service_mem_quota({
                CbServer.Settings.INDEX_MEM_QUOTA:
                int(server.mcdMemoryReserved - 100)
            })
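            # dcp_services is passed as node groups separated by '-', with ':'
            # separating services within a group, e.g. "index:n1ql" is converted
            # to "index,n1ql" for the rebalance service list.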
            self.dcp_services = [
                service.replace(":", ",")
                for service in self.dcp_services.split("-")
            ]
            self.services.extend(self.dcp_services)
            self.dcp_servers = self.cluster.servers[
                self.nodes_init:self.nodes_init + len(self.dcp_services)]
        nodes_in = nodes_init + self.dcp_servers
        result = self.task.rebalance([self.cluster.master],
                                     nodes_in, [],
                                     services=self.services[1:])
        self.assertTrue(result, "Initial rebalance failed")
        self.cluster.nodes_in_cluster.extend([self.cluster.master] + nodes_in)
        for idx, node in enumerate(self.cluster.nodes_in_cluster):
            node.services = self.services[idx]

        # Create Buckets
        if self.standard_buckets == 1:
            self.bucket_util.create_default_bucket(
                self.cluster,
                bucket_type=self.bucket_type,
                ram_quota=self.bucket_ram_quota,
                replica=self.num_replicas,
                storage=self.bucket_storage,
                eviction_policy=self.bucket_eviction_policy,
                autoCompactionDefined=self.autoCompactionDefined,
                fragmentation_percentage=self.fragmentation,
                flush_enabled=self.flush_enabled)
        else:
            buckets_created = self.bucket_util.create_multiple_buckets(
                self.cluster,
                self.num_replicas,
                bucket_count=self.standard_buckets,
                bucket_type=self.bucket_type,
                storage={
                    "couchstore": self.standard_buckets - self.magma_buckets,
                    "magma": self.magma_buckets
                },
                eviction_policy=self.bucket_eviction_policy,
                bucket_name=self.bucket_name,
                fragmentation_percentage=self.fragmentation,
                flush_enabled=self.flush_enabled)
            self.assertTrue(buckets_created,
                            "Unable to create multiple buckets")

        self.buckets = self.cluster.buckets

        # self.num_collections == 1 signifies only the default collection
        self.num_collections = self.input.param("num_collections", 1)
        self.num_scopes = self.input.param("num_scopes", 1)

        # Create additional scopes if num_scopes > 1
        scope_prefix = "Scope"
        for bucket in self.cluster.buckets:
            for i in range(1, self.num_scopes):
                scope_name = scope_prefix + str(i)
                self.log.info("Creating bucket::scope {} {}\
                ".format(bucket.name, scope_name))
                self.bucket_util.create_scope(self.cluster.master, bucket,
                                              {"name": scope_name})
                self.sleep(2)
        self.scopes = self.buckets[0].scopes.keys()
        self.log.info("Scopes list is {}".format(self.scopes))

        collection_prefix = "FunctionCollection"
        # Create additional collections if num_collections > 1
        for bucket in self.cluster.buckets:
            for scope_name in self.scopes:
                for i in range(1, self.num_collections):
                    collection_name = collection_prefix + str(i)
                    self.log.info("Creating scope::collection {} {}\
                    ".format(scope_name, collection_name))
                    self.bucket_util.create_collection(
                        self.cluster.master, bucket, scope_name,
                        {"name": collection_name})
                    self.sleep(2)
        self.collections = self.buckets[0].scopes[
            CbServer.default_scope].collections.keys()
        self.log.debug("Collections list == {}".format(self.collections))

        if self.dcp_services and self.num_collections == 1:
            self.initial_idx = "initial_idx"
            self.initial_idx_q = "CREATE INDEX %s on default:`%s`.`%s`.`%s`(meta().id) with \
                {\"defer_build\": false};" % (
                self.initial_idx, self.buckets[0].name, CbServer.default_scope,
                self.collections[0])
            self.query_client = RestConnection(self.dcp_servers[0])
            result = self.query_client.query_tool(self.initial_idx_q)
            self.assertTrue(result["status"] == "success",
                            "Index query failed!")

        # Doc controlling params
        self.key = 'test_docs'
        self.key_size = self.input.param("key_size", 8)
        if self.random_key:
            self.key = "random_keys"
            '''
            With a small key size, when random.random() generates 0.0,
            the key can grow beyond the 250-byte limit
            (L 259 in documentgenerator.py)
            '''
            self.key_size = self.input.param("key_size", 20)

        self.doc_ops = self.input.param("doc_ops", "create")
        self.doc_size = self.input.param("doc_size", 2048)
        self.gen_create = None
        self.gen_delete = None
        self.gen_read = None
        self.gen_update = None
        self.gen_expiry = None
        self.create_perc = self.input.param("update_perc", 100)
        self.update_perc = self.input.param("update_perc", 0)
        self.delete_perc = self.input.param("delete_perc", 0)
        self.expiry_perc = self.input.param("expiry_perc", 0)
        self.start = 0
        self.end = 0
        self.create_start = None
        self.create_end = None
        self.update_start = None
        self.update_end = None
        self.delete_start = None
        self.delete_end = None
        self.read_start = None
        self.read_end = None
        self.expiry_start = None
        self.expiry_end = None
        self.mutate = 0
        self.init_items_per_collection = self.num_items
        '''
        -- For DGM tests,
           self.init_items_per_collection is overwritten in the
           load_buckets_in_dgm method

        -- For non-DGM tests in a multi-collection environment,
           self.num_items is updated after doc loading

        -- self.init_num_items preserves the initial doc count
           given in the test
        '''
        self.init_num_items = self.num_items
        self.maxttl = self.input.param("maxttl", 10)

        # Common test params
        self.test_itr = self.input.param("test_itr", 4)
        self.update_itr = self.input.param("update_itr", 2)
        self.next_half = self.input.param("next_half", False)
        self.deep_copy = self.input.param("deep_copy", False)
        self.suppress_error_table = True
        self.skip_read_on_error = False
        self.track_failures = True

    def _loader_dict(self):
        loader_dict = dict()
        common_params = {
            "retry_exceptions": self.retry_exceptions,
            "suppress_error_table": self.suppress_error_table,
            "durability_level": self.durability_level,
            "skip_read_success_results": False,
            "target_items": 5000,
            "skip_read_on_error": self.skip_read_on_error,
            "track_failures": self.track_failures,
            "ignore_exceptions": self.ignore_exceptions,
            "sdk_timeout_unit": self.time_unit,
            "sdk_timeout": self.sdk_timeout,
            "doc_ttl": 0,
            "doc_gen_type": "default"
        }
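        # Build a nested dict of the form
        # {bucket: {"scopes": {scope: {"collections": {collection: {op: params}}}}}}
        # Mutation ops get a deepcopy of common_params so that later updates
        # (e.g. doc_ttl for expiry) do not leak into other operations.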
        for bucket in self.cluster.buckets:
            loader_dict.update({bucket: dict()})
            loader_dict[bucket].update({"scopes": dict()})
            for scope in bucket.scopes.keys():
                loader_dict[bucket]["scopes"].update({scope: dict()})
                loader_dict[bucket]["scopes"][scope].update(
                    {"collections": dict()})
                for collection in bucket.scopes[scope].collections.keys():
                    loader_dict[bucket]["scopes"][scope]["collections"].update(
                        {collection: dict()})
                    if self.gen_update is not None:
                        op_type = "update"
                        common_params.update({"doc_gen": self.gen_update})
                        loader_dict[bucket]["scopes"][scope]["collections"][
                            collection][op_type] = copy.deepcopy(common_params)
                    if self.gen_create is not None:
                        op_type = "create"
                        common_params.update({"doc_gen": self.gen_create})
                        loader_dict[bucket]["scopes"][scope]["collections"][
                            collection][op_type] = copy.deepcopy(common_params)
                    if self.gen_delete is not None:
                        op_type = "delete"
                        common_params.update({"doc_gen": self.gen_delete})
                        loader_dict[bucket]["scopes"][scope]["collections"][
                            collection][op_type] = copy.deepcopy(common_params)
                    if self.gen_expiry is not None and self.maxttl:
                        op_type = "update"
                        common_params.update({
                            "doc_gen": self.gen_expiry,
                            "doc_ttl": self.maxttl
                        })
                        loader_dict[bucket]["scopes"][scope]["collections"][
                            collection][op_type] = copy.deepcopy(common_params)
                        common_params.update({"doc_ttl": 0})
                    if self.gen_read is not None:
                        op_type = "read"
                        common_params.update({
                            "doc_gen": self.gen_read,
                            "skip_read_success_results": True,
                            "track_failures": False,
                            "suppress_error_table": True
                        })
                        loader_dict[bucket]["scopes"][scope]["collections"][
                            collection][op_type] = common_params
        self.loader_dict = loader_dict

    def doc_loader(self, loader_spec):
        task = self.task.async_load_gen_docs_from_spec(
            self.cluster,
            self.task_manager,
            loader_spec,
            self.sdk_client_pool,
            batch_size=self.batch_size,
            process_concurrency=self.process_concurrency,
            print_ops_rate=True,
            start_task=True,
            track_failures=self.track_failures)

        return task

    def data_load(self):
        self._loader_dict()
        return self.doc_loader(self.loader_dict)

    def wait_for_doc_load_completion(self, task, wait_for_stats=True):
        self.task_manager.get_task_result(task)
        self.bucket_util.validate_doc_loading_results(task)
        self.assertTrue(
            task.result,
            "Doc ops failed for task: {}".format(task.thread_name))

        if wait_for_stats:
            self.bucket_util._wait_for_stats_all_buckets(
                self.cluster, self.cluster.buckets, timeout=1800)

    def initial_load(self):
        self.create_start = 0
        self.create_end = self.init_items_per_collection
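        # With rev_write the create range is negative, i.e. keys are generated
        # in reverse order relative to the default ascending load.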
        if self.rev_write:
            self.create_start = -int(self.init_items_per_collection - 1)
            self.create_end = 1

        self.generate_docs(doc_ops="create")

        self.log.debug("initial_items_in_each_collection {}".format(
            self.init_items_per_collection))
        task = self.data_load()
        self.wait_for_doc_load_completion(task)

        self.num_items = self.init_items_per_collection * self.num_collections
        self.read_start = 0
        self.read_end = self.init_items_per_collection

    def load_buckets_in_dgm(self,
                            kv_gen,
                            op_type,
                            exp,
                            flag=0,
                            batch_size=1000,
                            timeout_secs=30,
                            compression=True,
                            skip_read_on_error=False,
                            suppress_error_table=False,
                            track_failures=False):
        tasks_info = dict()
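        # The default collection exists only under the default scope, so it is
        # dropped from the working list here and re-added temporarily while
        # loading the default scope below.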
        self.collections.remove(CbServer.default_collection)
        docs_per_task = dict()
        docs_per_scope = dict.fromkeys(self.scopes, dict())
        for scope in self.scopes:
            task_per_collection = dict()
            if scope == CbServer.default_scope:
                self.collections.append(CbServer.default_collection)
            for collection in self.collections:
                task_info = self.bucket_util._async_load_all_buckets(
                    self.cluster,
                    kv_gen,
                    op_type,
                    exp,
                    flag,
                    persist_to=self.persist_to,
                    replicate_to=self.replicate_to,
                    durability=self.durability_level,
                    timeout_secs=timeout_secs,
                    time_unit=self.time_unit,
                    batch_size=batch_size,
                    sdk_compression=compression,
                    process_concurrency=self.process_concurrency,
                    retry_exceptions=self.retry_exceptions,
                    active_resident_threshold=self.active_resident_threshold,
                    skip_read_on_error=skip_read_on_error,
                    suppress_error_table=suppress_error_table,
                    dgm_batch=self.dgm_batch,
                    scope=scope,
                    collection=collection,
                    monitor_stats=self.monitor_stats,
                    track_failures=track_failures,
                    sdk_client_pool=self.sdk_client_pool)
                tasks_info.update(task_info.items())
                task_per_collection[collection] = list(task_info.keys())[0]
            if scope == CbServer.default_scope:
                self.collections.remove(CbServer.default_collection)
            docs_per_scope[scope] = task_per_collection
        for task in tasks_info.keys():
            self.task_manager.get_task_result(task)
        if self.active_resident_threshold < 100:
            for task, _ in tasks_info.items():
                docs_per_task[task] = task.doc_index
            self.log.info("docs_per_task : {}".format(docs_per_task))
            for scope in self.scopes:
                for collection in self.collections:
                    docs_per_scope[scope][collection] = docs_per_task[
                        docs_per_scope[scope][collection]]
            docs_per_scope[CbServer.default_scope][
                CbServer.default_collection] = docs_per_task[docs_per_scope[
                    CbServer.default_scope][CbServer.default_collection]]
        self.log.info("docs_per_scope :  {}".format(docs_per_scope))
        # For DGM TESTS, init_items_per_collection ==  max(list of items in each collection)
        self.init_items_per_collection = max(
            [max(docs_per_scope[scope].values()) for scope in docs_per_scope])
        self.log.info("init_items_per_collection =={} ".format(
            self.init_items_per_collection))

    def tearDown(self):
        self.cluster_util.print_cluster_stats(self.cluster)
        dgm = None
        timeout = 60
        while dgm is None and timeout > 0:
            try:
                stats = BucketHelper(self.cluster.master).fetch_bucket_stats(
                    self.buckets[0].name)
                dgm = stats["op"]["samples"]["vb_active_resident_items_ratio"][
                    -1]
                self.log.info(
                    "## Active Resident Threshold of {0} is {1} ##".format(
                        self.buckets[0].name, dgm))
            except Exception:
                self.log.debug(
                    "Fetching vb_active_resident_items_ratio(dgm) "
                    "failed...retrying")
                timeout -= 1
                time.sleep(1)

        super(StorageBase, self).tearDown()

    def genrate_docs_basic(self, start, end, target_vbucket=None, mutate=0):
        return doc_generator(self.key,
                             start,
                             end,
                             doc_size=self.doc_size,
                             doc_type=self.doc_type,
                             target_vbucket=target_vbucket,
                             vbuckets=self.cluster.vbuckets,
                             key_size=self.key_size,
                             randomize_doc_size=self.randomize_doc_size,
                             randomize_value=self.randomize_value,
                             mix_key_size=self.mix_key_size,
                             mutate=mutate,
                             deep_copy=self.deep_copy)

    def generate_docs(self,
                      doc_ops=None,
                      target_vbucket=None,
                      create_end=None,
                      create_start=None,
                      create_mutate=0,
                      update_end=None,
                      update_start=None,
                      update_mutate=0,
                      read_end=None,
                      read_start=None,
                      read_mutate=0,
                      delete_end=None,
                      delete_start=None,
                      expiry_end=None,
                      expiry_start=None,
                      expiry_mutate=0):

        doc_ops = doc_ops or self.doc_ops
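        # For each requested op, honour explicit start/end arguments first;
        # otherwise derive the key range from the current working set and the
        # configured *_perc percentages, then build the doc generator.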

        if "update" in doc_ops:
            if update_start is not None:
                self.update_start = update_start
            if update_end is not None:
                self.update_end = update_end

            if self.update_start is None:
                self.update_start = self.start
            if self.update_end is None:
                self.update_end = self.end * self.update_perc / 100

            self.mutate += 1
            self.gen_update = self.genrate_docs_basic(
                self.update_start,
                self.update_end,
                target_vbucket=target_vbucket,
                mutate=self.mutate)
        if "delete" in doc_ops:
            if delete_start is not None:
                self.delete_start = delete_start
            if delete_end is not None:
                self.delete_end = delete_end

            if self.delete_start is None:
                self.delete_start = self.start
            if self.delete_end is None:
                self.delete_end = self.end * self.delete_perc / 100

            self.gen_delete = self.genrate_docs_basic(
                self.delete_start,
                self.delete_end,
                target_vbucket=target_vbucket,
                mutate=read_mutate)
        if "create" in doc_ops:
            if create_start is not None:
                self.create_start = create_start
            if self.create_start is None:
                self.create_start = self.end
            self.start = self.create_start

            if create_end is not None:
                self.create_end = create_end
            if self.create_end is None:
                self.create_end = self.start + self.num_items * self.create_perc / 100
            self.end = self.create_end

            self.gen_create = self.genrate_docs_basic(
                self.create_start,
                self.create_end,
                target_vbucket=target_vbucket,
                mutate=create_mutate)
        if "read" in doc_ops:
            if read_start is not None:
                self.read_start = read_start
            if read_end is not None:
                self.read_end = read_end

            if self.read_start is None:
                self.read_start = self.create_start
            if self.read_end is None:
                self.read_end = self.create_end

            self.gen_read = self.genrate_docs_basic(
                self.read_start,
                self.read_end,
                target_vbucket=target_vbucket,
                mutate=read_mutate)
        if "expiry" in doc_ops:
            if expiry_start is not None:
                self.expiry_start = expiry_start
            elif self.expiry_start is None:
                self.expiry_start = self.start + (self.num_items *
                                                  self.delete_perc) / 100

            if expiry_end is not None:
                self.expiry_end = expiry_end
            elif self.expiry_end is None:
                self.expiry_end = self.start+self.num_items *\
                                  (self.delete_perc + self.expiry_perc)/100

            self.gen_expiry = self.genrate_docs_basic(
                self.expiry_start,
                self.expiry_end,
                target_vbucket=target_vbucket,
                mutate=expiry_mutate)

    def loadgen_docs(self,
                     retry_exceptions=None,
                     ignore_exceptions=None,
                     skip_read_on_error=False,
                     suppress_error_table=False,
                     scope=CbServer.default_scope,
                     collection=CbServer.default_collection,
                     _sync=True,
                     track_failures=True,
                     doc_ops=None,
                     sdk_retry_strategy=None):
        doc_ops = doc_ops or self.doc_ops
        retry_exceptions = retry_exceptions or []
        ignore_exceptions = ignore_exceptions or []

        tasks_info = dict()
        read_tasks_info = dict()
        read_task = False

        if self.check_temporary_failure_exception:
            retry_exceptions.append(SDKException.TemporaryFailureException)

        if "update" in doc_ops and self.gen_update is not None:
            tem_tasks_info = self.bucket_util._async_load_all_buckets(
                self.cluster,
                self.gen_update,
                "update",
                0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                persist_to=self.persist_to,
                replicate_to=self.replicate_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                retries=self.sdk_retries,
                time_unit=self.time_unit,
                retry_exceptions=retry_exceptions,
                ignore_exceptions=ignore_exceptions,
                skip_read_on_error=skip_read_on_error,
                suppress_error_table=suppress_error_table,
                scope=scope,
                collection=collection,
                monitor_stats=self.monitor_stats,
                track_failures=track_failures,
                sdk_client_pool=self.sdk_client_pool,
                sdk_retry_strategy=sdk_retry_strategy)
            tasks_info.update(tem_tasks_info.items())
        if "create" in doc_ops and self.gen_create is not None:
            tem_tasks_info = self.bucket_util._async_load_all_buckets(
                self.cluster,
                self.gen_create,
                "create",
                0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                persist_to=self.persist_to,
                replicate_to=self.replicate_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                retries=self.sdk_retries,
                time_unit=self.time_unit,
                retry_exceptions=retry_exceptions,
                ignore_exceptions=ignore_exceptions,
                skip_read_on_error=skip_read_on_error,
                suppress_error_table=suppress_error_table,
                scope=scope,
                collection=collection,
                monitor_stats=self.monitor_stats,
                track_failures=track_failures,
                sdk_client_pool=self.sdk_client_pool,
                sdk_retry_strategy=sdk_retry_strategy)
            tasks_info.update(tem_tasks_info.items())
            self.num_items += (self.gen_create.end - self.gen_create.start)
        if "expiry" in doc_ops and self.gen_expiry is not None and self.maxttl:
            tem_tasks_info = self.bucket_util._async_load_all_buckets(
                self.cluster,
                self.gen_expiry,
                "update",
                self.maxttl,
                self.random_exp,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                persist_to=self.persist_to,
                replicate_to=self.replicate_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                retries=self.sdk_retries,
                time_unit=self.time_unit,
                retry_exceptions=retry_exceptions,
                ignore_exceptions=ignore_exceptions,
                skip_read_on_error=skip_read_on_error,
                suppress_error_table=suppress_error_table,
                scope=scope,
                collection=collection,
                monitor_stats=self.monitor_stats,
                track_failures=track_failures,
                sdk_client_pool=self.sdk_client_pool,
                sdk_retry_strategy=sdk_retry_strategy)
            tasks_info.update(tem_tasks_info.items())
            self.num_items -= (self.gen_expiry.end - self.gen_expiry.start)
        if "read" in doc_ops and self.gen_read is not None:
            read_tasks_info = self.bucket_util._async_validate_docs(
                self.cluster,
                self.gen_read,
                "read",
                0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                timeout_secs=self.sdk_timeout,
                time_unit=self.time_unit,
                retry_exceptions=retry_exceptions,
                ignore_exceptions=ignore_exceptions,
                scope=scope,
                collection=collection,
                suppress_error_table=suppress_error_table,
                sdk_client_pool=self.sdk_client_pool,
                sdk_retry_strategy=sdk_retry_strategy)
            read_task = True
        if "delete" in doc_ops and self.gen_delete is not None:
            tem_tasks_info = self.bucket_util._async_load_all_buckets(
                self.cluster,
                self.gen_delete,
                "delete",
                0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                persist_to=self.persist_to,
                replicate_to=self.replicate_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                retries=self.sdk_retries,
                time_unit=self.time_unit,
                retry_exceptions=retry_exceptions,
                ignore_exceptions=ignore_exceptions,
                skip_read_on_error=skip_read_on_error,
                suppress_error_table=suppress_error_table,
                scope=scope,
                collection=collection,
                monitor_stats=self.monitor_stats,
                track_failures=track_failures,
                sdk_client_pool=self.sdk_client_pool,
                sdk_retry_strategy=sdk_retry_strategy)
            tasks_info.update(tem_tasks_info.items())
            self.num_items -= (self.gen_delete.end - self.gen_delete.start)

        if _sync:
            for task in tasks_info:
                self.task_manager.get_task_result(task)

            self.bucket_util.verify_doc_op_task_exceptions(
                tasks_info, self.cluster, sdk_client_pool=self.sdk_client_pool)
            self.bucket_util.log_doc_ops_task_failures(tasks_info)

        if read_task:
            # TODO: Need to converge read_tasks_info into tasks_info before
            #       itself to avoid confusions during _sync=False case
            tasks_info.update(read_tasks_info.items())
            if _sync:
                for task in read_tasks_info:
                    self.task_manager.get_task_result(task)

        return tasks_info

    def get_bucket_dgm(self, bucket):
        self.rest_client = BucketHelper(self.cluster.master)
        count = 0
        dgm = 100
        while count < 5:
            try:
                dgm = self.rest_client.fetch_bucket_stats(
                    bucket.name
                )["op"]["samples"]["vb_active_resident_items_ratio"][-1]
                self.log.info("Active Resident Threshold of {0} is {1}".format(
                    bucket.name, dgm))
                return dgm
            except Exception as e:
                self.sleep(5, e)
            count += 1
        return dgm

    def change_swap_space(self, servers=None, disable=True):
        servers = servers or self.cluster.nodes_in_cluster
        if type(servers) is not list:
            servers = [servers]
        for server in servers:
            shell = RemoteMachineShellConnection(server)
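            # 'free | tail -1' reads the "Swap:" line of free's output and
            # awk '{print $2}' extracts the total swap size, which should be 0
            # after 'swapoff -a' and non-zero after 'swapon -a'.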
            if disable:
                _ = shell.execute_command("swapoff -a")
                self.sleep(5)
                output = shell.execute_command(
                    "free | tail -1 | awk '{print $2}'")[0][0].split('\n')[0]
                self.assertEqual(
                    int(output), 0,
                    msg="Failed to disable swap space on server {}, "
                        "swap total reported as {}".format(server, output))
            else:
                _ = shell.execute_command("swapon -a")
                self.sleep(5)
                output = shell.execute_command(
                    "free | tail -1 | awk '{print $2}'")[0][0].split('\n')[0]
                self.assertNotEqual(
                    int(output), 0,
                    msg="Failed to enable swap space on server {}, "
                        "swap total reported as {}".format(server, output))
        return

    def check_fragmentation_using_bucket_stats(self, bucket, servers=None):
        # Disabling the check for time being
        #return True
        result = dict()
        if servers is None:
            servers = self.cluster.nodes_in_cluster
        if type(servers) is not list:
            servers = [servers]
        time_end = time.time() + 60 * 5
        while time.time() < time_end:
            for server in servers:
                frag_val = self.bucket_util.get_fragmentation_kv(
                    self.cluster, bucket, server)
                self.log.debug("Current Fragmentation for node {} is {} \
                ".format(server.ip, frag_val))
                result.update({server.ip: frag_val})
            if (max(result.values())) <= 1.1 * (self.fragmentation):
                self.log.info(
                    "KV stats fragmentation values {}".format(result))
                return True
        self.log.info("KV stats fragmentation values {}".format(result))
        return False

    def get_fragmentation_upsert_docs_list(self):
        """
         This function gives the list of "number of docs" need
         to be updated to touch the given fragmentation value
        """
        update_doc_count = int(
            math.ceil(
                float(self.fragmentation * self.num_items) /
                (100 - self.fragmentation)))
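        # Illustrative example (assumed values): with fragmentation=80 and
        # num_items=10000, update_doc_count = ceil(80 * 10000 / 20) = 40000,
        # which the loop below splits into [10000, 10000, 10000, 10000].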

        upsert_doc_list = list()
        while update_doc_count > self.num_items:
            upsert_doc_list.append(self.num_items)
            update_doc_count -= self.num_items
        if update_doc_count > 0:
            upsert_doc_list.append(update_doc_count)
        self.log.info("Upsert list {}".format(upsert_doc_list))
        return upsert_doc_list

    def validate_data(self, op_type, kv_gen, _sync=True):
        self.log.info("Validating Docs")
        validate_tasks_info = dict()
        for collection in self.collections:
            temp_tasks_info = self.bucket_util._async_validate_docs(
                self.cluster,
                kv_gen,
                op_type,
                0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                timeout_secs=self.sdk_timeout,
                scope=CbServer.default_scope,
                collection=collection,
                retry_exceptions=self.retry_exceptions,
                ignore_exceptions=self.ignore_exceptions,
                sdk_client_pool=self.sdk_client_pool)
            validate_tasks_info.update(temp_tasks_info.items())
        if _sync:
            for task in validate_tasks_info:
                self.task_manager.get_task_result(task)
        else:
            return validate_tasks_info

    def sigkill_memcached(self, nodes=None, graceful=False):
        nodes = nodes or self.cluster.nodes_in_cluster
        for node in nodes:
            shell = RemoteMachineShellConnection(node)
            if graceful:
                shell.restart_couchbase()
            else:
                shell.kill_memcached()
            shell.disconnect()
        self.assertTrue(
            self.bucket_util._wait_warmup_completed(
                [self.cluster.master],
                self.cluster.buckets[0],
                wait_time=self.wait_timeout * 20))

    def get_memory_footprint(self):
        out = subprocess.Popen(
            ['ps', 'v', '-p', str(os.getpid())],
            stdout=subprocess.PIPE).communicate()[0].split(b'\n')
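        # 'ps v' prints a header containing an RSS column (value in KB);
        # locate that column and convert this process' RSS to MB.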
        rss_index = out[0].split().index(b'RSS')
        mem = float(out[1].split()[rss_index]) / 1024
        print("RAM Footprint: %s MB" % str(mem))

    def crash(self,
              nodes=None,
              kill_itr=1,
              graceful=False,
              wait=True,
              force_collect=False):
        self.stop_crash = False
        self.crash_failure = False
        count = kill_itr
        loop_itr = 0
        msg = None

        nodes = nodes or self.cluster.nodes_in_cluster

        connections = dict()
        for node in nodes:
            shell = RemoteMachineShellConnection(node)
            connections.update({node: shell})
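        # Repeatedly kill (or gracefully restart) memcached on all KV nodes at
        # random 30-60s intervals until self.stop_crash is set; abort all tasks
        # if coredumps/critical log messages are found or warm-up fails.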

        while not self.stop_crash:
            loop_itr += 1
            sleep = random.randint(30, 60)
            self.sleep(
                sleep,
                "Iteration:{} waiting for {} sec to kill memcached on all nodes"
                .format(loop_itr, sleep))

            for node, shell in connections.items():
                if "kv" in node.services:
                    if graceful:
                        shell.restart_couchbase()
                    else:
                        while count > 0:
                            shell.kill_memcached()
                            self.sleep(
                                3,
                                "Sleep before killing memcached on same node again."
                            )
                            count -= 1
                        count = kill_itr

            result = self.check_coredump_exist(self.cluster.nodes_in_cluster,
                                               force_collect=force_collect)
            if result:
                self.stop_crash = True
                self.task.jython_task_manager.abort_all_tasks()
                self.crash_failure = result
                msg = "CRASH | CRITICAL | WARN messages found in cb_logs"
                self.log.critical(msg)

            if wait:
                for node in nodes:
                    if "kv" in node.services:
                        result = self.bucket_util._wait_warmup_completed(
                            [node],
                            self.cluster.buckets[0],
                            wait_time=self.wait_timeout * 5)
                        if not result:
                            msg = "warm-up couldn't complete in %s seconds" %\
                                (self.wait_timeout * 5)
                            self.log.critical(msg)
                            self.task.jython_task_manager.abort_all_tasks()
                            self.stop_crash = True
                            self.crash_failure = True

        for _, shell in connections.items():
            shell.disconnect()

    def chmod(self, server, path, mod="000"):
        '''
        Octal  Binary  Sum                  rwx  Permission
        7      111     4(r) + 2(w) + 1(x)   rwx  read, write and execute
        6      110     4(r) + 2(w)          rw-  read and write
        5      101     4(r) + 1(x)          r-x  read and execute
        4      100     4(r)                 r--  read only
        3      011     2(w) + 1(x)          -wx  write and execute
        2      010     2(w)                 -w-  write only
        1      001     1(x)                 --x  execute only
        0      000     0                    ---  none
        '''
        self.stop_chmod = False
        while self.stop_chmod is False:
            shell = RemoteMachineShellConnection(server)
            self.log.debug("{}: changing mod to {} for {}".format(
                server.ip, mod, path))
            shell.execute_command("chmod {} {}".format(mod, path))
            self.sleep(5)
            self.log.debug("{}: changing mod to {} for {}".format(
                server.ip, "777", path))
            shell.execute_command("chmod {} {}".format("777", path))
            self.sleep(5)
            shell.disconnect()

    def set_metadata_purge_interval(self, value, buckets=None, node=None):
        self.log.info(
            "Changing the bucket properties by changing {0} to {1}".format(
                "purge_interval", value))
        if not buckets:
            buckets = self.buckets
        if node is None:
            node = self.cluster.master
        rest = RestConnection(node)

        shell = RemoteMachineShellConnection(node)
        shell.enable_diag_eval_on_non_local_hosts()
        shell.disconnect()
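        # The diag_eval snippet below reads the bucket config via ns_bucket,
        # replaces the purge_interval tuple and writes the config back.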

        for bucket in buckets:
            cmd = '{ok, BC} = ns_bucket:get_bucket(' \
                  '"%s"), BC2 = lists:keyreplace(purge_interval, ' \
                  '1, BC, {purge_interval, %f})' \
                  ', ns_bucket:set_bucket_config("%s", BC2).' \
                  % (bucket.name, value, bucket.name)
            rest.diag_eval(cmd)

        # Restart Memcached in all cluster nodes to reflect the settings
        for server in self.cluster_util.get_kv_nodes(self.cluster,
                                                     master=node):
            shell = RemoteMachineShellConnection(server)
            shell.restart_couchbase()
            shell.disconnect()

        # Check bucket-warm_up after Couchbase restart
        retry_count = 10
        buckets_warmed_up = self.bucket_util.is_warmup_complete(
            self.cluster, buckets, retry_count)
        if not buckets_warmed_up:
            self.log.critical("Few bucket(s) not warmed up "
                              "within expected time")

    def fetch_data_path(self):
        data_path = self.rest.get_data_path()
        if "c:/Program Files" in data_path:
            data_path = data_path.replace("c:/Program Files",
                                          r"/cygdrive/c/Program\ Files")
        return data_path
Example 2
class CollectionsRebalance(CollectionBase):
    def setUp(self):
        super(CollectionsRebalance, self).setUp()
        self.bucket_util._expiry_pager()
        self.load_gen = doc_generator(self.key, 0, self.num_items)
        self.bucket = self.bucket_util.buckets[0]
        self.rest = RestConnection(self.cluster.master)
        self.data_load_spec = self.input.param("data_load_spec",
                                               "volume_test_load")
        self.data_load_stage = self.input.param("data_load_stage", "before")
        self.data_load_type = self.input.param("data_load_type", "async")
        self.nodes_swap = self.input.param("nodes_swap", 1)
        self.nodes_failover = self.input.param("nodes_failover", 1)
        self.failover_ops = [
            "graceful_failover_rebalance_out", "hard_failover_rebalance_out",
            "graceful_failover_recovery", "hard_failover_recovery"
        ]
        self.step_count = self.input.param("step_count", -1)
        self.recovery_type = self.input.param("recovery_type", "full")
        self.compaction = self.input.param("compaction", False)
        if self.compaction:
            self.disable_auto_compaction()
        self.warmup = self.input.param("warmup", False)
        self.update_replica = self.input.param(
            "update_replica", False)  # for replica + rebalance tests
        self.updated_num_replicas = self.input.param(
            "updated_num_replicas",
            1)  # for replica + rebalance tests, forced hard failover
        self.forced_hard_failover = self.input.param(
            "forced_hard_failover", False)  # for forced hard failover tests
        self.change_ram_quota_cluster = self.input.param(
            "change_ram_quota_cluster", False)  # To change during rebalance
        self.skip_validations = self.input.param("skip_validations", True)
        if self.compaction:
            self.compaction_tasks = list()
        self.dgm_test = self.input.param("dgm_test", False)

    def tearDown(self):
        super(CollectionsRebalance, self).tearDown()

    def disable_auto_compaction(self):
        buckets = self.bucket_util.get_all_buckets()
        for bucket in buckets:
            if bucket.bucketType == "couchbase":
                self.bucket_util.disable_compaction(bucket=str(bucket.name))

    def compact_all_buckets(self):
        self.sleep(10, "wait for rebalance to start")
        self.log.info("Starting compaction for each bucket")
        for bucket in self.bucket_util.buckets:
            self.compaction_tasks.append(
                self.task.async_compact_bucket(self.cluster.master, bucket))

    def warmup_node(self, node):
        self.log.info("Warmuping up node...")
        shell = RemoteMachineShellConnection(node)
        shell.stop_couchbase()
        self.sleep(30)
        shell.start_couchbase()
        shell.disconnect()
        self.log.info("Done warming up...")

    def set_ram_quota_cluster(self):
        self.sleep(45, "Wait for rebalance have some progress")
        self.log.info("Changing cluster RAM size")
        status = self.rest.init_cluster_memoryQuota(
            self.cluster.master.rest_username,
            self.cluster.master.rest_password,
            memoryQuota=2500)
        self.assertTrue(status, "RAM quota wasn't changed")

    def set_retry_exceptions(self, doc_loading_spec):
        retry_exceptions = []
        if self.data_load_stage == "during" or (
                self.data_load_stage == "before"
                and self.data_load_type == "async"):
            retry_exceptions.append(SDKException.AmbiguousTimeoutException)
            retry_exceptions.append(SDKException.TimeoutException)
            retry_exceptions.append(SDKException.RequestCanceledException)
            if self.durability_level:
                retry_exceptions.append(
                    SDKException.DurabilityAmbiguousException)
                retry_exceptions.append(
                    SDKException.DurabilityImpossibleException)
        doc_loading_spec[MetaCrudParams.RETRY_EXCEPTIONS] = retry_exceptions

    def get_active_resident_threshold(self, bucket_name):
        self.rest_client = BucketHelper(self.cluster.master)
        dgm = self.rest_client.fetch_bucket_stats(
            bucket_name)["op"]["samples"]["vb_active_resident_items_ratio"][-1]
        return dgm

    def load_to_dgm(self, threshold=100):
        # Load data until the active resident ratio drops below the given threshold
        bucket_name = self.bucket_util.buckets[0].name
        curr_active = self.get_active_resident_threshold(bucket_name)
        while curr_active >= threshold:
            self.subsequent_data_load(data_load_spec="dgm_load")
            curr_active = self.get_active_resident_threshold(bucket_name)
            self.log.info("curr_active resident {0} %".format(curr_active))
            self.bucket_util._wait_for_stats_all_buckets()
        self.log.info(
            "Initial dgm load done. Resident {0} %".format(curr_active))

    def data_load_after_failover(self):
        self.log.info("Starting a sync data load after failover")
        self.subsequent_data_load()  # sync data load
        # Until we recover/rebalance-out, we can't call - self.bucket_util.validate_docs_per_collections_all_buckets()
        self.bucket_util._wait_for_stats_all_buckets()

    def wait_for_failover_or_assert(self,
                                    expected_failover_count,
                                    timeout=180):
        time_start = time.time()
        time_max_end = time_start + timeout
        actual_failover_count = 0
        while time.time() < time_max_end:
            actual_failover_count = self.get_failover_count()
            if actual_failover_count == expected_failover_count:
                break
            time.sleep(20)
        time_end = time.time()
        if actual_failover_count != expected_failover_count:
            self.log.info(self.rest.print_UI_logs())
        self.assertTrue(
            actual_failover_count == expected_failover_count,
            "{0} nodes failed over, expected : {1}".format(
                actual_failover_count, expected_failover_count))
        self.log.info(
            "{0} nodes failed over as expected in {1} seconds".format(
                actual_failover_count, time_end - time_start))

    def get_failover_count(self):
        rest = RestConnection(self.cluster.master)
        cluster_status = rest.cluster_status()
        failover_count = 0
        # check for inactiveFailed
        for node in cluster_status['nodes']:
            if node['clusterMembership'] == "inactiveFailed":
                failover_count += 1
        return failover_count

    def forced_failover_operation(self,
                                  known_nodes=None,
                                  failover_nodes=None,
                                  wait_for_pending=120):
        self.log.info("Updating all the bucket replicas to {0}".format(
            self.updated_num_replicas))
        self.bucket_util.update_all_bucket_replicas(self.updated_num_replicas)
        failover_count = 0
        for failover_node in failover_nodes:
            failover_operation = self.task.failover(
                known_nodes,
                failover_nodes=[failover_node],
                graceful=False,
                wait_for_pending=wait_for_pending)
            failover_count = failover_count + 1
            self.wait_for_failover_or_assert(failover_count)
        operation = self.task.async_rebalance(known_nodes, [], failover_nodes)
        self.data_load_after_failover()
        return operation

    def rebalance_operation(self,
                            rebalance_operation,
                            known_nodes=None,
                            add_nodes=None,
                            remove_nodes=None,
                            failover_nodes=None,
                            wait_for_pending=120,
                            tasks=None):
        self.log.info("Starting rebalance operation of type : {0}".format(
            rebalance_operation))
        step_count = self.step_count
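        # step_count == -1 performs the operation in a single rebalance;
        # otherwise nodes are added/removed in chunks of step_count, waiting
        # for every intermediate rebalance except the last one.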
        if rebalance_operation == "rebalance_out":
            if step_count == -1:
                if self.warmup:
                    node = known_nodes[-1]
                    self.warmup_node(node)
                    operation = self.task.async_rebalance(
                        known_nodes, [], remove_nodes)
                    self.task.jython_task_manager.get_task_result(operation)
                    if not operation.result:
                        self.log.info("rebalance was failed as expected")
                        for bucket in self.bucket_util.buckets:
                            self.assertTrue(
                                self.bucket_util._wait_warmup_completed(
                                    [node], bucket))
                        self.log.info("second attempt to rebalance")
                        self.sleep(
                            60, "wait before starting rebalance after warmup")
                        operation = self.task.async_rebalance(
                            known_nodes, [], remove_nodes)
                        self.wait_for_rebalance_to_complete(operation)
                    self.sleep(60)
                else:
                    if self.update_replica:
                        self.log.info(
                            "Updating all the bucket replicas to {0}".format(
                                self.updated_num_replicas))
                        self.bucket_util.update_all_bucket_replicas(
                            self.updated_num_replicas)
                        self.bucket_util.print_bucket_stats()
                    # all at once
                    operation = self.task.async_rebalance(
                        known_nodes, [], remove_nodes)
                    if self.compaction:
                        self.compact_all_buckets()
                    if self.change_ram_quota_cluster:
                        self.set_ram_quota_cluster()
            else:
                # list of lists each of length step_count
                remove_list = []
                for i in range(0, len(remove_nodes), step_count):
                    if i + step_count >= len(remove_nodes):
                        remove_list.append(remove_nodes[i:])
                    else:
                        remove_list.append(remove_nodes[i:i + step_count])
                iter_count = 0
                # start each intermediate rebalance and wait for it to finish
                # before starting a new one
                for new_remove_nodes in remove_list:
                    operation = self.task.async_rebalance(
                        known_nodes, [], new_remove_nodes)
                    known_nodes = [
                        node for node in known_nodes
                        if node not in new_remove_nodes
                    ]
                    iter_count = iter_count + 1
                    # if this is the last intermediate rebalance, don't wait
                    if iter_count == len(remove_list):
                        continue
                    self.wait_for_rebalance_to_complete(operation)
        elif rebalance_operation == "rebalance_in":
            if step_count == -1:
                if self.warmup:
                    node = known_nodes[-1]
                    self.warmup_node(node)
                    operation = self.task.async_rebalance(
                        known_nodes, add_nodes, [])
                    self.task.jython_task_manager.get_task_result(operation)
                    if not operation.result:
                        self.log.info("rebalance was failed as expected")
                        for bucket in self.bucket_util.buckets:
                            self.assertTrue(
                                self.bucket_util._wait_warmup_completed(
                                    [node], bucket))
                        self.log.info("second attempt to rebalance")
                        self.sleep(
                            60, "wait before starting rebalance after warmup")
                        operation = self.task.async_rebalance(
                            known_nodes + add_nodes, [], [])
                        self.wait_for_rebalance_to_complete(operation)
                    self.sleep(60)
                else:
                    if self.update_replica:
                        self.log.info(
                            "Updating all the bucket replicas to {0}".format(
                                self.updated_num_replicas))
                        self.bucket_util.update_all_bucket_replicas(
                            self.updated_num_replicas)
                        self.bucket_util.print_bucket_stats()
                    # all at once
                    operation = self.task.async_rebalance(
                        known_nodes, add_nodes, [])
                    if self.compaction:
                        self.compact_all_buckets()
                    if self.change_ram_quota_cluster:
                        self.set_ram_quota_cluster()
            else:
                # list of lists each of length step_count
                add_list = []
                for i in range(0, len(add_nodes), step_count):
                    if i + step_count >= len(add_nodes):
                        add_list.append(add_nodes[i:])
                    else:
                        add_list.append(add_nodes[i:i + step_count])
                iter_count = 0
                # start each intermediate rebalance and wait for it to finish
                # before starting a new one
                for new_add_nodes in add_list:
                    operation = self.task.async_rebalance(
                        known_nodes, new_add_nodes, [])
                    known_nodes.extend(new_add_nodes)
                    iter_count = iter_count + 1
                    # if this is the last intermediate rebalance, don't wait
                    if iter_count == len(add_list):
                        continue
                    self.wait_for_rebalance_to_complete(operation)
        elif rebalance_operation == "swap_rebalance":
            if step_count == -1:
                if self.warmup:
                    for node in add_nodes:
                        self.rest.add_node(
                            self.cluster.master.rest_username,
                            self.cluster.master.rest_password, node.ip,
                            self.cluster.servers[self.nodes_init].port)
                    node = known_nodes[-1]
                    self.warmup_node(node)
                    operation = self.task.async_rebalance(
                        self.cluster.servers[:self.nodes_init], [],
                        remove_nodes,
                        check_vbucket_shuffling=False)
                    self.task.jython_task_manager.get_task_result(operation)
                    if not operation.result:
                        self.log.info("rebalance was failed as expected")
                        for bucket in self.bucket_util.buckets:
                            self.assertTrue(
                                self.bucket_util._wait_warmup_completed(
                                    [node], bucket))
                        self.log.info("second attempt to rebalance")
                        self.sleep(
                            60, "wait before starting rebalance after warmup")
                        operation = self.task.async_rebalance(
                            self.cluster.servers[:self.nodes_init], [],
                            remove_nodes)
                        self.wait_for_rebalance_to_complete(operation)
                    self.sleep(60)
                else:
                    if self.update_replica:
                        self.log.info(
                            "Updating all the bucket replicas to {0}".format(
                                self.updated_num_replicas))
                        self.bucket_util.update_all_bucket_replicas(
                            self.updated_num_replicas)
                        self.bucket_util.print_bucket_stats()
                    for node in add_nodes:
                        self.rest.add_node(
                            self.cluster.master.rest_username,
                            self.cluster.master.rest_password, node.ip,
                            self.cluster.servers[self.nodes_init].port)
                    operation = self.task.async_rebalance(
                        self.cluster.servers[:self.nodes_init], [],
                        remove_nodes,
                        check_vbucket_shuffling=False)
                    if self.compaction:
                        self.compact_all_buckets()
                    if self.change_ram_quota_cluster:
                        self.set_ram_quota_cluster()
            else:
                # list of lists each of length step_count
                add_list = []
                remove_list = []
                for i in range(0, len(add_nodes), step_count):
                    if i + step_count >= len(add_nodes):
                        add_list.append(add_nodes[i:])
                        remove_list.append(remove_nodes[i:])
                    else:
                        add_list.append(add_nodes[i:i + step_count])
                        remove_list.append(remove_nodes[i:i + step_count])
                iter_count = 0
                # start each intermediate rebalance and wait for it to finish
                # before starting a new one
                for new_add_nodes, new_remove_nodes in zip(
                        add_list, remove_list):
                    operation = self.task.async_rebalance(
                        known_nodes,
                        new_add_nodes,
                        new_remove_nodes,
                        check_vbucket_shuffling=False)
                    known_nodes = [
                        node for node in known_nodes
                        if node not in new_remove_nodes
                    ]
                    known_nodes.extend(new_add_nodes)
                    iter_count = iter_count + 1
                    # if this is the last intermediate rebalance, don't wait
                    if iter_count == len(add_list):
                        continue
                    self.wait_for_rebalance_to_complete(operation)
        elif rebalance_operation == "rebalance_in_out":
            if self.warmup:
                for node in add_nodes:
                    self.rest.add_node(
                        self.cluster.master.rest_username,
                        self.cluster.master.rest_password, node.ip,
                        self.cluster.servers[self.nodes_init].port)
                node = known_nodes[-1]
                self.warmup_node(node)
                operation = self.task.async_rebalance(
                    self.cluster.servers[:self.nodes_init], [], remove_nodes)
                self.task.jython_task_manager.get_task_result(operation)
                if not operation.result:
                    self.log.info("rebalance was failed as expected")
                    for bucket in self.bucket_util.buckets:
                        self.assertTrue(
                            self.bucket_util._wait_warmup_completed([node],
                                                                    bucket))
                    self.log.info("second attempt to rebalance")
                    self.sleep(60,
                               "wait before starting rebalance after warmup")
                    operation = self.task.async_rebalance(
                        self.cluster.servers[:self.nodes_init], [],
                        remove_nodes)
                    self.wait_for_rebalance_to_complete(operation)
                self.sleep(60)
            else:
                if self.update_replica:
                    self.log.info(
                        "Updating all the bucket replicas to {0}".format(
                            self.updated_num_replicas))
                    self.bucket_util.update_all_bucket_replicas(
                        self.updated_num_replicas)
                    self.bucket_util.print_bucket_stats()
                for node in add_nodes:
                    self.rest.add_node(
                        self.cluster.master.rest_username,
                        self.cluster.master.rest_password, node.ip,
                        self.cluster.servers[self.nodes_init].port)
                operation = self.task.async_rebalance(
                    self.cluster.servers[:self.nodes_init], [], remove_nodes)
                if self.compaction:
                    self.compact_all_buckets()
                if self.change_ram_quota_cluster:
                    self.set_ram_quota_cluster()
        elif rebalance_operation == "graceful_failover_rebalance_out":
            if step_count == -1:
                failover_count = 0
                for failover_node in failover_nodes:
                    failover_operation = self.task.failover(
                        known_nodes,
                        failover_nodes=[failover_node],
                        graceful=True,
                        wait_for_pending=wait_for_pending)
                    failover_count = failover_count + 1
                    self.wait_for_failover_or_assert(failover_count)
                if tasks is not None:
                    self.wait_for_async_data_load_to_complete(tasks)
                if self.compaction:
                    self.compact_all_buckets()
                self.data_load_after_failover()
                operation = self.task.async_rebalance(known_nodes, [],
                                                      failover_nodes)
                if self.change_ram_quota_cluster:
                    self.set_ram_quota_cluster()
            else:
                # list of lists each of length step_count
                failover_list = []
                for i in range(0, len(failover_nodes), step_count):
                    if i + step_count >= len(failover_nodes):
                        failover_list.append(failover_nodes[i:])
                    else:
                        failover_list.append(failover_nodes[i:i + step_count])
                # For each group of step_count failover nodes, fail them over
                # and rebalance them out
                iter_count = 0
                for new_failover_nodes in failover_list:
                    failover_count = 0
                    for failover_node in new_failover_nodes:
                        failover_operation = self.task.failover(
                            known_nodes,
                            failover_nodes=[failover_node],
                            graceful=True,
                            wait_for_pending=wait_for_pending)
                        failover_count = failover_count + 1
                        self.wait_for_failover_or_assert(failover_count)
                    if tasks is not None:
                        self.wait_for_async_data_load_to_complete(tasks)
                        tasks = None
                    self.data_load_after_failover()
                    operation = self.task.async_rebalance(
                        known_nodes, [], new_failover_nodes)
                    iter_count = iter_count + 1
                    known_nodes = [
                        node for node in known_nodes
                        if node not in new_failover_nodes
                    ]
                    if iter_count == len(failover_list):
                        continue
                    self.wait_for_rebalance_to_complete(operation)
        elif rebalance_operation == "hard_failover_rebalance_out":
            if step_count == -1:
                failover_count = 0
                for failover_node in failover_nodes:
                    failover_operation = self.task.failover(
                        known_nodes,
                        failover_nodes=[failover_node],
                        graceful=False,
                        wait_for_pending=wait_for_pending)
                    failover_count = failover_count + 1
                    self.wait_for_failover_or_assert(failover_count)
                if tasks is not None:
                    self.wait_for_async_data_load_to_complete(tasks)
                if self.compaction:
                    self.compact_all_buckets()
                self.data_load_after_failover()
                operation = self.task.async_rebalance(known_nodes, [],
                                                      failover_nodes)
                if self.change_ram_quota_cluster:
                    self.set_ram_quota_cluster()
            else:
                # list of lists each of length step_count
                failover_list = []
                for i in range(0, len(failover_nodes), step_count):
                    if i + step_count >= len(failover_nodes):
                        failover_list.append(failover_nodes[i:])
                    else:
                        failover_list.append(failover_nodes[i:i + step_count])
                # For each group of step_count failover nodes, fail them over
                # and rebalance them out
                iter_count = 0
                for new_failover_nodes in failover_list:
                    failover_count = 0
                    for failover_node in new_failover_nodes:
                        failover_operation = self.task.failover(
                            known_nodes,
                            failover_nodes=[failover_node],
                            graceful=False,
                            wait_for_pending=wait_for_pending)
                        failover_count = failover_count + 1
                        self.wait_for_failover_or_assert(failover_count)
                    if tasks is not None:
                        self.wait_for_async_data_load_to_complete(tasks)
                        tasks = None
                    self.data_load_after_failover()
                    operation = self.task.async_rebalance(
                        known_nodes, [], new_failover_nodes)
                    iter_count = iter_count + 1
                    known_nodes = [
                        node for node in known_nodes
                        if node not in new_failover_nodes
                    ]
                    if iter_count == len(failover_list):
                        continue
                    self.wait_for_rebalance_to_complete(operation)
        elif rebalance_operation == "graceful_failover_recovery":
            if (step_count == -1):
                failover_count = 0
                for failover_node in failover_nodes:
                    failover_operation = self.task.failover(
                        known_nodes,
                        failover_nodes=[failover_node],
                        graceful=True,
                        wait_for_pending=wait_for_pending)
                    failover_count = failover_count + 1
                    self.wait_for_failover_or_assert(failover_count)
                if tasks is not None:
                    self.wait_for_async_data_load_to_complete(tasks)
                self.data_load_after_failover()
                # Mark the failover nodes for recovery
                for failover_node in failover_nodes:
                    self.rest.set_recovery_type(
                        otpNode='ns_1@' + failover_node.ip,
                        recoveryType=self.recovery_type)
                if self.compaction:
                    self.compact_all_buckets()
                # Rebalance all the nodes
                operation = self.task.async_rebalance(known_nodes, [], [])
                if self.change_ram_quota_cluster:
                    self.set_ram_quota_cluster()
            else:
                # list of lists each of length step_count
                failover_list = []
                for i in range(0, len(failover_nodes), step_count):
                    if i + step_count >= len(failover_nodes):
                        failover_list.append(failover_nodes[i:])
                    else:
                        failover_list.append(failover_nodes[i:i + step_count])
                # For each group of step_count failover nodes, fail them over
                # and recover them
                iter_count = 0
                for new_failover_nodes in failover_list:
                    failover_count = 0
                    for failover_node in new_failover_nodes:
                        failover_operation = self.task.failover(
                            known_nodes,
                            failover_nodes=[failover_node],
                            graceful=True,
                            wait_for_pending=wait_for_pending)

                        failover_count = failover_count + 1
                        self.wait_for_failover_or_assert(failover_count)
                    if tasks is not None:
                        self.wait_for_async_data_load_to_complete(tasks)
                        tasks = None
                    self.data_load_after_failover()
                    # Mark the failover nodes for recovery
                    for failover_node in new_failover_nodes:
                        self.rest.set_recovery_type(
                            otpNode='ns_1@' + failover_node.ip,
                            recoveryType=self.recovery_type)
                    operation = self.task.async_rebalance(known_nodes, [], [])
                    iter_count = iter_count + 1
                    if iter_count == len(failover_list):
                        continue
                    self.wait_for_rebalance_to_complete(operation)
        elif rebalance_operation == "hard_failover_recovery":
            if (step_count == -1):
                failover_count = 0
                for failover_node in failover_nodes:
                    failover_operation = self.task.failover(
                        known_nodes,
                        failover_nodes=[failover_node],
                        graceful=False,
                        wait_for_pending=wait_for_pending)
                    failover_count = failover_count + 1
                    self.wait_for_failover_or_assert(failover_count)
                if tasks is not None:
                    self.wait_for_async_data_load_to_complete(tasks)
                self.data_load_after_failover()
                # Mark the failover nodes for recovery
                for failover_node in failover_nodes:
                    self.rest.set_recovery_type(
                        otpNode='ns_1@' + failover_node.ip,
                        recoveryType=self.recovery_type)
                if self.compaction:
                    self.compact_all_buckets()
                # Rebalance all the nodes
                operation = self.task.async_rebalance(known_nodes, [], [])
                if self.change_ram_quota_cluster:
                    self.set_ram_quota_cluster()
            else:
                # list of lists each of length step_count
                failover_list = []
                for i in range(0, len(failover_nodes), step_count):
                    if i + step_count >= len(failover_nodes):
                        failover_list.append(failover_nodes[i:])
                    else:
                        failover_list.append(failover_nodes[i:i + step_count])
                # For each group of step_count failover nodes, fail them over
                # and recover them
                iter_count = 0
                for new_failover_nodes in failover_list:
                    failover_count = 0
                    for failover_node in new_failover_nodes:
                        failover_operation = self.task.failover(
                            known_nodes,
                            failover_nodes=[failover_node],
                            graceful=False,
                            wait_for_pending=wait_for_pending)

                        failover_count = failover_count + 1
                        self.wait_for_failover_or_assert(failover_count)
                    if tasks is not None:
                        self.wait_for_async_data_load_to_complete(tasks)
                        tasks = None
                    self.data_load_after_failover()
                    # Mark the failover nodes for recovery
                    for failover_node in new_failover_nodes:
                        self.rest.set_recovery_type(
                            otpNode='ns_1@' + failover_node.ip,
                            recoveryType=self.recovery_type)
                    operation = self.task.async_rebalance(known_nodes, [], [])
                    iter_count = iter_count + 1
                    if iter_count == len(failover_list):
                        continue
                    self.wait_for_rebalance_to_complete(operation)
        else:
            self.fail("rebalance_operation is not defined")
        return operation
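
    # Illustrative sketch (not part of the original suite): every step-wise
    # branch above builds a "list of lists each of length step_count" with the
    # same slicing pattern. A hypothetical helper like the one below is
    # equivalent to that repeated loop and could replace it.
    def _chunk_nodes(self, nodes, step_count):
        # e.g. 5 nodes with step_count=2 -> [[n1, n2], [n3, n4], [n5]]
        return [nodes[i:i + step_count]
                for i in range(0, len(nodes), step_count)]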

    def subsequent_data_load(self, async_load=False, data_load_spec=None):
        if data_load_spec is None:
            data_load_spec = self.data_load_spec
        doc_loading_spec = self.bucket_util.get_crud_template_from_package(
            data_load_spec)
        self.over_ride_doc_loading_template_params(doc_loading_spec)
        self.set_retry_exceptions(doc_loading_spec)
        if self.dgm_test:
            if data_load_spec == "dgm_load":
                # pre-load to dgm
                doc_loading_spec[MetaCrudParams.DocCrud.
                                 CREATE_PERCENTAGE_PER_COLLECTION] = 2
            else:
                # Do only deletes during dgm + rebalance op
                doc_loading_spec[MetaCrudParams.DocCrud.
                                 CREATE_PERCENTAGE_PER_COLLECTION] = 0
        if self.forced_hard_failover and self.spec_name == "multi_bucket.buckets_for_rebalance_tests_more_collections":
            # create collections; with any other bucket_spec only plain
            # "create" ops are issued
            doc_loading_spec[MetaCrudParams.COLLECTIONS_TO_ADD_PER_BUCKET] = 20
        tasks = self.bucket_util.run_scenario_from_spec(
            self.task,
            self.cluster,
            self.bucket_util.buckets,
            doc_loading_spec,
            mutation_num=0,
            async_load=async_load,
            batch_size=self.batch_size,
            validate_task=(not self.skip_validations))
        return tasks

    def async_data_load(self):
        tasks = self.subsequent_data_load(async_load=True)
        return tasks

    def sync_data_load(self):
        self.subsequent_data_load()

    def wait_for_async_data_load_to_complete(self, task):
        self.task.jython_task_manager.get_task_result(task)
        if not self.skip_validations:
            self.bucket_util.validate_doc_loading_results(task)
            if task.result is False:
                self.fail("Doc_loading failed")

    def wait_for_compaction_to_complete(self):
        for task in self.compaction_tasks:
            self.task_manager.get_task_result(task)
            self.assertTrue(
                task.result,
                "Compaction failed for bucket: %s" % task.bucket.name)

    def wait_for_rebalance_to_complete(self, task, wait_step=120):
        self.task.jython_task_manager.get_task_result(task)
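        # In DGM runs a failed rebalance is tolerated only when at least one
        # bucket's active resident ratio has dropped below 20%; otherwise the
        # failure is reported as a genuine test failure.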
        if self.dgm_test and (not task.result):
            fail_flag = True
            for bucket in self.bucket_util.buckets:
                result = self.get_active_resident_threshold(bucket.name)
                if result < 20:
                    fail_flag = False
                    self.log.error("DGM less than 20")
                    break
            self.assertFalse(fail_flag, "rebalance failed")
        else:
            self.assertTrue(task.result, "Rebalance Failed")
        if self.compaction:
            self.wait_for_compaction_to_complete()

    def data_validation_collection(self):
        if not self.skip_validations:
            if self.data_load_spec == "ttl_load" or self.data_load_spec == "ttl_load1":
                self.bucket_util._expiry_pager()
                self.sleep(400, "wait for maxttl to finish")
                items = 0
                self.bucket_util._wait_for_stats_all_buckets()
                for bucket in self.bucket_util.buckets:
                    items = items + self.bucket_helper_obj.get_active_key_count(
                        bucket)
                if items != 0:
                    self.fail("TTL + rebalance failed")
            elif self.forced_hard_failover:
                pass
            else:
                self.bucket_util._wait_for_stats_all_buckets()
                self.bucket_util.validate_docs_per_collections_all_buckets()

    def load_collections_with_rebalance(self, rebalance_operation):
        tasks = None
        rebalance = None
        self.log.info("Doing collection data load {0} {1}".format(
            self.data_load_stage, rebalance_operation))
        if self.data_load_stage == "before":
            if self.data_load_type == "async":
                tasks = self.async_data_load()
            else:
                self.sync_data_load()
        if self.dgm_test:
            self.load_to_dgm()
        if rebalance_operation == "rebalance_in":
            rebalance = self.rebalance_operation(
                rebalance_operation="rebalance_in",
                known_nodes=self.cluster.servers[:self.nodes_init],
                add_nodes=self.cluster.
                servers[self.nodes_init:self.nodes_init + self.nodes_in],
                tasks=tasks)

        elif rebalance_operation == "rebalance_out":
            rebalance = self.rebalance_operation(
                rebalance_operation="rebalance_out",
                known_nodes=self.cluster.servers[:self.nodes_init],
                remove_nodes=self.cluster.servers[:self.nodes_init]
                [-self.nodes_out:],
                tasks=tasks)
        elif rebalance_operation == "swap_rebalance":
            rebalance = self.rebalance_operation(
                rebalance_operation="swap_rebalance",
                known_nodes=self.cluster.servers[:self.nodes_init],
                add_nodes=self.cluster.
                servers[self.nodes_init:self.nodes_init + self.nodes_swap],
                remove_nodes=self.cluster.servers[:self.nodes_init]
                [-self.nodes_swap:],
                tasks=tasks)
        elif rebalance_operation == "rebalance_in_out":
            rebalance = self.rebalance_operation(
                rebalance_operation="rebalance_in_out",
                known_nodes=self.cluster.servers[:self.nodes_init],
                add_nodes=self.cluster.
                servers[self.nodes_init:self.nodes_init + self.nodes_in],
                remove_nodes=self.cluster.servers[:self.nodes_init]
                [-self.nodes_out:],
                tasks=tasks)
        elif rebalance_operation == "graceful_failover_rebalance_out":
            rebalance = self.rebalance_operation(
                rebalance_operation="graceful_failover_rebalance_out",
                known_nodes=self.cluster.servers[:self.nodes_init],
                failover_nodes=self.cluster.servers[:self.nodes_init]
                [-self.nodes_failover:],
                tasks=tasks)
        elif rebalance_operation == "hard_failover_rebalance_out":
            rebalance = self.rebalance_operation(
                rebalance_operation="hard_failover_rebalance_out",
                known_nodes=self.cluster.servers[:self.nodes_init],
                failover_nodes=self.cluster.servers[:self.nodes_init]
                [-self.nodes_failover:],
                tasks=tasks)
        elif rebalance_operation == "graceful_failover_recovery":
            rebalance = self.rebalance_operation(
                rebalance_operation="graceful_failover_recovery",
                known_nodes=self.cluster.servers[:self.nodes_init],
                failover_nodes=self.cluster.servers[:self.nodes_init]
                [-self.nodes_failover:],
                tasks=tasks)
        elif rebalance_operation == "hard_failover_recovery":
            rebalance = self.rebalance_operation(
                rebalance_operation="hard_failover_recovery",
                known_nodes=self.cluster.servers[:self.nodes_init],
                failover_nodes=self.cluster.servers[:self.nodes_init]
                [-self.nodes_failover:],
                tasks=tasks)
        elif rebalance_operation == "forced_hard_failover_rebalance_out":
            rebalance = self.forced_failover_operation(
                known_nodes=self.cluster.servers[:self.nodes_init],
                failover_nodes=self.cluster.servers[:self.nodes_init]
                [-self.nodes_failover:])

        if self.data_load_stage == "during":
            # MB-40654
            self.sleep(10, "wait for rebalance to start")
            if self.data_load_type == "async":
                tasks = self.async_data_load()
            else:
                self.sync_data_load()
        if not self.warmup:
            self.wait_for_rebalance_to_complete(rebalance)
        if self.data_load_stage == "during" or self.data_load_stage == "before":
            if self.data_load_type == "async":
                # for failover + before + async, wait_for_async_data_load_to_complete is already done
                if self.data_load_stage == "before" and rebalance_operation in self.failover_ops:
                    pass
                else:
                    self.wait_for_async_data_load_to_complete(tasks)
            self.data_validation_collection()
        if self.data_load_stage == "after":
            self.sync_data_load()
            self.data_validation_collection()

    def test_data_load_collections_with_rebalance_in(self):
        self.load_collections_with_rebalance(
            rebalance_operation="rebalance_in")

    def test_data_load_collections_with_rebalance_out(self):
        self.load_collections_with_rebalance(
            rebalance_operation="rebalance_out")

    def test_data_load_collections_with_swap_rebalance(self):
        self.load_collections_with_rebalance(
            rebalance_operation="swap_rebalance")

    def test_data_load_collections_with_rebalance_in_out(self):
        self.load_collections_with_rebalance(
            rebalance_operation="rebalance_in_out")

    def test_data_load_collections_with_graceful_failover_rebalance_out(self):
        self.load_collections_with_rebalance(
            rebalance_operation="graceful_failover_rebalance_out")

    def test_data_load_collections_with_hard_failover_rebalance_out(self):
        self.load_collections_with_rebalance(
            rebalance_operation="hard_failover_rebalance_out")

    def test_data_load_collections_with_graceful_failover_recovery(self):
        self.load_collections_with_rebalance(
            rebalance_operation="graceful_failover_recovery")

    def test_data_load_collections_with_hard_failover_recovery(self):
        self.load_collections_with_rebalance(
            rebalance_operation="hard_failover_recovery")

    def test_data_load_collections_with_forced_hard_failover_rebalance_out(
            self):
        self.load_collections_with_rebalance(
            rebalance_operation="forced_hard_failover_rebalance_out")
Example no. 3
class volume(BaseTestCase):
    def setUp(self):
        self.input = TestInputSingleton.input
        self.input.test_params.update({"default_bucket": False})
        BaseTestCase.setUp(self)
        self.rest = RestConnection(self.servers[0])
        self.op_type = self.input.param("op_type", "create")
        self.available_servers = list()
        self.available_servers = self.cluster.servers[self.nodes_init:]
        self.num_buckets = self.input.param("num_buckets", 1)
        self.mutate = 0
        self.doc_ops = self.input.param("doc_ops", None)
        if self.doc_ops:
            self.doc_ops = self.doc_ops.split(';')
        self.iterations = self.input.param("iterations", 2)
        self.vbucket_check = self.input.param("vbucket_check", True)
        self.new_num_writer_threads = self.input.param(
            "new_num_writer_threads", 6)
        self.new_num_reader_threads = self.input.param(
            "new_num_reader_threads", 8)
        self.create_perc = 100
        self.update_perc = self.input.param("update_perc", 50)
        self.delete_perc = self.input.param("delete_perc", 50)
        self.expiry_perc = self.input.param("expiry_perc", 0)
        self.start = 0
        self.end = 0
        self.initial_items = self.start
        self.final_items = self.end
        self.create_end = 0
        self.create_start = 0
        self.update_end = 0
        self.update_start = 0
        self.delete_end = 0
        self.delete_start = 0
        self.expire_end = 0
        self.expire_start = 0
        self.num_collections = self.input.param("num_collections", 10)

    def create_required_buckets(self):
        self.log.info("Get the available memory quota")
        self.info = self.rest.get_nodes_self()
        threshold_memory = 100
        # threshold_memory_vagrant = 100
        total_memory_in_mb = self.info.mcdMemoryReserved
        total_available_memory_in_mb = total_memory_in_mb

        # If the mentioned service is already present,
        # we remove that much memory from available memory quota
        if "index" in self.info.services:
            total_available_memory_in_mb -= self.info.indexMemoryQuota
        if "fts" in self.info.services:
            total_available_memory_in_mb -= self.info.ftsMemoryQuota
        if "cbas" in self.info.services:
            total_available_memory_in_mb -= self.info.cbasMemoryQuota
        if "eventing" in self.info.services:
            total_available_memory_in_mb -= self.info.eventingMemoryQuota

        available_memory = total_available_memory_in_mb - threshold_memory
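        # e.g. (hypothetical numbers) with mcdMemoryReserved=4096 MB and an
        # index quota of 512 MB, available_memory = 4096 - 512 - 100 = 3484 MB,
        # which is then split evenly across the buckets created below.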

        self.rest.set_service_memoryQuota(service='memoryQuota',
                                          memoryQuota=available_memory)

        # Creating buckets for data loading purpose
        self.log.info("Create CB buckets")
        self.bucket_expiry = self.input.param("bucket_expiry", 0)
        ramQuota = self.input.param("ramQuota", available_memory)
        buckets = self.input.param("bucket_names", "GleamBookUsers").split(';')
        self.bucket_type = self.bucket_type.split(';')
        self.compression_mode = self.compression_mode.split(';')
        for i in range(self.num_buckets):
            bucket = Bucket({
                Bucket.name: buckets[i],
                Bucket.ramQuotaMB: ramQuota / self.num_buckets,
                Bucket.maxTTL: self.bucket_expiry,
                Bucket.replicaNumber: self.num_replicas,
                Bucket.storageBackend: self.bucket_storage,
                Bucket.evictionPolicy: self.bucket_eviction_policy,
                Bucket.bucketType: self.bucket_type[i],
                Bucket.compressionMode: self.compression_mode[i]
            })
            self.bucket_util.create_bucket(bucket)

        # rebalance the new buckets across all nodes.
        self.log.info("Rebalance Starts")
        self.nodes = self.rest.node_statuses()
        self.rest.rebalance(otpNodes=[node.id for node in self.nodes],
                            ejectedNodes=[])
        self.rest.monitorRebalance()
        return bucket

    def set_num_writer_and_reader_threads(self,
                                          num_writer_threads="default",
                                          num_reader_threads="default"):
        for node in self.cluster_util.get_kv_nodes():
            bucket_helper = BucketHelper(node)
            bucket_helper.update_memcached_settings(
                num_writer_threads=num_writer_threads,
                num_reader_threads=num_reader_threads)

    def generate_docs(self, doc_ops=None):
        self.gen_delete = None
        self.gen_create = None
        self.gen_update = None
        self.gen_expiry = None
        self.create_end = 0
        self.create_start = 0
        self.update_end = 0
        self.update_start = 0
        self.delete_end = 0
        self.delete_start = 0
        self.expire_end = 0
        self.expire_start = 0
        self.initial_items = self.final_items

        if doc_ops is None:
            doc_ops = self.doc_ops

        if "update" in doc_ops:
            self.update_start = 0
            self.update_end = self.num_items * self.update_perc / 100
            self.mutate += 1
            self.gen_update = doc_generator(
                "Users",
                self.update_start,
                self.update_end,
                doc_size=self.doc_size,
                doc_type=self.doc_type,
                target_vbucket=self.target_vbucket,
                vbuckets=self.cluster_util.vbuckets,
                key_size=self.key_size,
                randomize_doc_size=self.randomize_doc_size,
                randomize_value=self.randomize_value,
                mix_key_size=self.mix_key_size,
                mutate=self.mutate)

        if "delete" in doc_ops:
            self.delete_start = self.start
            self.delete_end = self.start + (self.num_items *
                                            self.delete_perc) / 100
            self.gen_delete = doc_generator(
                "Users",
                self.delete_start,
                self.delete_end,
                doc_size=self.doc_size,
                doc_type=self.doc_type,
                target_vbucket=self.target_vbucket,
                vbuckets=self.cluster_util.vbuckets,
                key_size=self.key_size,
                randomize_doc_size=self.randomize_doc_size,
                randomize_value=self.randomize_value,
                mix_key_size=self.mix_key_size)
            self.final_items -= (self.delete_end -
                                 self.delete_start) * self.num_collections

        if "expiry" in doc_ops and self.maxttl:
            self.expire_start = self.start + (self.num_items *
                                              self.delete_perc) / 100
            self.expire_end = self.start + self.num_items * (
                self.delete_perc + self.expiry_perc) / 100
            self.gen_expiry = doc_generator(
                "Users",
                self.expire_start,
                self.expire_end,
                doc_size=self.doc_size,
                doc_type=self.doc_type,
                target_vbucket=self.target_vbucket,
                vbuckets=self.cluster_util.vbuckets,
                key_size=self.key_size,
                randomize_doc_size=self.randomize_doc_size,
                randomize_value=self.randomize_value,
                mix_key_size=self.mix_key_size)
            self.final_items -= (self.expire_end -
                                 self.expire_start) * self.num_collections

        if "create" in doc_ops:
            self.start = self.end
            self.end += self.num_items * self.create_perc / 100
            self.create_start = self.start
            self.create_end = self.end
            self.gen_create = doc_generator(
                "Users",
                self.start,
                self.end,
                doc_size=self.doc_size,
                doc_type=self.doc_type,
                target_vbucket=self.target_vbucket,
                vbuckets=self.cluster_util.vbuckets,
                key_size=self.key_size,
                randomize_doc_size=self.randomize_doc_size,
                randomize_value=self.randomize_value,
                mix_key_size=self.mix_key_size)
            self.final_items += (self.end - self.start) * self.num_collections
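        # Worked example (hypothetical numbers): with num_items=1000,
        # create_perc=100, update_perc=50, delete_perc=50, num_collections=10
        # and a previous create pass that left start=0, end=1000, a call with
        # doc_ops=["create", "update", "delete"] yields:
        #   update range 0-500, delete range 0-500 (final_items -= 5000),
        #   create range 1000-2000 (final_items += 10000).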

    def doc_loader(self, op_type, kv_gen, exp=0, scope=None, collection=None):
        if scope is None:
            scope = CbServer.default_scope
        if collection is None:
            collection = CbServer.default_collection
        retry_exceptions = [
            SDKException.AmbiguousTimeoutException,
            SDKException.RequestCanceledException
        ]
        tasks_info = self.bucket_util._async_load_all_buckets(
            self.cluster,
            kv_gen,
            op_type,
            exp,
            batch_size=self.batch_size,
            process_concurrency=self.process_concurrency,
            persist_to=self.persist_to,
            replicate_to=self.replicate_to,
            durability=self.durability_level,
            pause_secs=5,
            timeout_secs=self.sdk_timeout,
            retries=self.sdk_retries,
            retry_exceptions=retry_exceptions,
            scope=scope,
            collection=collection)
        return tasks_info

    def data_load(self,
                  scope=CbServer.default_scope,
                  collections=[CbServer.default_collection]):
        tasks_info = dict()
        for collection in collections:
            if self.gen_update is not None:
                task_info = self.doc_loader("update",
                                            self.gen_update,
                                            scope=scope,
                                            collection=collection)
                tasks_info.update(task_info.items())
            if self.gen_create is not None:
                task_info = self.doc_loader("create",
                                            self.gen_create,
                                            scope=scope,
                                            collection=collection)
                tasks_info.update(task_info.items())
            if self.gen_delete is not None:
                task_info = self.doc_loader("delete",
                                            self.gen_delete,
                                            scope=scope,
                                            collection=collection)
                tasks_info.update(task_info.items())
            if self.gen_expiry is not None and self.maxttl:
                task_info = self.doc_loader("update",
                                            self.gen_expiry,
                                            self.maxttl,
                                            scope=scope,
                                            collection=collection)
                tasks_info.update(task_info.items())
        return tasks_info
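    # Minimal usage sketch (hypothetical scope/collection names): build the
    # generators first, fan the load out over every collection in the scope,
    # then wait on the returned task map via data_validation().
    #
    #     self.generate_docs(doc_ops="create;update")
    #     tasks_info = self.data_load(
    #         scope="VolumeScope",
    #         collections=["VolumeCollection0", "VolumeCollection1"])
    #     self.data_validation(tasks_info, scope="VolumeScope",
    #                          collections=["VolumeCollection0",
    #                                       "VolumeCollection1"])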

    def data_validation(self,
                        tasks_info,
                        scope=CbServer.default_scope,
                        collections=[CbServer.default_collection],
                        check_docs=True):
        for task in tasks_info:
            self.task_manager.get_task_result(task)
        self.bucket_util.verify_doc_op_task_exceptions(tasks_info,
                                                       self.cluster)
        self.bucket_util.log_doc_ops_task_failures(tasks_info)
        for task, task_info in tasks_info.items():
            self.assertFalse(
                task_info["ops_failed"],
                "Doc ops failed for task: {}".format(task.thread_name))

        if check_docs:
            self.log.info("Validating Active/Replica Docs")
            self.check_replica = False
            for bucket in self.bucket_util.buckets:
                tasks = list()
                for collection in collections:
                    if self.gen_update is not None:
                        tasks.append(
                            self.task.async_validate_docs(
                                self.cluster,
                                bucket,
                                self.gen_update,
                                "update",
                                0,
                                batch_size=self.batch_size,
                                process_concurrency=self.process_concurrency,
                                pause_secs=5,
                                timeout_secs=self.sdk_timeout,
                                check_replica=self.check_replica,
                                scope=scope,
                                collection=collection))
                    if self.gen_create is not None:
                        tasks.append(
                            self.task.async_validate_docs(
                                self.cluster,
                                bucket,
                                self.gen_create,
                                "create",
                                0,
                                batch_size=self.batch_size,
                                process_concurrency=self.process_concurrency,
                                pause_secs=5,
                                timeout_secs=self.sdk_timeout,
                                check_replica=self.check_replica,
                                scope=scope,
                                collection=collection))
                    if self.gen_delete is not None:
                        tasks.append(
                            self.task.async_validate_docs(
                                self.cluster,
                                bucket,
                                self.gen_delete,
                                "delete",
                                0,
                                batch_size=self.batch_size,
                                process_concurrency=self.process_concurrency,
                                pause_secs=5,
                                timeout_secs=self.sdk_timeout,
                                check_replica=self.check_replica,
                                scope=scope,
                                collection=collection))
                    if self.gen_expiry is not None:
                        self.sleep(
                            self.maxttl,
                            "Wait for docs to expire until expiry time..")
                        tasks.append(
                            self.task.async_validate_docs(
                                self.cluster,
                                bucket,
                                self.gen_expiry,
                                "delete",
                                0,
                                batch_size=self.batch_size,
                                process_concurrency=self.process_concurrency,
                                pause_secs=5,
                                timeout_secs=self.sdk_timeout,
                                check_replica=self.check_replica,
                                scope=scope,
                                collection=collection))
                for task in tasks:
                    self.task.jython_task_manager.get_task_result(task)
        self.bucket_util._wait_for_stats_all_buckets()
#         self.bucket_util.verify_stats_all_buckets(self.final_items)

    def get_bucket_dgm(self, bucket):
        self.rest_client = BucketHelper(self.cluster.master)
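        # vb_active_resident_items_ratio reports the percentage of active
        # items currently held in memory; values below 100 mean the bucket
        # has gone DGM (disk greater than memory).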
        dgm = self.rest_client.fetch_bucket_stats(
            bucket.name)["op"]["samples"]["vb_active_resident_items_ratio"][-1]
        self.log.info("Active Resident Threshold of {0} is {1}".format(
            bucket.name, dgm))

    # Stopping and restarting the memcached process

    def stop_process(self):
        target_node = self.servers[2]
        remote = RemoteMachineShellConnection(target_node)
        error_sim = CouchbaseError(self.log, remote)
        error_to_simulate = "stop_memcached"
        # Induce the error condition
        error_sim.create(error_to_simulate)
        self.sleep(20, "Wait before reverting the error condition")
        # Revert the simulated error condition and close the ssh session
        error_sim.revert(error_to_simulate)
        remote.disconnect()

    def rebalance(self, nodes_in=0, nodes_out=0):
        servs_in = random.sample(self.available_servers, nodes_in)

        self.nodes_cluster = self.cluster.nodes_in_cluster[:]
        self.nodes_cluster.remove(self.cluster.master)
        servs_out = random.sample(self.nodes_cluster, nodes_out)
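        # A swap (equal nodes in and out) necessarily moves vbuckets between
        # the swapped nodes, so the vbucket-shuffling check is disabled for
        # that case below.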

        if nodes_in == nodes_out:
            self.vbucket_check = False

        rebalance_task = self.task.async_rebalance(
            self.cluster.servers[:self.nodes_init],
            servs_in,
            servs_out,
            check_vbucket_shuffling=self.vbucket_check,
            retry_get_process_num=150)

        self.available_servers = [
            servs for servs in self.available_servers if servs not in servs_in
        ]
        self.available_servers += servs_out

        self.cluster.nodes_in_cluster.extend(servs_in)
        self.cluster.nodes_in_cluster = list(
            set(self.cluster.nodes_in_cluster) - set(servs_out))
        return rebalance_task

    def print_crud_stats(self):
        self.table = TableView(self.log.info)
        self.table.set_headers([
            "Initial Items", "Current Items", "Items Updated", "Items Created",
            "Items Deleted", "Items Expired"
        ])
        self.table.add_row([
            str(self.initial_items),
            str(self.final_items),
            str(self.update_start) + "-" + str(self.update_end),
            str(self.create_start) + "-" + str(self.create_end),
            str(self.delete_start) + "-" + str(self.delete_end),
            str(self.expire_start) + "-" + str(self.expire_end)
        ])
        self.table.display("Docs statistics")

    def Volume(self):
        #######################################################################
        self.log.info("Step1: Create a n node cluster")
        if self.nodes_init > 1:
            nodes_init = self.cluster.servers[1:self.nodes_init]
            self.task.rebalance([self.cluster.master], nodes_init, [])
            self.cluster.nodes_in_cluster.extend([self.cluster.master] +
                                                 nodes_init)

        #######################################################################
        self.log.info("Step 2 & 3: Create required buckets.")
        self.bucket = self.create_required_buckets()
        self.loop = 0
        scope_name = "VolumeScope"
        collection_prefix = "VolumeCollection"
        self.bucket_util.create_scope(self.cluster.master, self.bucket,
                                      {"name": scope_name})
        for i in range(self.num_collections):
            collection_name = collection_prefix + str(i)
            self.log.info("Creating scope::collection '%s::%s'" %
                          (scope_name, collection_name))
            self.bucket_util.create_collection(self.cluster.master,
                                               self.bucket, scope_name,
                                               {"name": collection_name})
            self.sleep(2)
        #######################################################################
        while self.loop < self.iterations:
            self.log.info("Step 4: Pre-Requisites for Loading of docs")
            self.bucket_util.add_rbac_user()
            self.generate_docs(doc_ops="create")
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            for task in tasks_info:
                self.task.jython_task_manager.get_task_result(task)
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)
            self.create_perc = self.input.param("create_perc", 100)
            ###################################################################
            self.log.info("Step 5: Rebalance in with Loading of docs")
            self.generate_docs(doc_ops="create")
            self.set_num_writer_and_reader_threads(
                num_writer_threads="disk_io_optimized",
                num_reader_threads="disk_io_optimized")
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=0)
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.set_num_writer_and_reader_threads(
                num_writer_threads=self.new_num_writer_threads,
                num_reader_threads=self.new_num_reader_threads)
            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result, "Rebalance Failed")
            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)

            ###################################################################
            self.log.info("Step 6: Rebalance Out with Loading of docs")
            self.generate_docs()
            self.set_num_writer_and_reader_threads(
                num_writer_threads="disk_io_optimized",
                num_reader_threads="disk_io_optimized")
            rebalance_task = self.rebalance(nodes_in=0, nodes_out=1)
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.set_num_writer_and_reader_threads(
                num_writer_threads=self.new_num_writer_threads,
                num_reader_threads=self.new_num_reader_threads)
            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result, "Rebalance Failed")
            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)

            ###################################################################
            self.log.info("Step 7: Rebalance In_Out with Loading of docs")
            self.generate_docs()
            self.set_num_writer_and_reader_threads(
                num_writer_threads="disk_io_optimized",
                num_reader_threads="disk_io_optimized")
            rebalance_task = self.rebalance(nodes_in=2, nodes_out=1)
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.set_num_writer_and_reader_threads(
                num_writer_threads=self.new_num_writer_threads,
                num_reader_threads=self.new_num_reader_threads)
            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result, "Rebalance Failed")
            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)

            ###################################################################
            self.log.info("Step 8: Swap with Loading of docs")
            self.generate_docs()
            self.set_num_writer_and_reader_threads(
                num_writer_threads="disk_io_optimized",
                num_reader_threads="disk_io_optimized")
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=1)
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.set_num_writer_and_reader_threads(
                num_writer_threads=self.new_num_writer_threads,
                num_reader_threads=self.new_num_reader_threads)

            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result, "Rebalance Failed")
            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)

            ###################################################################
            self.log.info("Step 9: Updating the bucket replica to 2")
            bucket_helper = BucketHelper(self.cluster.master)
            for i in range(len(self.bucket_util.buckets)):
                bucket_helper.change_bucket_props(self.bucket_util.buckets[i],
                                                  replicaNumber=2)
            self.generate_docs()
            self.set_num_writer_and_reader_threads(
                num_writer_threads="disk_io_optimized",
                num_reader_threads="disk_io_optimized")
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=0)
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.set_num_writer_and_reader_threads(
                num_writer_threads=self.new_num_writer_threads,
                num_reader_threads=self.new_num_reader_threads)
            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result, "Rebalance Failed")
            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)

            ###################################################################
            self.log.info("Step 10: Stopping and restarting memcached process")
            self.generate_docs()
            self.set_num_writer_and_reader_threads(
                num_writer_threads=self.new_num_writer_threads,
                num_reader_threads=self.new_num_reader_threads)
            rebalance_task = self.task.async_rebalance(self.cluster.servers,
                                                       [], [])
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.set_num_writer_and_reader_threads(
                num_writer_threads="disk_io_optimized",
                num_reader_threads="disk_io_optimized")

            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result, "Rebalance Failed")
            self.stop_process()
            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)

            ###################################################################
            self.log.info("Step 11: Failover a node and RebalanceOut "
                          "that node with loading in parallel")
            self.std_vbucket_dist = self.input.param("std_vbucket_dist", None)
            std = self.std_vbucket_dist or 1.0

            prev_failover_stats = self.bucket_util.get_failovers_logs(
                self.cluster.nodes_in_cluster, self.bucket_util.buckets)

            disk_replica_dataset, disk_active_dataset = self.bucket_util.\
                get_and_compare_active_replica_data_set_all(
                    self.cluster.nodes_in_cluster, self.bucket_util.buckets,
                    path=None)

            self.rest = RestConnection(self.cluster.master)
            self.nodes = self.cluster_util.get_nodes(self.cluster.master)
            self.chosen = self.cluster_util.pick_nodes(self.cluster.master,
                                                       howmany=1)

            # Mark Node for failover
            self.generate_docs()
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.success_failed_over = self.rest.fail_over(self.chosen[0].id,
                                                           graceful=True)
            self.sleep(10)
            self.rest.monitorRebalance()
            self.nodes = self.rest.node_statuses()
            self.set_num_writer_and_reader_threads(
                num_writer_threads=self.new_num_writer_threads,
                num_reader_threads=self.new_num_reader_threads)
            self.rest.rebalance(otpNodes=[node.id for node in self.nodes],
                                ejectedNodes=[self.chosen[0].id])
            self.assertTrue(self.rest.monitorRebalance(stop_if_loop=True),
                            msg="Rebalance failed")

            servs_out = [
                node for node in self.cluster.servers
                if node.ip == self.chosen[0].ip
            ]
            self.cluster.nodes_in_cluster = list(
                set(self.cluster.nodes_in_cluster) - set(servs_out))
            self.available_servers += servs_out

            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())

            self.bucket_util.compare_failovers_logs(
                prev_failover_stats, self.cluster.nodes_in_cluster,
                self.bucket_util.buckets)

            self.bucket_util.data_analysis_active_replica_all(
                disk_active_dataset,
                disk_replica_dataset,
                self.cluster.servers[:self.nodes_in + self.nodes_init],
                self.bucket_util.buckets,
                path=None)
            nodes = self.cluster_util.get_nodes_in_cluster(self.cluster.master)
            self.bucket_util.vb_distribution_analysis(
                servers=nodes,
                buckets=self.bucket_util.buckets,
                num_replicas=2,
                std=std,
                total_vbuckets=self.cluster_util.vbuckets)
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=0)
            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)

            ###################################################################
            self.log.info("Step 12: Failover a node and FullRecovery\
             that node")

            self.std_vbucket_dist = self.input.param("std_vbucket_dist", None)
            std = self.std_vbucket_dist or 1.0

            prev_failover_stats = self.bucket_util.get_failovers_logs(
                self.cluster.nodes_in_cluster, self.bucket_util.buckets)

            disk_replica_dataset, disk_active_dataset = self.bucket_util.\
                get_and_compare_active_replica_data_set_all(
                    self.cluster.nodes_in_cluster,
                    self.bucket_util.buckets,
                    path=None)

            self.rest = RestConnection(self.cluster.master)
            self.nodes = self.cluster_util.get_nodes(self.cluster.master)
            self.chosen = self.cluster_util.pick_nodes(self.cluster.master,
                                                       howmany=1)

            self.generate_docs()
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            # Mark Node for failover
            self.success_failed_over = self.rest.fail_over(self.chosen[0].id,
                                                           graceful=True)
            self.sleep(10)
            self.rest.monitorRebalance()
            # Mark Node for full recovery
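            # Full recovery discards the failed-over node's data files; the
            # rebalance below rebuilds its vbuckets from the remaining copies.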
            if self.success_failed_over:
                self.rest.set_recovery_type(otpNode=self.chosen[0].id,
                                            recoveryType="full")

            self.set_num_writer_and_reader_threads(
                num_writer_threads=self.new_num_writer_threads,
                num_reader_threads=self.new_num_reader_threads)

            rebalance_task = self.task.async_rebalance(
                self.cluster.servers[:self.nodes_init], [], [])

            self.set_num_writer_and_reader_threads(
                num_writer_threads="disk_io_optimized",
                num_reader_threads="disk_io_optimized")

            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result, "Rebalance Failed")

            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())

            self.bucket_util.compare_failovers_logs(
                prev_failover_stats, self.cluster.nodes_in_cluster,
                self.bucket_util.buckets)

            self.bucket_util.data_analysis_active_replica_all(
                disk_active_dataset,
                disk_replica_dataset,
                self.cluster.servers[:self.nodes_in + self.nodes_init],
                self.bucket_util.buckets,
                path=None)
            nodes = self.cluster_util.get_nodes_in_cluster(self.cluster.master)
            self.bucket_util.vb_distribution_analysis(
                servers=nodes,
                buckets=self.bucket_util.buckets,
                num_replicas=2,
                std=std,
                total_vbuckets=self.cluster_util.vbuckets)
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)

            ###################################################################
            self.log.info("Step 13: Failover a node and DeltaRecovery that \
            node with loading in parallel")

            self.std_vbucket_dist = self.input.param("std_vbucket_dist", None)
            std = self.std_vbucket_dist or 1.0

            prev_failover_stats = self.bucket_util.get_failovers_logs(
                self.cluster.nodes_in_cluster, self.bucket_util.buckets)

            disk_replica_dataset, disk_active_dataset = self.bucket_util.\
                get_and_compare_active_replica_data_set_all(
                    self.cluster.nodes_in_cluster,
                    self.bucket_util.buckets,
                    path=None)

            self.rest = RestConnection(self.cluster.master)
            self.nodes = self.cluster_util.get_nodes(self.cluster.master)
            self.chosen = self.cluster_util.pick_nodes(self.cluster.master,
                                                       howmany=1)

            self.generate_docs()
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            # Mark Node for failover
            self.success_failed_over = self.rest.fail_over(self.chosen[0].id,
                                                           graceful=True)
            self.sleep(10)
            self.rest.monitorRebalance()
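            # Delta recovery keeps the failed-over node's existing data files
            # and only catches up the mutations missed since the failover.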
            if self.success_failed_over:
                self.rest.set_recovery_type(otpNode=self.chosen[0].id,
                                            recoveryType="delta")
            self.set_num_writer_and_reader_threads(
                num_writer_threads=self.new_num_writer_threads,
                num_reader_threads=self.new_num_reader_threads)

            rebalance_task = self.task.async_rebalance(
                self.cluster.servers[:self.nodes_init], [], [])
            self.set_num_writer_and_reader_threads(
                num_writer_threads="disk_io_optimized",
                num_reader_threads="disk_io_optimized")
            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result, "Rebalance Failed")

            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())

            self.bucket_util.compare_failovers_logs(
                prev_failover_stats, self.cluster.nodes_in_cluster,
                self.bucket_util.buckets)

            self.bucket_util.data_analysis_active_replica_all(
                disk_active_dataset,
                disk_replica_dataset,
                self.cluster.servers[:self.nodes_in + self.nodes_init],
                self.bucket_util.buckets,
                path=None)
            nodes = self.cluster_util.get_nodes_in_cluster(self.cluster.master)
            self.bucket_util.vb_distribution_analysis(
                servers=nodes,
                buckets=self.bucket_util.buckets,
                num_replicas=2,
                std=std,
                total_vbuckets=self.cluster_util.vbuckets)
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)

            #######################################################################
            self.log.info("Step 14: Updating the bucket replica to 1")
            bucket_helper = BucketHelper(self.cluster.master)
            for i in range(len(self.bucket_util.buckets)):
                bucket_helper.change_bucket_props(self.bucket_util.buckets[i],
                                                  replicaNumber=1)
            self.generate_docs()
            self.set_num_writer_and_reader_threads(
                num_writer_threads=self.new_num_writer_threads,
                num_reader_threads=self.new_num_reader_threads)
            rebalance_task = self.task.async_rebalance(self.cluster.servers,
                                                       [], [])
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.set_num_writer_and_reader_threads(
                num_writer_threads="disk_io_optimized",
                num_reader_threads="disk_io_optimized")

            self.task.jython_task_manager.get_task_result(rebalance_task)
            self.assertTrue(rebalance_task.result, "Rebalance Failed")
            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)

            #######################################################################
            self.log.info("Step 15: Flush the bucket and \
            start the entire process again")
            self.loop += 1
            if self.loop < self.iterations:
                # Flush the bucket
                self.bucket_util.flush_all_buckets(self.cluster.master)
                self.sleep(10)
                if len(self.cluster.nodes_in_cluster) > self.nodes_init:
                    nodes_cluster = self.cluster.nodes_in_cluster[:]
                    nodes_cluster.remove(self.cluster.master)
                    servs_out = random.sample(
                        nodes_cluster,
                        int(
                            len(self.cluster.nodes_in_cluster) -
                            self.nodes_init))
                    rebalance_task = self.task.async_rebalance(
                        self.cluster.servers[:self.nodes_init], [], servs_out)

                    self.task.jython_task_manager.get_task_result(
                        rebalance_task)
                    self.available_servers += servs_out
                    self.cluster.nodes_in_cluster = list(
                        set(self.cluster.nodes_in_cluster) - set(servs_out))
                    self.get_bucket_dgm(self.bucket)
            else:
                self.log.info("Volume Test Run Complete")
                self.get_bucket_dgm(self.bucket)

    def SteadyStateVolume(self):
        #######################################################################
        self.log.info("Step 1: Create a n node cluster")
        if self.nodes_init > 1:
            nodes_init = self.cluster.servers[1:self.nodes_init]
            self.task.rebalance([self.cluster.master], nodes_init, [])
            self.cluster.nodes_in_cluster.extend([self.cluster.master] +
                                                 nodes_init)

        #######################################################################
        self.log.info("Step 2: Create required buckets.")
        self.bucket = self.create_required_buckets()
        self.loop = 0
        scope_name = "VolumeScope"
        collection_prefix = "VolumeCollection"
        self.bucket_util.create_scope(self.cluster.master, self.bucket,
                                      {"name": scope_name})
        for i in range(self.num_collections):
            collection_name = collection_prefix + str(i)
            self.log.info("Creating scope::collection '%s::%s'" %
                          (scope_name, collection_name))
            self.bucket_util.create_collection(self.cluster.master,
                                               self.bucket, scope_name,
                                               {"name": collection_name})
            self.sleep(2)
        #######################################################################
        self.log.info("Step 3: Per-Requisites for Loading of docs")

        self.create_perc = 100
        _iter = 0
        while _iter < 2:
            self.generate_docs(doc_ops="create")
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.data_validation(tasks_info, check_docs=False)
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)
            _iter += 1

        _iter = 0
        self.update_perc = 100
        while _iter < 10:
            self.generate_docs(doc_ops="update")
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)
            _iter += 1

        for i in range(1, self.num_collections, 2):
            collection_name = collection_prefix + str(i)
            self.bucket_util.drop_collection(self.cluster.master, self.bucket,
                                             scope_name, collection_name)
            self.bucket.scopes[scope_name].collections.pop(collection_name)

        self.update_perc = self.input.param("update_perc", 100)
        self.create_perc = self.input.param("create_perc", 100)
        _iter = 0
        while _iter < 10:
            self.generate_docs()
            tasks_info = self.data_load(
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.data_validation(
                tasks_info,
                scope=scope_name,
                collections=self.bucket.scopes[scope_name].collections.keys())
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(self.bucket)
            _iter += 1
Example n. 4
    def test_MB_40531(self):
        """
        Test to validate,
        1. Active resident ratio on the nodes never goes
           down below the replica_rr value
        2. 'evictable' (vb_replica_itm_mem - vb_replica_meta_data_mem) value
           never goes below wm_threshold of total bucket memory (ep_max_size)
        :return:
        """
        def check_replica_eviction():
            tbl = TableView(self.log.info)
            tbl.set_headers([
                "Node", "Memory", "WM_Threshold", "Itm_mem", "Meta_mem",
                "Evictable_mem", "A_rr", "R_rr"
            ])
            while self.test_failure is None and run_eviction_check:
                tbl.rows = []
                for kv_node in node_data.keys():
                    all_stats = \
                        node_data[kv_node]["cbstat"].all_stats(bucket.name)
                    bucket_mem = int(all_stats["ep_max_size"])
                    wm_threshold = \
                        (float(all_stats["ep_mem_high_wat_percent"])
                         - float(all_stats["ep_mem_low_wat_percent"]))*100
                    evictable_mem = \
                        int(all_stats["vb_replica_itm_memory"]) \
                        - int(all_stats["vb_replica_meta_data_memory"])
                    active_rr = int(all_stats["vb_active_perc_mem_resident"])
                    replica_rr = int(all_stats["vb_replica_perc_mem_resident"])

                    tbl.add_row([
                        kv_node.ip,
                        str(bucket_mem),
                        str(wm_threshold), all_stats["vb_replica_itm_memory"],
                        all_stats["vb_replica_meta_data_memory"],
                        str(evictable_mem),
                        str(active_rr),
                        str(replica_rr)
                    ])

                    if active_rr != 100 \
                            and evictable_mem > (bucket_mem/wm_threshold):
                        tbl.display("Node memory stats")
                        self.log_failure("%s - Active keys evicted before "
                                         "meeting the threshold: %s" %
                                         (kv_node.ip, all_stats))

                    if replica_rr > active_rr:
                        tbl.display("Node memory stats")
                        self.log_failure(
                            "%s: (active_rr) %s < %s (replica_rr)" %
                            (kv_node.ip, active_rr, replica_rr))

        bucket = self.bucket_util.buckets[0]
        node_data = dict()
        kv_nodes = self.cluster_util.get_kv_nodes()
        for node in kv_nodes:
            cbstat = Cbstats(RemoteMachineShellConnection(node))
            node_data[node] = dict()
            node_data[node]["cbstat"] = cbstat
            node_data[node]["active"] = cbstat.vbucket_list(
                bucket.name, "active")
            node_data[node]["replica"] = cbstat.vbucket_list(
                bucket.name, "replica")

        target_dgm = 30
        run_eviction_check = True
        bucket_helper = BucketHelper(self.cluster.master)

        eviction_check_thread = Thread(target=check_replica_eviction)
        eviction_check_thread.start()

        op_index = 0
        op_batch_size = 8000
        create_batch_size = 10000

        # Perform ADD/SET/READ until targeted DGM value is reached
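        # Creates keep growing the item count while updates/reads walk a
        # sliding op_index window; the background thread keeps verifying
        # that replicas are evicted ahead of active items during the load.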
        curr_dgm = bucket_helper.fetch_bucket_stats(
            bucket.name)["op"]["samples"]["vb_active_resident_items_ratio"][-1]
        self.log.info("Wait for DGM to reach %s%%. Current DGM: %s%%" %
                      (target_dgm, curr_dgm))
        while int(curr_dgm) > target_dgm and self.test_failure is None:
            create_gen = doc_generator(self.key,
                                       self.num_items,
                                       self.num_items + create_batch_size,
                                       key_size=self.key_size,
                                       doc_size=self.doc_size,
                                       mutation_type="ADD")
            update_gen = doc_generator(self.key,
                                       op_index,
                                       op_index + op_batch_size,
                                       key_size=self.key_size,
                                       doc_size=self.doc_size,
                                       mutation_type="ADD")
            read_gen = doc_generator(self.key,
                                     op_index,
                                     op_index + op_batch_size,
                                     key_size=self.key_size,
                                     doc_size=0)

            create_task = self.task.async_load_gen_docs(
                self.cluster,
                bucket,
                create_gen,
                "create",
                0,
                persist_to=self.persist_to,
                replicate_to=self.replicate_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                print_ops_rate=False,
                batch_size=200,
                process_concurrency=1)
            update_task = self.task.async_load_gen_docs(
                self.cluster,
                bucket,
                update_gen,
                "update",
                0,
                persist_to=self.persist_to,
                replicate_to=self.replicate_to,
                durability=self.durability_level,
                timeout_secs=self.sdk_timeout,
                print_ops_rate=False,
                batch_size=200,
                process_concurrency=1)
            read_task = self.task.async_load_gen_docs(
                self.cluster,
                bucket,
                read_gen,
                "read",
                timeout_secs=self.sdk_timeout,
                print_ops_rate=False,
                batch_size=200,
                process_concurrency=1)

            self.task_manager.get_task_result(create_task)
            self.task_manager.get_task_result(update_task)
            self.task_manager.get_task_result(read_task)

            # Update indexes for next iteration
            op_index += op_batch_size
            self.num_items += create_batch_size

            curr_dgm = bucket_helper.fetch_bucket_stats(
                bucket.name
            )["op"]["samples"]["vb_active_resident_items_ratio"][-1]
            self.log.info("Current DGM: %s%%" % curr_dgm)

        # Stop eviction check thread
        run_eviction_check = False
        eviction_check_thread.join()

        # Close shell connections
        for node in kv_nodes:
            node_data[node]["cbstat"].shellConn.disconnect()

        self.validate_test_failure()
Example n. 5
class MagmaBaseTest(BaseTestCase):
    def setUp(self):
        super(MagmaBaseTest, self).setUp()
        self.vbuckets = self.input.param("vbuckets",
                                         self.cluster_util.vbuckets)
        self.rest = RestConnection(self.cluster.master)
        self.bucket_ram_quota = self.input.param("bucket_ram_quota", None)
        self.fragmentation = int(self.input.param("fragmentation", 50))
        self.check_temporary_failure_exception = False
        self.retry_exceptions = [
            SDKException.TimeoutException,
            SDKException.AmbiguousTimeoutException,
            SDKException.RequestCanceledException,
            SDKException.UnambiguousTimeoutException
        ]
        self.ignore_exceptions = []
        # Sets autocompaction at bucket level
        self.autoCompactionDefined = str(
            self.input.param("autoCompactionDefined", "false")).lower()
        # Create Cluster
        self.rest.init_cluster(username=self.cluster.master.rest_username,
                               password=self.cluster.master.rest_password)

        nodes_init = self.cluster.servers[1:self.nodes_init]
        self.services = ["kv"] * (self.nodes_init)

        self.dcp_services = self.input.param("dcp_services", None)
        self.dcp_servers = []
        if self.dcp_services:
            server = self.rest.get_nodes_self()
            self.rest.set_service_memoryQuota(
                service='indexMemoryQuota',
                memoryQuota=int(server.mcdMemoryReserved - 100))
            self.dcp_services = [
                service.replace(":", ",")
                for service in self.dcp_services.split("-")
            ]
            self.services.extend(self.dcp_services)
            self.dcp_servers = self.cluster.servers[self.nodes_init:self.
                                                    nodes_init +
                                                    len(self.dcp_services)]
        nodes_in = nodes_init + self.dcp_servers
        result = self.task.rebalance([self.cluster.master],
                                     nodes_in, [],
                                     services=self.services[1:])
        self.assertTrue(result, "Initial rebalance failed")
        self.cluster.nodes_in_cluster.extend([self.cluster.master] + nodes_in)
        for idx, node in enumerate(self.cluster.nodes_in_cluster):
            node.services = self.services[idx]
        # Create Buckets
        self.bucket_storage = self.input.param("bucket_storage",
                                               Bucket.StorageBackend.magma)
        self.bucket_eviction_policy = self.input.param(
            "bucket_eviction_policy", Bucket.EvictionPolicy.FULL_EVICTION)
        self.bucket_util.add_rbac_user()
        self.bucket_name = self.input.param("bucket_name", None)

        self.magma_buckets = self.input.param("magma_buckets", 0)
        if self.standard_buckets > 10:
            self.bucket_util.change_max_buckets(self.standard_buckets)
        if self.standard_buckets == 1:
            self._create_default_bucket()
        else:
            self._create_multiple_buckets()

        self.buckets = self.bucket_util.buckets

        # self.num_collections == 1 signifies only the default collection
        self.num_collections = self.input.param("num_collections", 1)
        self.num_scopes = self.input.param("num_scopes", 1)

        self.scope_name = CbServer.default_scope
        # Create scopes if num_scopes > 1
        scope_prefix = "Scope"
        for bucket in self.bucket_util.buckets:
            for i in range(1, self.num_scopes):
                scope_name = scope_prefix + str(i)
                self.log.info("Creating bucket::scope {} {}\
                ".format(bucket.name, scope_name))
                self.bucket_util.create_scope(self.cluster.master, bucket,
                                              {"name": scope_name})
                self.sleep(2)
        self.scopes = self.buckets[0].scopes.keys()
        self.log.info("Scopes list is {}".format(self.scopes))

        collection_prefix = "FunctionCollection"
        # Create collections if num_collections > 1
        for bucket in self.bucket_util.buckets:
            for scope_name in self.scopes:
                for i in range(1, self.num_collections):
                    collection_name = collection_prefix + str(i)
                    self.log.info("Creating scope::collection {} {}\
                    ".format(scope_name, collection_name))
                    self.bucket_util.create_collection(
                        self.cluster.master, bucket, scope_name,
                        {"name": collection_name})
                    self.sleep(2)
        self.collections = self.buckets[0].scopes[
            self.scope_name].collections.keys()
        self.log.debug("Collections list == {}".format(self.collections))

        if self.dcp_services and self.num_collections == 1:
            self.initial_idx = "initial_idx"
            self.initial_idx_q = "CREATE INDEX %s on default:`%s`.`%s`.`%s`(meta().id) with \
                {\"defer_build\": false};" % (
                self.initial_idx, self.buckets[0].name, self.scope_name,
                self.collections[0])
            self.query_client = RestConnection(self.dcp_servers[0])
            result = self.query_client.query_tool(self.initial_idx_q)
            self.assertTrue(result["status"] == "success",
                            "Index query failed!")

        # Update Magma/Storage Properties
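        # Extra storage settings are appended to the bucket "backend"
        # property as a ';'-separated string, e.g.
        # "magma;magma_max_checkpoints=0" when commit points are disabled.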
        props = "magma"
        update_bucket_props = False

        self.disable_magma_commit_points = self.input.param(
            "disable_magma_commit_points", False)
        self.max_commit_points = self.input.param("max_commit_points", None)

        if self.disable_magma_commit_points:
            self.max_commit_points = 0

        if self.max_commit_points is not None:
            props += ";magma_max_checkpoints={}".format(self.max_commit_points)
            self.log.debug("props== {}".format(props))
            update_bucket_props = True

        if update_bucket_props:
            self.bucket_util.update_bucket_props("backend", props,
                                                 self.bucket_util.buckets)

        # Monitor Stats Params
        self.ep_queue_stats = self.input.param("ep_queue_stats", True)
        self.monitor_stats = ["doc_ops", "ep_queue_size"]
        if not self.ep_queue_stats:
            self.monitor_stats = ["doc_ops"]
        # Disk usage before data load
        self.disk_usage_before_loading = self.get_disk_usage(
            self.buckets[0], self.cluster.nodes_in_cluster)[0]
        self.log.info("disk usage before loading {}".format(
            self.disk_usage_before_loading))

        # Doc controlling params
        self.key = 'test_docs'
        if self.random_key:
            self.key = "random_keys"
        self.doc_ops = self.input.param("doc_ops", "create")
        self.key_size = self.input.param("key_size", 8)
        self.doc_size = self.input.param("doc_size", 2048)
        self.gen_create = None
        self.gen_delete = None
        self.gen_read = None
        self.gen_update = None
        self.gen_expiry = None
        self.create_perc = self.input.param("update_perc", 100)
        self.update_perc = self.input.param("update_perc", 0)
        self.delete_perc = self.input.param("delete_perc", 0)
        self.expiry_perc = self.input.param("expiry_perc", 0)
        self.start = 0
        self.end = 0
        self.create_start = None
        self.create_end = None
        self.update_start = None
        self.update_end = None
        self.delete_start = None
        self.delete_end = None
        self.read_start = None
        self.read_end = None
        self.expiry_start = None
        self.expiry_end = None
        self.mutate = 0
        self.init_items_per_collection = self.num_items
        self.maxttl = self.input.param("maxttl", 10)

        # Common test params
        self.test_itr = self.input.param("test_itr", 4)
        self.update_itr = self.input.param("update_itr", 2)
        self.next_half = self.input.param("next_half", False)
        self.deep_copy = self.input.param("deep_copy", False)
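        # Loads targeting < 100% active resident ratio (DGM) can hit
        # TemporaryFailure under memory pressure, so treat it as retryable.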
        if self.active_resident_threshold < 100:
            self.check_temporary_failure_exception = True
        # self.read_thread_count defines the number of threads used to
        # read the same number of documents in parallel
        self.read_thread_count = self.input.param("read_thread_count", 4)
        self.disk_usage = dict()

        # Initial Data Load
        self.initial_load()
        self.log.info("==========Finished magma base setup========")

    def initial_load(self):
        self.create_start = 0
        self.create_end = self.init_items_per_collection
        if self.rev_write:
            self.create_start = -int(self.init_items_per_collection - 1)
            self.create_end = 1

        self.generate_docs(doc_ops="create")
        self.init_loading = self.input.param("init_loading", True)
        self.dgm_batch = self.input.param("dgm_batch", 5000)
        if self.init_loading:
            self.log.debug("initial_items_in_each_collection {}".format(
                self.init_items_per_collection))

            tasks_info = dict()
            for collection in self.collections:
                self.generate_docs(doc_ops="create", target_vbucket=None)
                tem_tasks_info = self.loadgen_docs(self.retry_exceptions,
                                                   self.ignore_exceptions,
                                                   scope=self.scope_name,
                                                   collection=collection,
                                                   _sync=False,
                                                   doc_ops="create")
                tasks_info.update(tem_tasks_info.items())
            for task in tasks_info:
                self.task_manager.get_task_result(task)
            self.bucket_util.verify_doc_op_task_exceptions(
                tasks_info, self.cluster)
            self.bucket_util.log_doc_ops_task_failures(tasks_info)
            self.bucket_util._wait_for_stats_all_buckets(timeout=3600)
            if self.standard_buckets == 1 or self.standard_buckets == self.magma_buckets:
                for bucket in self.bucket_util.get_all_buckets():
                    disk_usage = self.get_disk_usage(
                        bucket, self.cluster.nodes_in_cluster)
                    self.disk_usage[bucket.name] = disk_usage[0]
                    self.log.info(
                        "For bucket {} disk usage after initial creation is {}MB\
                        ".format(bucket.name, self.disk_usage[bucket.name]))
            self.num_items = self.init_items_per_collection * self.num_collections
        self.read_start = 0
        self.read_end = self.init_items_per_collection

    def _create_default_bucket(self):
        self.bucket_util.create_default_bucket(
            bucket_type=self.bucket_type,
            ram_quota=self.bucket_ram_quota,
            replica=self.num_replicas,
            storage=self.bucket_storage,
            eviction_policy=self.bucket_eviction_policy,
            autoCompactionDefined=self.autoCompactionDefined,
            fragmentation_percentage=self.fragmentation)

    def _create_multiple_buckets(self):
        buckets_created = self.bucket_util.create_multiple_buckets(
            self.cluster.master,
            self.num_replicas,
            bucket_count=self.standard_buckets,
            bucket_type=self.bucket_type,
            storage={
                "couchstore": self.standard_buckets - self.magma_buckets,
                "magma": self.magma_buckets
            },
            eviction_policy=self.bucket_eviction_policy,
            bucket_name=self.bucket_name,
            fragmentation_percentage=self.fragmentation)
        self.assertTrue(buckets_created, "Unable to create multiple buckets")

        for bucket in self.bucket_util.buckets:
            ready = self.bucket_util.wait_for_memcached(
                self.cluster.master, bucket)
            self.assertTrue(ready, msg="Wait_for_memcached failed")

    def tearDown(self):
        self.cluster_util.print_cluster_stats()
        dgm = None
        timeout = 65
        while dgm is None and timeout > 0:
            try:
                stats = BucketHelper(self.cluster.master).fetch_bucket_stats(
                    self.buckets[0].name)
                dgm = stats["op"]["samples"]["vb_active_resident_items_ratio"][
                    -1]
            except:
                self.log.debug(
                    "Fetching vb_active_resident_items_ratio(dgm) failed...retying"
                )
                timeout -= 1
                time.sleep(1)
        self.log.info("## Active Resident Threshold of {0} is {1} ##".format(
            self.buckets[0].name, dgm))
        super(MagmaBaseTest, self).tearDown()

    def run_compaction(self, compaction_iterations=5):
        for _ in range(compaction_iterations):
            compaction_tasks = list()
            for bucket in self.bucket_util.buckets:
                compaction_tasks.append(
                    self.task.async_compact_bucket(self.cluster.master,
                                                   bucket))
            for task in compaction_tasks:
                self.task_manager.get_task_result(task)

    def validate_seq_itr(self):
        if self.dcp_services and self.num_collections == 1:
            index_build_q = "SELECT state FROM system:indexes WHERE name='{}';"
            start = time.time()
            result = False
            while start + 300 > time.time():
                result = self.query_client.query_tool(index_build_q.format(
                    self.initial_idx),
                                                      timeout=60)
                if result["results"][0]["state"] == "online":
                    result = True
                    break
                self.sleep(5)
            self.assertTrue(result, "initial_idx Index warmup failed")
            self.final_idx = "final_idx"
            self.final_idx_q = "CREATE INDEX %s on default:`%s`.`%s`.`%s`(body) with \
                {\"defer_build\": false};" % (
                self.final_idx, self.buckets[0].name, self.scope_name,
                self.collections[0])
            result = self.query_client.query_tool(self.final_idx_q,
                                                  timeout=3600)
            start = time.time()
            if result["status"] != "success":
                while start + 300 > time.time():
                    result = self.query_client.query_tool(index_build_q.format(
                        self.final_idx),
                                                          timeout=60)
                    if result["results"][0]["state"] == "online":
                        result = True
                        break
                    self.sleep(5)
                self.assertTrue(result, "final_idx Index warmup failed")
            else:
                self.assertTrue(result["status"] == "success",
                                "Index query failed!")
            self.sleep(5)
            self.initial_count_q = "Select count(*) as items "\
                "from default:`{}`.`{}`.`{}` where meta().id like '%%';".format(
                    self.buckets[0].name, self.scope_name, self.collections[0])
            self.final_count_q = "Select count(*) as items "\
                "from default:`{}`.`{}`.`{}` where body like '%%';".format(
                    self.buckets[0].name, self.scope_name, self.collections[0])
            self.log.info(self.initial_count_q)
            self.log.info(self.final_count_q)
            initial_count, final_count = 0, 0
            kv_items = self.bucket_util.get_bucket_current_item_count(
                self.cluster, self.buckets[0])
            start = time.time()
            while start + 300 > time.time():
                kv_items = self.bucket_util.get_bucket_current_item_count(
                    self.cluster, self.buckets[0])
                self.log.info("Items in KV: %s" % kv_items)
                initial_count = self.query_client.query_tool(
                    self.initial_count_q)["results"][0]["items"]

                self.log.info("## Initial Index item count in %s:%s:%s == %s" %
                              (self.buckets[0].name, self.scope_name,
                               self.collections[0], initial_count))

                final_count = self.query_client.query_tool(
                    self.final_count_q)["results"][0]["items"]
                self.log.info("## Final Index item count in %s:%s:%s == %s" %
                              (self.buckets[0].name, self.scope_name,
                               self.collections[0], final_count))

                if initial_count != kv_items or final_count != kv_items:
                    self.sleep(5)
                    continue
                break
            self.assertTrue(
                initial_count == kv_items,
                "Indexer failed. KV:{}, Initial:{}".format(
                    kv_items, initial_count))
            self.assertTrue(
                final_count == kv_items,
                "Indexer failed. KV:{}, Final:{}".format(
                    kv_items, final_count))

    def genrate_docs_basic(self, start, end, target_vbucket=None, mutate=0):
        return doc_generator(self.key,
                             start,
                             end,
                             doc_size=self.doc_size,
                             doc_type=self.doc_type,
                             target_vbucket=target_vbucket,
                             vbuckets=self.cluster_util.vbuckets,
                             key_size=self.key_size,
                             randomize_doc_size=self.randomize_doc_size,
                             randomize_value=self.randomize_value,
                             mix_key_size=self.mix_key_size,
                             mutate=mutate,
                             deep_copy=self.deep_copy)

    def generate_docs(self,
                      doc_ops=None,
                      target_vbucket=None,
                      create_end=None,
                      create_start=None,
                      create_mutate=0,
                      update_end=None,
                      update_start=None,
                      update_mutate=0,
                      read_end=None,
                      read_start=None,
                      read_mutate=0,
                      delete_end=None,
                      delete_start=None,
                      expiry_end=None,
                      expiry_start=None,
                      expiry_mutate=0):

        doc_ops = doc_ops or self.doc_ops
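        # Each doc op keeps its own [start, end) key window. Explicit
        # *_start/*_end arguments override the stored values; otherwise the
        # windows default to slices of self.start/self.end scaled by the
        # corresponding *_perc parameters.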

        if "update" in doc_ops:
            if update_start is not None:
                self.update_start = update_start
            if update_end is not None:
                self.update_end = update_end

            if self.update_start is None:
                self.update_start = self.start
            if self.update_end is None:
                self.update_end = self.end * self.update_perc / 100

            self.mutate += 1
            self.gen_update = self.genrate_docs_basic(
                self.update_start,
                self.update_end,
                target_vbucket=target_vbucket,
                mutate=self.mutate)
        if "delete" in doc_ops:
            if delete_start is not None:
                self.delete_start = delete_start
            if delete_end is not None:
                self.delete_end = delete_end

            if self.delete_start is None:
                self.delete_start = self.start
            if self.delete_end is None:
                self.delete_end = self.end * self.delete_perc / 100

            self.gen_delete = self.genrate_docs_basic(
                self.delete_start,
                self.delete_end,
                target_vbucket=target_vbucket,
                mutate=read_mutate)
        if "create" in doc_ops:
            if create_start is not None:
                self.create_start = create_start
            if self.create_start is None:
                self.create_start = self.end
            self.start = self.create_start

            if create_end is not None:
                self.create_end = create_end
            if self.create_end is None:
                self.create_end = self.start + self.num_items * self.create_perc / 100
            self.end = self.create_end

            self.gen_create = self.genrate_docs_basic(
                self.create_start,
                self.create_end,
                target_vbucket=target_vbucket,
                mutate=create_mutate)
        if "read" in doc_ops:
            if read_start is not None:
                self.read_start = read_start
            if read_end is not None:
                self.read_end = read_end

            if self.read_start is None:
                self.read_start = self.create_start
            if self.read_end is None:
                self.read_end = self.create_end

            self.gen_read = self.genrate_docs_basic(
                self.read_start,
                self.read_end,
                target_vbucket=target_vbucket,
                mutate=read_mutate)
        if "expiry" in doc_ops:
            if expiry_start is not None:
                self.expiry_start = expiry_start
            elif self.expiry_start is None:
                self.expiry_start = self.start + (self.num_items *
                                                  self.delete_perc) / 100

            if expiry_end is not None:
                self.expiry_end = expiry_end
            elif self.expiry_end is None:
                self.expiry_end = self.start + self.num_items * \
                    (self.delete_perc + self.expiry_perc) / 100

            self.gen_expiry = self.genrate_docs_basic(
                self.expiry_start,
                self.expiry_end,
                target_vbucket=target_vbucket,
                mutate=expiry_mutate)

    def _load_all_buckets(self,
                          cluster,
                          kv_gen,
                          op_type,
                          exp,
                          flag=0,
                          only_store_hash=True,
                          batch_size=1000,
                          pause_secs=1,
                          timeout_secs=30,
                          compression=True,
                          dgm_batch=5000,
                          skip_read_on_error=False,
                          suppress_error_table=False,
                          track_failures=True):

        retry_exceptions = self.retry_exceptions
        tasks_info = self.bucket_util.sync_load_all_buckets(
            cluster,
            kv_gen,
            op_type,
            exp,
            flag,
            persist_to=self.persist_to,
            replicate_to=self.replicate_to,
            durability=self.durability_level,
            timeout_secs=timeout_secs,
            only_store_hash=only_store_hash,
            batch_size=batch_size,
            pause_secs=pause_secs,
            sdk_compression=compression,
            process_concurrency=self.process_concurrency,
            retry_exceptions=retry_exceptions,
            active_resident_threshold=self.active_resident_threshold,
            skip_read_on_error=skip_read_on_error,
            suppress_error_table=suppress_error_table,
            dgm_batch=dgm_batch,
            monitor_stats=self.monitor_stats,
            track_failures=track_failures)
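        # For DGM-targeted loads the loader stops once the resident-ratio
        # target is reached, so read the actual doc count back from the task.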
        if self.active_resident_threshold < 100:
            for task, _ in tasks_info.items():
                self.num_items = task.doc_index
        self.assertTrue(self.bucket_util.doc_ops_tasks_status(tasks_info),
                        "Doc_ops failed in MagmaBase._load_all_buckets")
        return tasks_info

    def loadgen_docs(self,
                     retry_exceptions=[],
                     ignore_exceptions=[],
                     skip_read_on_error=False,
                     suppress_error_table=False,
                     scope=CbServer.default_scope,
                     collection=CbServer.default_collection,
                     _sync=True,
                     track_failures=True,
                     doc_ops=None):
        doc_ops = doc_ops or self.doc_ops

        tasks_info = dict()
        read_tasks_info = dict()
        read_task = False

        if self.check_temporary_failure_exception:
            retry_exceptions.append(SDKException.TemporaryFailureException)

        if "update" in doc_ops and self.gen_update is not None:
            tem_tasks_info = self.bucket_util._async_load_all_buckets(
                self.cluster,
                self.gen_update,
                "update",
                0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                persist_to=self.persist_to,
                replicate_to=self.replicate_to,
                durability=self.durability_level,
                pause_secs=5,
                timeout_secs=self.sdk_timeout,
                retries=self.sdk_retries,
                retry_exceptions=retry_exceptions,
                ignore_exceptions=ignore_exceptions,
                skip_read_on_error=skip_read_on_error,
                suppress_error_table=suppress_error_table,
                scope=scope,
                collection=collection,
                monitor_stats=self.monitor_stats,
                track_failures=track_failures)
            tasks_info.update(tem_tasks_info.items())
        if "create" in doc_ops and self.gen_create is not None:
            tem_tasks_info = self.bucket_util._async_load_all_buckets(
                self.cluster,
                self.gen_create,
                "create",
                0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                persist_to=self.persist_to,
                replicate_to=self.replicate_to,
                durability=self.durability_level,
                pause_secs=5,
                timeout_secs=self.sdk_timeout,
                retries=self.sdk_retries,
                retry_exceptions=retry_exceptions,
                ignore_exceptions=ignore_exceptions,
                skip_read_on_error=skip_read_on_error,
                suppress_error_table=suppress_error_table,
                scope=scope,
                collection=collection,
                monitor_stats=self.monitor_stats,
                track_failures=track_failures)
            tasks_info.update(tem_tasks_info.items())
            self.num_items += (self.gen_create.end - self.gen_create.start)
        if "expiry" in doc_ops and self.gen_expiry is not None and self.maxttl:
            tem_tasks_info = self.bucket_util._async_load_all_buckets(
                self.cluster,
                self.gen_expiry,
                "update",
                self.maxttl,
                self.random_exp,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                persist_to=self.persist_to,
                replicate_to=self.replicate_to,
                durability=self.durability_level,
                pause_secs=5,
                timeout_secs=self.sdk_timeout,
                retries=self.sdk_retries,
                retry_exceptions=retry_exceptions,
                ignore_exceptions=ignore_exceptions,
                skip_read_on_error=skip_read_on_error,
                suppress_error_table=suppress_error_table,
                scope=scope,
                collection=collection,
                monitor_stats=self.monitor_stats,
                track_failures=track_failures)
            tasks_info.update(tem_tasks_info.items())
            self.num_items -= (self.gen_expiry.end - self.gen_expiry.start)
        if "read" in doc_ops and self.gen_read is not None:
            read_tasks_info = self.bucket_util._async_validate_docs(
                self.cluster,
                self.gen_read,
                "read",
                0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                pause_secs=5,
                timeout_secs=self.sdk_timeout,
                retry_exceptions=retry_exceptions,
                ignore_exceptions=ignore_exceptions,
                scope=scope,
                collection=collection)
            read_task = True
        if "delete" in doc_ops and self.gen_delete is not None:
            tem_tasks_info = self.bucket_util._async_load_all_buckets(
                self.cluster,
                self.gen_delete,
                "delete",
                0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                persist_to=self.persist_to,
                replicate_to=self.replicate_to,
                durability=self.durability_level,
                pause_secs=5,
                timeout_secs=self.sdk_timeout,
                retries=self.sdk_retries,
                retry_exceptions=retry_exceptions,
                ignore_exceptions=ignore_exceptions,
                skip_read_on_error=skip_read_on_error,
                suppress_error_table=suppress_error_table,
                scope=scope,
                collection=collection,
                monitor_stats=self.monitor_stats,
                track_failures=track_failures)
            tasks_info.update(tem_tasks_info.items())
            self.num_items -= (self.gen_delete.end - self.gen_delete.start)

        if _sync:
            for task in tasks_info:
                self.task_manager.get_task_result(task)

            self.bucket_util.verify_doc_op_task_exceptions(
                tasks_info, self.cluster)
            self.bucket_util.log_doc_ops_task_failures(tasks_info)

        if read_task:
            # TODO: Need to converge read_tasks_info into tasks_info before
            #       itself to avoid confusions during _sync=False case
            tasks_info.update(read_tasks_info.items())
            if _sync:
                for task in read_tasks_info:
                    self.task_manager.get_task_result(task)

        return tasks_info

    def get_bucket_dgm(self, bucket):
        self.rest_client = BucketHelper(self.cluster.master)
        count = 0
        dgm = 100
        while count < 5:
            try:
                dgm = self.rest_client.fetch_bucket_stats(
                    bucket.name
                )["op"]["samples"]["vb_active_resident_items_ratio"][-1]
                self.log.info("Active Resident Threshold of {0} is {1}".format(
                    bucket.name, dgm))
                return dgm
            except Exception as e:
                self.sleep(5, e)
            count += 1
        return dgm

    def get_magma_stats(self, bucket, servers=None, field_to_grep=None):
        magma_stats_for_all_servers = dict()
        servers = servers or self.cluster.nodes_in_cluster
        if type(servers) is not list:
            servers = [servers]
        for server in servers:
            result = dict()
            shell = RemoteMachineShellConnection(server)
            cbstat_obj = Cbstats(shell)
            result = cbstat_obj.magma_stats(bucket.name,
                                            field_to_grep=field_to_grep)
            shell.disconnect()
            magma_stats_for_all_servers[server.ip] = result
        return magma_stats_for_all_servers

    def get_disk_usage(self, bucket, servers=None):
        disk_usage = []
        if servers is None:
            servers = self.cluster.nodes_in_cluster
        if type(servers) is not list:
            servers = [servers]
        kvstore = 0
        wal = 0
        keyTree = 0
        seqTree = 0
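        # Sum `du -cm` output (MB) over the magma kvstore, WAL, key-index
        # and seq-index directories across all the given servers.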
        for server in servers:
            shell = RemoteMachineShellConnection(server)
            kvstore += int(
                shell.execute_command("du -cm %s | tail -1 | awk '{print $1}'\
            " % os.path.join(
                    RestConnection(server).get_data_path(), bucket.name,
                    "magma.*/kv*"))[0][0].split('\n')[0])
            wal += int(
                shell.execute_command("du -cm %s | tail -1 | awk '{print $1}'\
            " % os.path.join(
                    RestConnection(server).get_data_path(), bucket.name,
                    "magma.*/wal"))[0][0].split('\n')[0])
            keyTree += int(
                shell.execute_command("du -cm %s | tail -1 | awk '{print $1}'\
            " % os.path.join(
                    RestConnection(server).get_data_path(), bucket.name,
                    "magma.*/kv*/rev*/key*"))[0][0].split('\n')[0])
            seqTree += int(
                shell.execute_command("du -cm %s | tail -1 | awk '{print $1}'\
            " % os.path.join(
                    RestConnection(server).get_data_path(), bucket.name,
                    "magma.*/kv*/rev*/seq*"))[0][0].split('\n')[0])
            shell.disconnect()
        self.log.info("Disk usage stats for bucekt {} is below".format(
            bucket.name))
        self.log.info("Total Disk usage for kvstore is {}MB".format(kvstore))
        self.get_bucket_dgm(bucket)
        self.log.debug("Total Disk usage for wal is {}MB".format(wal))
        self.log.debug("Total Disk usage for keyTree is {}MB".format(keyTree))
        self.log.debug("Total Disk usage for seqTree is {}MB".format(seqTree))
        disk_usage.extend([kvstore, wal, keyTree, seqTree])
        return disk_usage

    def change_swap_space(self, servers=None, disable=True):
        servers = servers or self.cluster.nodes_in_cluster
        if type(servers) is not list:
            servers = [servers]
        for server in servers:
            shell = RemoteMachineShellConnection(server)
            # The last line of `free` reports swap; $2 is its total size,
            # which should become 0 once swap is disabled
            if disable:
                _ = shell.execute_command("swapoff -a")
                self.sleep(5)
                output = shell.execute_command(
                    "free | tail -1 | awk '{print $2}'")[0][0].split('\n')[0]
                self.assertEqual(
                    int(output), 0,
                    msg="Failed to disable swap space on server {} "
                        "having value {}".format(server, output))
            else:
                _ = shell.execute_command("swapon -a")
                self.sleep(5)
                output = shell.execute_command(
                    "free | tail -1 | awk '{print $2}'")[0][0].split('\n')[0]
                self.assertNotEqual(
                    int(output), 0,
                    msg="Failed to enable swap space on server {} "
                        "having value {}".format(server, output))
        return

    def check_fragmentation_using_magma_stats(self, bucket, servers=None):
        result = dict()
        time_end = time.time() + 60 * 5
        if servers is None:
            servers = self.cluster.nodes_in_cluster
        if type(servers) is not list:
            servers = [servers]
        while time.time() < time_end:
            stats = list()
            for server in servers:
                fragmentation_values = list()
                shell = RemoteMachineShellConnection(server)
                output = shell.execute_command(
                    "lscpu | grep 'CPU(s)' | head -1 | awk '{print $2}'"
                )[0][0].split('\n')[0]
                shell.disconnect()
                self.log.debug("machine: {} - core(s): {}\
                ".format(server.ip, output))
                for i in range(min(int(output), 64)):
                    grep_field = "rw_{}:magma".format(i)
                    _res = self.get_magma_stats(bucket, [server],
                                                field_to_grep=grep_field)
                    fragmentation_values.append(
                        float(_res[server.ip][grep_field]["Fragmentation"]))
                    stats.append(_res)
                result.update({server.ip: fragmentation_values})
            res = list()
            for value in result.values():
                res.append(max(value))
            if max(res) < float(self.fragmentation) / 100:
                self.log.info("magma stats fragmentation result {}"
                              .format(result))
                return True
        self.log.info("magma stats fragmentation result {}"
                      .format(result))
        self.log.info(stats)
        return False

    def check_fragmentation_using_bucket_stats(self, bucket, servers=None):
        # Disabling the check for time being
        #return True
        result = dict()
        if servers is None:
            servers = self.cluster.nodes_in_cluster
        if type(servers) is not list:
            servers = [servers]
        time_end = time.time() + 60 * 5
        while time.time() < time_end:
            for server in servers:
                frag_val = self.bucket_util.get_fragmentation_kv(
                    bucket, server)
                self.log.debug("Current Fragmentation for node {} is {} \
                ".format(server.ip, frag_val))
                result.update({server.ip: frag_val})
            if max(result.values()) < self.fragmentation:
                self.log.info(
                    "KV stats fragmentation values {}".format(result))
                return True
        self.log.info("KV stats fragmentation values {}".format(result))
        return False

    def get_fragmentation_upsert_docs_list(self):
        """
         This function gives the list of "number of docs" need
         to be updated to touch the given fragmentation value
        """
        update_doc_count = int(
            math.ceil(
                float(self.fragmentation * self.num_items) /
                (100 - self.fragmentation)))

        upsert_doc_list = list()
        while update_doc_count > self.num_items:
            upsert_doc_list.append(self.num_items)
            update_doc_count -= self.num_items
        if update_doc_count > 0:
            upsert_doc_list.append(update_doc_count)
        self.log.info("Upsert list {}".format(upsert_doc_list))
        return upsert_doc_list
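    # Worked example of the formula above (numbers are illustrative): with
    # fragmentation=50 and num_items=1000, update_doc_count =
    # ceil(50*1000 / (100-50)) = 1000, so upsert_doc_list == [1000].
    # With fragmentation=80, update_doc_count = ceil(80*1000/20) = 4000,
    # giving [1000, 1000, 1000, 1000].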

    def validate_data(self, op_type, kv_gen, _sync=True):
        self.log.info("Validating Docs")
        validate_tasks_info = dict()
        for collection in self.collections:
            temp_tasks_info = self.bucket_util._async_validate_docs(
                self.cluster,
                kv_gen,
                op_type,
                0,
                batch_size=self.batch_size,
                process_concurrency=self.process_concurrency,
                pause_secs=5,
                timeout_secs=self.sdk_timeout,
                scope=self.scope_name,
                collection=collection,
                retry_exceptions=self.retry_exceptions,
                ignore_exceptions=self.ignore_exceptions)
            validate_tasks_info.update(temp_tasks_info.items())
        if _sync:
            for task in validate_tasks_info:
                self.task_manager.get_task_result(task)
        else:
            return validate_tasks_info

    def sigkill_memcached(self, nodes=None, graceful=False):
        nodes = nodes or self.cluster.nodes_in_cluster
        for node in nodes:
            shell = RemoteMachineShellConnection(node)
            if graceful:
                shell.restart_couchbase()
            else:
                shell.kill_memcached()
            shell.disconnect()
        self.assertTrue(
            self.bucket_util._wait_warmup_completed(
                [self.cluster_util.cluster.master],
                self.bucket_util.buckets[0],
                wait_time=self.wait_timeout * 20))

    def crash(self,
              nodes=None,
              kill_itr=1,
              graceful=False,
              wait=True,
              force_collect=False):
        self.stop_crash = False
        count = kill_itr
        loop_itr = 0

        nodes = nodes or self.cluster.nodes_in_cluster

        connections = dict()
        for node in nodes:
            shell = RemoteMachineShellConnection(node)
            connections.update({node: shell})

        while not self.stop_crash:
            loop_itr += 1
            sleep = random.randint(30, 60)
            self.sleep(
                sleep,
                "Iteration:{} waiting for {} sec to kill memcached on all nodes"
                .format(loop_itr, sleep))

            for node, shell in connections.items():
                if "kv" in node.services:
                    if graceful:
                        shell.restart_couchbase()
                    else:
                        while count > 0:
                            shell.kill_memcached()
                            self.sleep(
                                3,
                                "Sleep before killing memcached on same node again."
                            )
                            count -= 1
                        count = kill_itr

            result = self.check_coredump_exist(self.cluster.nodes_in_cluster,
                                               force_collect=force_collect)
            if result:
                self.stop_crash = True
                self.task.jython_task_manager.abort_all_tasks()
                self.assertFalse(
                    result, "CRASH | CRITICAL | WARN messages "
                    "found in cb_logs")

            if wait:
                for node in nodes:
                    if "kv" in node.services:
                        result = self.bucket_util._wait_warmup_completed(
                            [node],
                            self.bucket_util.buckets[0],
                            wait_time=self.wait_timeout * 5)
                        if not result:
                            self.stop_crash = True
                            self.task.jython_task_manager.abort_all_tasks()
                            self.assertFalse(result)

        for _, shell in connections.items():
            shell.disconnect()

    def get_state_files(self, bucket, server=None):

        if server is None:
            server = self.cluster_util.cluster.master

        shell = RemoteMachineShellConnection(server)

        magma_path = os.path.join(
            RestConnection(server).get_data_path(), bucket.name, "magma.0")
        kv_path = shell.execute_command("ls %s | grep kv | head -1" %
                                        magma_path)[0][0].split('\n')[0]
        path = os.path.join(magma_path, kv_path, "rev*/seqIndex")
        self.log.debug("SeqIndex path = {}".format(path))

        output = shell.execute_command("ls %s | grep state" % path)[0]
        self.log.debug("State files = {}".format(output))
        shell.disconnect()

        return output

    def get_tombstone_count_key(self, servers=None):
        servers = servers or []
        result = 0
        for server in servers:
            data_path = RestConnection(server).get_data_path()
            bucket = self.bucket_util.buckets[0]
            magma_path = os.path.join(data_path, bucket.name, "magma.{}")

            shell = RemoteMachineShellConnection(server)
            shards = shell.execute_command(
                "lscpu | grep 'CPU(s)' | head -1 | awk '{print $2}'"
            )[0][0].split('\n')[0]
            self.log.debug("machine: {} - core(s): {}".format(
                server.ip, shards))
            for shard in range(min(int(shards), 64)):
                magma = magma_path.format(shard)
                kvstores, _ = shell.execute_command(
                    "ls {} | grep kvstore".format(magma))
                cmd = '/opt/couchbase/bin/magma_dump {}'.format(magma)
                for kvstore in kvstores:
                    dump = cmd
                    kvstore_num = kvstore.split("-")[1].strip()
                    dump += ' --kvstore {} --tree key --treedata | grep Key |grep \'"deleted":true\' | wc -l'.format(
                        kvstore_num)
                    result += int(shell.execute_command(dump)[0][0].strip())
            shell.disconnect()
        return result
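    # For reference, the command assembled above expands to something like
    # (data path and kvstore number are illustrative):
    #   /opt/couchbase/bin/magma_dump /data/kv/<bucket>/magma.0 \
    #       --kvstore 3 --tree key --treedata \
    #       | grep Key | grep '"deleted":true' | wc -l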

    def get_tombstone_count_seq(self, server=None, shard=0, kvstore=0):
        cmd = '/opt/couchbase/bin/magma_dump /data/kv/default/magma.{}/ ' \
              '--kvstore {} --tree key --treedata | grep Seq | wc -l'.format(
                  shard, kvstore)
        shell = RemoteMachineShellConnection(server)
        result = shell.execute_command(cmd)[0]
        shell.disconnect()
        return result

    def get_level_data_range(self,
                             server=None,
                             tree="key",
                             shard=0,
                             kvstore=0):
        cmd = '/opt/couchbase/bin/magma_dump /data/kv/default/magma.{}/ ' \
              '--kvstore {} --tree {}'.format(shard, kvstore, tree)
        shell = RemoteMachineShellConnection(server)
        result = shell.execute_command(cmd)[0]
        shell.disconnect()
        return result

    def set_metadata_purge_interval(self, value, buckets=None, node=None):
        self.log.info(
            "Changing the bucket properties by changing {0} to {1}".format(
                "purge_interval", value))
        if not buckets:
            buckets = self.buckets
        if node is None:
            node = self.cluster.master
        rest = RestConnection(node)

        shell = RemoteMachineShellConnection(node)
        shell.enable_diag_eval_on_non_local_hosts()
        shell.disconnect()

        for bucket in buckets:
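            # Build a diag_eval snippet that reads the bucket config from
            # ns_server, replaces the {purge_interval, Value} tuple and
            # writes the config back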
            cmd = '{ok, BC} = ns_bucket:get_bucket(' \
                  '"%s"), BC2 = lists:keyreplace(purge_interval, ' \
                  '1, BC, {purge_interval, %f})' \
                  ', ns_bucket:set_bucket_config("%s", BC2).' \
                  % (bucket.name, value, bucket.name)
            rest.diag_eval(cmd)

        # Restart Memcached in all cluster nodes to reflect the settings
        for server in self.cluster_util.get_kv_nodes(master=node):
            shell = RemoteMachineShellConnection(server)
            shell.restart_couchbase()
            shell.disconnect()

        # Check bucket-warm_up after Couchbase restart
        retry_count = 10
        buckets_warmed_up = self.bucket_util.is_warmup_complete(
            buckets, retry_count)
        if not buckets_warmed_up:
            self.log.critical("Few bucket(s) not warmed up "
                              "within expected time")
Esempio n. 6
0
class volume(BaseTestCase):
    # will add the __init__ functions after the test has been stabilised
    def setUp(self):
        self.input = TestInputSingleton.input
        self.input.test_params.update({"default_bucket":False})
        BaseTestCase.setUp(self)
        self.rest = RestConnection(self.servers[0])
        self.op_type = self.input.param("op_type", "create")
        self.tasks = []         # To have all tasks running in parallel.
        self._iter_count = 0    # To keep a check of how many items are deleted
        self.available_servers = list()
        self.available_servers = self.cluster.servers[self.nodes_init:]
        self.num_buckets = self.input.param("num_buckets", 1)
        self.mutate = 0
        self.doc_ops = self.input.param("doc_ops", None)
        if self.doc_ops:
            self.doc_ops = self.doc_ops.split(';')
        self.iterations = self.input.param("iterations", 2)
        self.vbucket_check = self.input.param("vbucket_check", True)
        self.new_num_writer_threads = self.input.param(
            "new_num_writer_threads", 6)
        self.new_num_reader_threads = self.input.param(
            "new_num_reader_threads", 8)

    def create_required_buckets(self):
        self.log.info("Get the available memory quota")
        self.info = self.rest.get_nodes_self()
        threshold_memory = 100
        # threshold_memory_vagrant = 100
        total_memory_in_mb = self.info.mcdMemoryReserved
        total_available_memory_in_mb = total_memory_in_mb
        active_service = self.info.services

        # If the mentioned service is already present,
        # we remove that much memory from available memory quota
        if "index" in active_service:
            total_available_memory_in_mb -= self.info.indexMemoryQuota
        if "fts" in active_service:
            total_available_memory_in_mb -= self.info.ftsMemoryQuota
        if "cbas" in active_service:
            total_available_memory_in_mb -= self.info.cbasMemoryQuota
        if "eventing" in active_service:
            total_available_memory_in_mb -= self.info.eventingMemoryQuota

        available_memory = total_available_memory_in_mb - threshold_memory
        # available_memory =  total_available_memory_in_mb - threshold_memory_vagrant
        self.rest.set_service_memoryQuota(service='memoryQuota',
                                          memoryQuota=available_memory)
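        # For illustration (hypothetical numbers): with mcdMemoryReserved of
        # 4096MB and only the index service present with a 512MB quota, the
        # quota set here would be 4096 - 512 - 100 = 3484MB.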

        # Creating buckets for data loading purpose
        self.log.info("Create CB buckets")
        duration = self.input.param("bucket_expiry", 0)
        eviction_policy = self.input.param("eviction_policy", Bucket.EvictionPolicy.VALUE_ONLY)
        self.bucket_type = self.input.param("bucket_type", Bucket.Type.MEMBASE) # Bucket.bucket_type.EPHEMERAL
        compression_mode = self.input.param("compression_mode", Bucket.CompressionMode.PASSIVE)  # Bucket.bucket_compression_mode.ACTIVE
        ramQuota = self.input.param("ramQuota", available_memory)
        bucket_names = self.input.param("bucket_names", "GleamBookUsers")
        if bucket_names:
            bucket_names = bucket_names.split(';')
        if self.bucket_type:
            self.bucket_type = self.bucket_type.split(';')
        if compression_mode:
            compression_mode = compression_mode.split(';')
        if eviction_policy:
            eviction_policy = eviction_policy.split(';')
        if self.num_buckets == 1:
            bucket = Bucket(
                {"name": "GleamBookUsers", "ramQuotaMB": ramQuota,
                 "maxTTL": duration, "replicaNumber": self.num_replicas,
                 "evictionPolicy": eviction_policy[0],
                 "bucketType": self.bucket_type[0],
                 "compressionMode": compression_mode[0]})
            self.bucket_util.create_bucket(bucket)
        elif 1 < self.num_buckets == len(bucket_names):
            for i in range(self.num_buckets):
                bucket = Bucket(
                    {"name": bucket_names[i],
                     "ramQuotaMB": ramQuota/self.num_buckets,
                     "maxTTL": duration, "replicaNumber": self.num_replicas,
                     "evictionPolicy": eviction_policy[i],
                     "bucketType": self.bucket_type[i],
                     "compressionMode": compression_mode[i]})
                self.bucket_util.create_bucket(bucket)
        else:
            self.fail("Number of bucket/Names not sufficient")

        # rebalance the new buckets across all nodes.
        self.log.info("Rebalance Starts")
        self.nodes = self.rest.node_statuses()
        self.rest.rebalance(otpNodes=[node.id for node in self.nodes],
                            ejectedNodes=[])
        self.rest.monitorRebalance()
        return bucket

    def set_num_writer_and_reader_threads(self, num_writer_threads="default", num_reader_threads="default"):
        for node in self.cluster_util.get_kv_nodes():
            bucket_helper = BucketHelper(node)
            bucket_helper.update_memcached_settings(num_writer_threads=num_writer_threads,
                                                    num_reader_threads=num_reader_threads)

    def volume_doc_generator_users(self, key, start, end):
        template = '{{ "id":"{0}", "alias":"{1}", "name":"{2}", "user_since":"{3}", "employment":{4} }}'
        return GleamBookUsersDocumentGenerator(key, template,
                                               start=start, end=end)

    def volume_doc_generator_messages(self, key, start, end):
        template = '{{ "message_id": "{0}", "author_id": "{1}", "send_time": "{2}" }}'
        return GleamBookMessagesDocumentGenerator(key, template,
                                                  start=start, end=end)

    def initial_data_load(self, initial_load):
        if self.atomicity:
            task = self.task.async_load_gen_docs_atomicity(self.cluster, self.bucket_util.buckets,
                                                            initial_load, "create" , exp=0,
                                                            batch_size=10,
                                                            process_concurrency=self.process_concurrency,
                                                            replicate_to=self.replicate_to,
                                                            persist_to=self.persist_to, timeout_secs=self.sdk_timeout,
                                                            retries=self.sdk_retries,update_count=self.mutate, transaction_timeout=self.transaction_timeout,
                                                            commit=self.transaction_commit,durability=self.durability_level,sync=self.sync)
            self.task.jython_task_manager.get_task_result(task)
        else:
            tasks_info = self.bucket_util._async_load_all_buckets(self.cluster, initial_load,
                                                            "create", exp=0,
                                                            persist_to = self.persist_to,
                                                            replicate_to=self.replicate_to,
                                                            batch_size= 10,
                                                            pause_secs = 5,
                                                            timeout_secs=30,
                                                            durability=self.durability_level,
                                                            process_concurrency = self.process_concurrency,
                                                            retries=self.sdk_retries)

            for task, task_info in tasks_info.items():
                self.task_manager.get_task_result(task)
        self.sleep(10)

    # Loading documents in 2 buckets in parallel through transactions
    def doc_load_using_txns(self):
        if "update" in self.doc_ops and self.gen_update_users is not None:
            self.tasks.append(self.doc_loader_txn("update", self.gen_update_users))
        if "create" in self.doc_ops and self.gen_create_users is not None:
            self.tasks.append(self.doc_loader_txn("create", self.gen_create_users))
        if "delete" in self.doc_ops and self.gen_delete_users is not  None:
            self.tasks.append(self.doc_loader_txn("delete", self.gen_delete_users))
        self.sleep(20)
        for task in self.tasks:
            self.task.jython_task_manager.get_task_result(task)

    def doc_loader_txn(self, op_type, kv_gen):
        if op_type == "update":
            print("Value of Mutated is", self.mutate)
            self.sleep(5)
        process_concurrency = self.process_concurrency
        # if op_type == "update":
        #     if "create" not in self.doc_ops:
        #         self.create_perc = 0
        #     if "delete" not in self.doc_ops:
        #         self.delete_perc = 0
        #     process_concurrency = (self.update_perc*process_concurrency)/(self.create_perc + self.delete_perc + self.update_perc)
        # if op_type == "create":
        #     if "update" not in self.doc_ops:
        #         self.update_perc = 0
        #     if "delete" not in self.doc_ops:
        #         self.delete_perc = 0
        #     process_concurrency = (self.create_perc*process_concurrency)/(self.create_perc + self.delete_perc + self.update_perc)
        # if op_type == "delete":
        #     if "create" not in self.doc_ops:
        #         self.create_perc = 0
        #     if "update" not in self.doc_ops:
        #         self.update_perc = 0
        #     process_concurrency = (self.delete_perc*process_concurrency)/(self.create_perc + self.delete_perc + self.update_perc)
        task = self.task.async_load_gen_docs_atomicity(self.cluster, self.bucket_util.buckets,
                                                       kv_gen, op_type, exp=0,
                                                       batch_size=10,
                                                       process_concurrency=process_concurrency,
                                                       replicate_to=self.replicate_to,
                                                       persist_to=self.persist_to, timeout_secs=self.sdk_timeout,
                                                       retries=self.sdk_retries, update_count=self.mutate,
                                                       transaction_timeout=self.transaction_timeout,
                                                       commit=self.transaction_commit, durability=self.durability_level,
                                                       sync=self.sync, defer=self.defer)
        return task

    # Loading documents through normal doc loader
    def normal_doc_loader(self):
        tasks_info = dict()
        if "update" in self.doc_ops and self.gen_update_users is not None:
            task_info = self.doc_loader("update", self.gen_update_users)
            tasks_info.update(task_info.items())
        if "create" in self.doc_ops and self.gen_create_users is not None:
            task_info = self.doc_loader("create", self.gen_create_users)
            tasks_info.update(task_info.items())
        if "delete" in self.doc_ops and self.gen_delete_users is not None:
            task_info = self.doc_loader("delete", self.gen_delete_users)
            tasks_info.update(task_info.items())
        return tasks_info

    def doc_loader(self, op_type, kv_gen):
        process_concurrency = self.process_concurrency
        if op_type == "update":
            if "create" not in self.doc_ops:
                self.create_perc = 0
            if "delete" not in self.doc_ops:
                self.delete_perc = 0
            process_concurrency = (self.update_perc*process_concurrency)/(self.create_perc + self.delete_perc + self.update_perc)
        if op_type == "create":
            if "update" not in self.doc_ops:
                self.update_perc = 0
            if "delete" not in self.doc_ops:
                self.delete_perc = 0
            process_concurrency = (self.create_perc*process_concurrency)/(self.create_perc + self.delete_perc + self.update_perc)
        if op_type == "delete":
            if "create" not in self.doc_ops:
                self.create_perc = 0
            if "update" not in self.doc_ops:
                self.update_perc = 0
            process_concurrency = (self.delete_perc*process_concurrency)/(self.create_perc + self.delete_perc + self.update_perc)
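        # Worked example of the scaling above (illustrative, assuming
        # process_concurrency=8 and the generate_docs() defaults of
        # create/update/delete = 100/10/10): the "create" loader gets
        # 100*8/120 of the concurrency and the "update"/"delete" loaders
        # 10*8/120 each (which truncates to 6 and 0 under integer division).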
        retry_exceptions = [
            SDKException.AmbiguousTimeoutException,
            SDKException.RequestCanceledException,
            SDKException.DurabilityAmbiguousException,
            SDKException.DurabilityImpossibleException,
        ]
        tasks_info = self.bucket_util._async_load_all_buckets(self.cluster, kv_gen,
                                                              op_type, 0, batch_size=20,
                                                              persist_to=self.persist_to, replicate_to=self.replicate_to,
                                                              durability=self.durability_level, pause_secs=5,
                                                              timeout_secs=30, process_concurrency=process_concurrency,
                                                              retries=self.sdk_retries,
                                                              retry_exceptions=retry_exceptions)
        return tasks_info

    # Stopping and restarting the memcached process
    def stop_process(self):
        target_node = self.servers[2]
        remote = RemoteMachineShellConnection(target_node)
        error_sim = CouchbaseError(self.log, remote)
        error_to_simulate = "stop_memcached"
        # Induce the error condition
        error_sim.create(error_to_simulate)
        self.sleep(20, "Wait before reverting the error condition")
        # Revert the simulated error condition and close the ssh session
        error_sim.revert(error_to_simulate)
        remote.disconnect()

    def rebalance(self, nodes_in=0, nodes_out=0):
        servs_in = random.sample(self.available_servers, nodes_in)

        self.nodes_cluster = self.cluster.nodes_in_cluster[:]
        self.nodes_cluster.remove(self.cluster.master)
        servs_out = random.sample(self.nodes_cluster, nodes_out)

        if nodes_in == nodes_out:
            self.vbucket_check = False

        rebalance_task = self.task.async_rebalance(
            self.cluster.servers[:self.nodes_init], servs_in, servs_out, check_vbucket_shuffling=self.vbucket_check)

        self.available_servers = [servs for servs in self.available_servers if servs not in servs_in]
        self.available_servers += servs_out

        self.cluster.nodes_in_cluster.extend(servs_in)
        self.cluster.nodes_in_cluster = list(set(self.cluster.nodes_in_cluster) - set(servs_out))
        return rebalance_task
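    # Usage sketch for rebalance() (illustrative): nodes_in == nodes_out,
    # e.g. self.rebalance(nodes_in=1, nodes_out=1), performs a swap rebalance,
    # for which the vbucket-shuffling check is turned off above.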

    def rebalance_validation(self, tasks_info, rebalance_task):
        if not rebalance_task.result:
            for task, _ in tasks_info.items():
                self.task.jython_task_manager.get_task_result(task)
            self.fail("Rebalance Failed")

    def data_validation(self, tasks_info):
        if not self.atomicity:
            for task in tasks_info:
                self.task_manager.get_task_result(task)
            self.bucket_util.verify_doc_op_task_exceptions(tasks_info,
                                                           self.cluster)
            self.bucket_util.log_doc_ops_task_failures(tasks_info)

            self.sleep(10)

            for task, task_info in tasks_info.items():
                self.assertFalse(
                    task_info["ops_failed"],
                    "Doc ops failed for task: {}".format(task.thread_name))

        self.log.info("Validating Active/Replica Docs")
        if self.atomicity:
            self.check_replica = False
        else:
            self.check_replica = True

        for bucket in self.bucket_util.buckets:
            tasks = list()
            if self.gen_update_users is not None:
                tasks.append(self.task.async_validate_docs(
                    self.cluster, bucket, self.gen_update_users, "update", 0,
                    batch_size=10, check_replica=self.check_replica))
            if self.gen_create_users is not None:
                tasks.append(self.task.async_validate_docs(
                    self.cluster, bucket, self.gen_create_users, "create", 0,
                    batch_size=10, check_replica=self.check_replica))
            if self.gen_delete_users is not None:
                tasks.append(self.task.async_validate_docs(
                    self.cluster, bucket, self.gen_delete_users, "delete", 0,
                    batch_size=10, check_replica=self.check_replica))
            for task in tasks:
                self.task.jython_task_manager.get_task_result(task)
            self.sleep(20)

        if not self.atomicity:
            self.bucket_util._wait_for_stats_all_buckets()
            self.bucket_util.verify_stats_all_buckets(self.end - self.initial_load_count*self.delete_perc/100*self._iter_count)

    def data_load(self):
        tasks_info = dict()
        if self.atomicity:
            self.doc_load_using_txns()
            self.sleep(10)
        else:
            tasks_info = self.normal_doc_loader()
            self.sleep(10)
        return tasks_info
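    # Note on data_load(): the transactional path blocks inside
    # doc_load_using_txns() and returns an empty dict, while the normal path
    # returns the async task info so data_validation() can collect the
    # results later.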

    def generate_docs(self):
        self.create_perc = self.input.param("create_perc",100)
        self.update_perc = self.input.param("update_perc", 10)
        self.delete_perc = self.input.param("delete_perc", 10)

        self.gen_delete_users = None
        self.gen_create_users = None
        self.gen_update_users = None

        if "update" in self.doc_ops:
            self.mutate += 1
            self.gen_update_users = doc_generator("Users", 0, self.initial_load_count*self.update_perc/100,
                                                doc_size = self.doc_size, mutate = self.mutate)
        if "delete" in self.doc_ops:
            self.gen_delete_users = doc_generator("Users", self.start,
                                              self.start + (self.initial_load_count*self.delete_perc)/100, doc_size = self.doc_size)
            self._iter_count += 1

        if "create" in self.doc_ops:
            self.start = self.end
            self.end += self.initial_load_count*self.create_perc/100
            self.gen_create_users = doc_generator("Users", self.start, self.end, doc_size = self.doc_size)

    def data_validation_mode(self, tasks_info):
        # if not self.atomicity:
        self.data_validation(tasks_info)
        '''
        else:
            for task in self.tasks:
                self.task.jython_task_manager.get_task_result(task)
            self.sleep(10)
        '''

    def get_bucket_dgm(self, bucket):
        self.rest_client = BucketHelper(self.cluster.master)
        dgm = self.rest_client.fetch_bucket_stats(
            bucket.name)["op"]["samples"]["vb_active_resident_items_ratio"][-1]
        self.log.info("Active Resident Threshold of {0} is {1}".format(bucket.name, dgm))

    def print_crud_stats(self):
        self.table = TableView(self.log.info)
        self.table.set_headers(["Initial Items", "Current Items", "Items Updated", "Items Created", "Items Deleted"])
        if self._iter_count != 0:
            self.table.add_row([str(self.start - self.initial_load_count*self.delete_perc/100*(self._iter_count-1)),
                                str(self.end- self.initial_load_count*self.delete_perc/100*self._iter_count),
                                str(self.update_perc - self.update_perc) + "---" +
                                str(self.initial_load_count*self.update_perc/100),
                                str(self.start) + "---" + str(self.end),
                                str(self.start - self.initial_load_count*self.create_perc/100) + "---" +
                                str(self.start + (self.initial_load_count*self.delete_perc/100) - self.initial_load_count*self.create_perc/100)])
        self.table.display("Docs statistics")

    def test_volume_taf(self):
        ########################################################################################################################
        self.log.info("Step1: Create a n node cluster")
        nodes_init = self.cluster.servers[1:self.nodes_init] if self.nodes_init != 1 else []
        self.task.rebalance([self.cluster.master], nodes_init, [])
        self.cluster.nodes_in_cluster.extend([self.cluster.master] + nodes_init)
        self.query_node = self.cluster.master
        ########################################################################################################################
        self.log.info("Step 2 & 3: Create required buckets.")
        bucket = self.create_required_buckets()
        self.loop = 0
        #######################################################################################################################
        while self.loop<self.iterations:
            self.log.info("Step 4: Pre-Requisites for Loading of docs")
            self.start = 0
            self.bucket_util.add_rbac_user()
            self.end = self.initial_load_count = self.input.param("initial_load", 1000)
            initial_load = doc_generator("Users", self.start, self.start + self.initial_load_count, doc_size=self.doc_size)
            self.initial_data_load(initial_load)
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            self.get_bucket_dgm(bucket)
            ########################################################################################################################
            self.log.info("Step 5: Rebalance in with Loading of docs")
            self.generate_docs()
            self.gen_delete_users=None
            self._iter_count = 0
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads="disk_io_optimized",
                                                       num_reader_threads="disk_io_optimized")
            rebalance_task = self.rebalance(nodes_in = 1, nodes_out = 0)
            tasks_info = self.data_load()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads=self.new_num_writer_threads,
                                                       num_reader_threads=self.new_num_reader_threads)
            # self.sleep(600, "Wait for Rebalance to start")
            self.task.jython_task_manager.get_task_result(rebalance_task)
            reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
            self.assertTrue(reached, "rebalance failed, stuck or did not complete")
            self.data_validation_mode(tasks_info)
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(bucket)
            #########################################################################################################################
            self.log.info("Step 6: Rebalance Out with Loading of docs")
            self.generate_docs()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads="disk_io_optimized",
                                                       num_reader_threads="disk_io_optimized")
            rebalance_task = self.rebalance(nodes_in = 0, nodes_out = 1)
            tasks_info = self.data_load()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads=self.new_num_writer_threads,
                                                       num_reader_threads=self.new_num_reader_threads)
            # self.sleep(600, "Wait for Rebalance to start")
            self.task.jython_task_manager.get_task_result(rebalance_task)
            reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
            self.assertTrue(reached, "rebalance failed, stuck or did not complete")
            self.data_validation_mode(tasks_info)
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(bucket)
            #######################################################################################################################
            self.log.info("Step 7: Rebalance In_Out with Loading of docs")
            self.generate_docs()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads="disk_io_optimized",
                                                       num_reader_threads="disk_io_optimized")
            rebalance_task = self.rebalance(nodes_in = 2, nodes_out = 1)
            tasks_info = self.data_load()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads=self.new_num_writer_threads,
                                                       num_reader_threads=self.new_num_reader_threads)
            # self.sleep(600, "Wait for Rebalance to start")
            self.task.jython_task_manager.get_task_result(rebalance_task)
            reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
            self.assertTrue(reached, "rebalance failed, stuck or did not complete")
            self.data_validation_mode(tasks_info)
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(bucket)
            ########################################################################################################################
            self.log.info("Step 8: Swap with Loading of docs")
            self.generate_docs()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads="disk_io_optimized",
                                                       num_reader_threads="disk_io_optimized")
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=1)
            tasks_info = self.data_load()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads=self.new_num_writer_threads,
                                                       num_reader_threads=self.new_num_reader_threads)
            # self.sleep(600, "Wait for Rebalance to start")
            self.task.jython_task_manager.get_task_result(rebalance_task)
            reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
            self.assertTrue(reached, "rebalance failed, stuck or did not complete")
            self.data_validation_mode(tasks_info)
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(bucket)
            ########################################################################################################################
            self.log.info("Step 9: Updating the bucket replica to 2")
            bucket_helper = BucketHelper(self.cluster.master)
            for i in range(len(self.bucket_util.buckets)):
                bucket_helper.change_bucket_props(
                    self.bucket_util.buckets[i], replicaNumber=2)
            self.generate_docs()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads="disk_io_optimized",
                                                       num_reader_threads="disk_io_optimized")
            rebalance_task = self.rebalance(nodes_in =1, nodes_out= 0)
            tasks_info = self.data_load()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads=self.new_num_writer_threads,
                                                       num_reader_threads=self.new_num_reader_threads)
            # self.sleep(600, "Wait for Rebalance to start")
            self.task.jython_task_manager.get_task_result(rebalance_task)
            reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
            self.assertTrue(reached, "rebalance failed, stuck or did not complete")
            self.data_validation_mode(tasks_info)
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(bucket)
            ########################################################################################################################
            if "ephemeral" in self.bucket_type:
                self.log.info("No Memcached kill for epehemral bucket")
            else:
                self.log.info("Step 10: Stopping and restarting memcached process")
                self.generate_docs()
                if not self.atomicity:
                    self.set_num_writer_and_reader_threads(num_writer_threads=self.new_num_writer_threads,
                                                           num_reader_threads=self.new_num_reader_threads)
                rebalance_task = self.task.async_rebalance(self.cluster.servers, [], [])
                tasks_info = self.data_load()
                if not self.atomicity:
                    self.set_num_writer_and_reader_threads(num_writer_threads="disk_io_optimized",
                                                           num_reader_threads="disk_io_optimized")
                # self.sleep(600, "Wait for Rebalance to start")
                self.task.jython_task_manager.get_task_result(rebalance_task)
                reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
                self.assertTrue(reached, "rebalance failed, stuck or did not complete")
                self.stop_process()
                self.data_validation_mode(tasks_info)
                self.tasks = []
                self.bucket_util.print_bucket_stats()
                self.print_crud_stats()
                self.get_bucket_dgm(bucket)
            ########################################################################################################################
            self.log.info("Step 11: Failover a node and RebalanceOut that node with loading in parallel")
            self.std_vbucket_dist = self.input.param("std_vbucket_dist", None)
            std = self.std_vbucket_dist or 1.0

            prev_failover_stats = self.bucket_util.get_failovers_logs(self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            prev_vbucket_stats = self.bucket_util.get_vbucket_seqnos(self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            self.sleep(10)

            disk_replica_dataset, disk_active_dataset = self.bucket_util.get_and_compare_active_replica_data_set_all(
                self.cluster.nodes_in_cluster, self.bucket_util.buckets, path=None)

            self.rest = RestConnection(self.cluster.master)
            self.nodes = self.cluster_util.get_nodes(self.cluster.master)
            self.chosen = self.cluster_util.pick_nodes(self.cluster.master, howmany=1)

            # Mark Node for failover
            self.generate_docs()
            tasks_info = self.data_load()
            self.success_failed_over = self.rest.fail_over(self.chosen[0].id, graceful=False)

            self.sleep(300)
            self.nodes = self.rest.node_statuses()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads=self.new_num_writer_threads,
                                                       num_reader_threads=self.new_num_reader_threads)
            self.rest.rebalance(otpNodes=[node.id for node in self.nodes], ejectedNodes=[self.chosen[0].id])
            # self.sleep(600)
            self.assertTrue(self.rest.monitorRebalance(stop_if_loop=True), msg="Rebalance failed")

            servs_out = [node for node in self.cluster.servers if node.ip == self.chosen[0].ip]
            self.cluster.nodes_in_cluster = list(set(self.cluster.nodes_in_cluster) - set(servs_out))
            self.available_servers += servs_out
            self.sleep(10)

            self.data_validation_mode(tasks_info)

            self.bucket_util.compare_failovers_logs(prev_failover_stats, self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            self.sleep(10)

            self.bucket_util.data_analysis_active_replica_all(
                disk_active_dataset, disk_replica_dataset,
                self.cluster.servers[:self.nodes_in + self.nodes_init],
                self.bucket_util.buckets, path=None)
            nodes = self.cluster_util.get_nodes_in_cluster(self.cluster.master)
            self.bucket_util.vb_distribution_analysis(
                servers=nodes, buckets=self.bucket_util.buckets,
                num_replicas=2,
                std=std, total_vbuckets=self.cluster_util.vbuckets)
            self.sleep(10)
            self.tasks = []
            rebalance_task = self.rebalance(nodes_in=1, nodes_out=0)
            # self.sleep(600)
            self.task.jython_task_manager.get_task_result(rebalance_task)
            reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
            self.assertTrue(reached, "rebalance failed, stuck or did not complete")
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(bucket)
            ########################################################################################################################
            self.log.info("Step 12: Failover a node and FullRecovery that node")

            self.std_vbucket_dist = self.input.param("std_vbucket_dist", None)
            std = self.std_vbucket_dist or 1.0

            prev_failover_stats = self.bucket_util.get_failovers_logs(self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            prev_vbucket_stats = self.bucket_util.get_vbucket_seqnos(self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            self.sleep(10)

            disk_replica_dataset, disk_active_dataset = self.bucket_util.get_and_compare_active_replica_data_set_all(
                self.cluster.nodes_in_cluster, self.bucket_util.buckets, path=None)

            self.rest = RestConnection(self.cluster.master)
            self.nodes = self.cluster_util.get_nodes(self.cluster.master)
            self.chosen = self.cluster_util.pick_nodes(self.cluster.master, howmany=1)

            self.generate_docs()
            tasks_info = self.data_load()
            # Mark Node for failover
            self.success_failed_over = self.rest.fail_over(self.chosen[0].id, graceful=False)

            self.sleep(300)

            # Mark Node for full recovery
            if self.success_failed_over:
                self.rest.set_recovery_type(otpNode=self.chosen[0].id, recoveryType="full")

            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads=self.new_num_writer_threads,
                                                       num_reader_threads=self.new_num_reader_threads)

            rebalance_task = self.task.async_rebalance(
                self.cluster.servers[:self.nodes_init], [], [])
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads="disk_io_optimized",
                                                       num_reader_threads="disk_io_optimized")
            # self.sleep(600)
            self.task.jython_task_manager.get_task_result(rebalance_task)
            reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
            self.assertTrue(reached, "rebalance failed, stuck or did not complete")
            self.sleep(10)

            self.data_validation_mode(tasks_info)

            self.bucket_util.compare_failovers_logs(prev_failover_stats, self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            self.sleep(10)

            self.bucket_util.data_analysis_active_replica_all(
                disk_active_dataset, disk_replica_dataset,
                self.cluster.servers[:self.nodes_in + self.nodes_init],
                self.bucket_util.buckets, path=None)
            nodes = self.cluster_util.get_nodes_in_cluster(self.cluster.master)
            self.bucket_util.vb_distribution_analysis(
                servers=nodes, buckets=self.bucket_util.buckets,
                num_replicas=2,
                std=std, total_vbuckets=self.cluster_util.vbuckets)
            self.sleep(10)
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(bucket)
            ########################################################################################################################
            self.log.info("Step 13: Failover a node and DeltaRecovery that node with loading in parallel")

            self.std_vbucket_dist = self.input.param("std_vbucket_dist", None)
            std = self.std_vbucket_dist or 1.0

            prev_failover_stats = self.bucket_util.get_failovers_logs(self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            prev_vbucket_stats = self.bucket_util.get_vbucket_seqnos(self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            self.sleep(10)

            disk_replica_dataset, disk_active_dataset = self.bucket_util.get_and_compare_active_replica_data_set_all(
                self.cluster.nodes_in_cluster, self.bucket_util.buckets, path=None)

            self.rest = RestConnection(self.cluster.master)
            self.nodes = self.cluster_util.get_nodes(self.cluster.master)
            self.chosen = self.cluster_util.pick_nodes(self.cluster.master, howmany=1)

            self.generate_docs()
            tasks_info = self.data_load()
            # Mark Node for failover
            self.success_failed_over = self.rest.fail_over(self.chosen[0].id, graceful=False)

            self.sleep(300)
            if self.success_failed_over:
                self.rest.set_recovery_type(otpNode=self.chosen[0].id, recoveryType="delta")
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads=self.new_num_writer_threads,
                                                       num_reader_threads=self.new_num_reader_threads)

            rebalance_task = self.task.async_rebalance(
                self.cluster.servers[:self.nodes_init], [], [])
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads="disk_io_optimized",
                                                       num_reader_threads="disk_io_optimized")
            # self.sleep(600)
            self.task.jython_task_manager.get_task_result(rebalance_task)
            reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
            self.assertTrue(reached, "rebalance failed, stuck or did not complete")
            self.sleep(10)

            self.data_validation_mode(tasks_info)

            self.bucket_util.compare_failovers_logs(prev_failover_stats, self.cluster.nodes_in_cluster, self.bucket_util.buckets)
            self.sleep(10)

            self.bucket_util.data_analysis_active_replica_all(
                disk_active_dataset, disk_replica_dataset,
                self.cluster.servers[:self.nodes_in + self.nodes_init],
                self.bucket_util.buckets, path=None)
            nodes = self.cluster_util.get_nodes_in_cluster(self.cluster.master)
            self.bucket_util.vb_distribution_analysis(
                servers=nodes, buckets=self.bucket_util.buckets,
                num_replicas=2,
                std=std, total_vbuckets=self.cluster_util.vbuckets)
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(bucket)
        ########################################################################################################################
            self.log.info("Step 14: Updating the bucket replica to 1")
            bucket_helper = BucketHelper(self.cluster.master)
            for i in range(len(self.bucket_util.buckets)):
                bucket_helper.change_bucket_props(
                    self.bucket_util.buckets[i], replicaNumber=1)
            self.generate_docs()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads=self.new_num_writer_threads,
                                                       num_reader_threads=self.new_num_reader_threads)
            rebalance_task = self.task.async_rebalance(self.cluster.servers, [], [])
            tasks_info = self.data_load()
            if not self.atomicity:
                self.set_num_writer_and_reader_threads(num_writer_threads="disk_io_optimized",
                                                       num_reader_threads="disk_io_optimized")
            # self.sleep(600, "Wait for Rebalance to start")
            self.task.jython_task_manager.get_task_result(rebalance_task)
            reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
            self.assertTrue(reached, "rebalance failed, stuck or did not complete")
            self.data_validation_mode(tasks_info)
            self.tasks = []
            self.bucket_util.print_bucket_stats()
            self.print_crud_stats()
            self.get_bucket_dgm(bucket)
        ########################################################################################################################
            self.log.info("Step 15: Flush the bucket and start the entire process again")
            self.loop += 1
            if self.loop < self.iterations:
                # Flush the bucket
                self.bucket_util.flush_all_buckets(self.cluster.master)
                self.sleep(10)
                if len(self.cluster.nodes_in_cluster) > self.nodes_init:
                    self.nodes_cluster = self.cluster.nodes_in_cluster[:]
                    self.nodes_cluster.remove(self.cluster.master)
                    servs_out = random.sample(self.nodes_cluster, int(len(self.cluster.nodes_in_cluster) - self.nodes_init))
                    rebalance_task = self.task.async_rebalance(
                        self.cluster.servers[:self.nodes_init], [], servs_out)
                    # self.sleep(600)
                    self.task.jython_task_manager.get_task_result(rebalance_task)
                    self.available_servers += servs_out
                    self.cluster.nodes_in_cluster = list(set(self.cluster.nodes_in_cluster) - set(servs_out))
                    reached = RestHelper(self.rest).rebalance_reached(wait_step=120)
                    self.assertTrue(reached, "rebalance failed, stuck or did not complete")
                    self.get_bucket_dgm(bucket)
                self._iter_count = 0
            else:
                self.log.info("Volume Test Run Complete")
                self.get_bucket_dgm(bucket)
Esempio n. 7
0
class OPD:
    def __init__(self):
        pass

    def threads_calculation(self):
        self.process_concurrency = self.input.param("pc",
                                                    self.process_concurrency)
        self.doc_loading_tm = TaskManager(self.process_concurrency)

    def get_memory_footprint(self):
        # Parse `ps v` output for this process and report its resident set
        # size (RSS) in MB
        out = subprocess.Popen(
            ['ps', 'v', '-p', str(os.getpid())],
            stdout=subprocess.PIPE).communicate()[0].split(b'\n')
        rss_index = out[0].split().index(b'RSS')
        mem = float(out[1].split()[rss_index]) / 1024
        self.PrintStep("RAM FootPrint: %s" % str(mem))
        return mem

    def create_required_buckets(self, cluster):
        if self.cluster.cloud_cluster:
            return
        self.log.info("Get the available memory quota")
        rest = RestConnection(cluster.master)
        self.info = rest.get_nodes_self()

        # threshold_memory_vagrant = 100
        kv_memory = self.info.memoryQuota - 100

        # Creating buckets for data loading purpose
        self.log.info("Create CB buckets")
        self.bucket_expiry = self.input.param("bucket_expiry", 0)
        ramQuota = self.input.param("ramQuota", kv_memory)
        buckets = ["GleamBookUsers"] * self.num_buckets
        bucket_type = self.bucket_type.split(';') * self.num_buckets
        compression_mode = self.compression_mode.split(';') * self.num_buckets
        for i in range(self.num_buckets):
            bucket = Bucket({
                Bucket.name: buckets[i] + str(i),
                Bucket.ramQuotaMB: ramQuota / self.num_buckets,
                Bucket.maxTTL: self.bucket_expiry,
                Bucket.replicaNumber: self.num_replicas,
                Bucket.storageBackend: self.bucket_storage,
                Bucket.evictionPolicy: self.bucket_eviction_policy,
                Bucket.bucketType: bucket_type[i],
                Bucket.flushEnabled: Bucket.FlushBucket.ENABLED,
                Bucket.compressionMode: compression_mode[i],
                Bucket.fragmentationPercentage: self.fragmentation
            })
            self.bucket_util.create_bucket(cluster, bucket)

        # rebalance the new buckets across all nodes.
        self.log.info("Rebalance Starts")
        self.nodes = rest.node_statuses()
        rest.rebalance(otpNodes=[node.id for node in self.nodes],
                       ejectedNodes=[])
        rest.monitorRebalance()

    def create_required_collections(self, cluster, num_scopes,
                                    num_collections):
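        """Create the requested scopes and collections on every bucket and
        cache the collection names of the working scope in
        self.collections."""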
        self.scope_name = self.input.param("scope_name", "_default")
        if self.scope_name != "_default":
            self.bucket_util.create_scope(cluster, self.bucket,
                                          {"name": self.scope_name})
        if num_scopes > 1:
            self.scope_prefix = self.input.param("scope_prefix", "VolumeScope")
            for bucket in cluster.buckets:
                for i in range(num_scopes):
                    scope_name = self.scope_prefix + str(i)
                    self.log.info("Creating scope: %s" % (scope_name))
                    self.bucket_util.create_scope(cluster.master, bucket,
                                                  {"name": scope_name})
                    self.sleep(0.5)
            self.num_scopes += 1
        for bucket in cluster.buckets:
            for scope in bucket.scopes.keys():
                if num_collections > 0:
                    self.collection_prefix = self.input.param(
                        "collection_prefix", "VolumeCollection")

                    for i in range(num_collections):
                        collection_name = self.collection_prefix + str(i)
                        self.bucket_util.create_collection(
                            cluster.master, bucket, scope,
                            {"name": collection_name})
                        self.sleep(0.5)

        self.collections = cluster.buckets[0].scopes[
            self.scope_name].collections.keys()
        self.log.debug("Collections list == {}".format(self.collections))

    def stop_purger(self, tombstone_purge_age=60):
        """
        1. Disable ts purger
        2. Create fts indexes (to create metakv, ns_config entries)
        3. Delete fts indexes
        4. Grep ns_config for '_deleted' to get total deleted keys count
        5. enable ts purger and age = 1 mins
        6. Sleep for 2 minutes
        7. Grep for debug.log and check for latest tombstones purged count
        8. Validate step4 count matches step 7 count for all nodes
        """
        self.rest.update_tombstone_purge_age_for_removal(tombstone_purge_age)
        self.rest.disable_tombstone_purger()

    def get_bucket_dgm(self, bucket):
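        """Return the latest active resident ratio
        (vb_active_resident_items_ratio) sample for the bucket and log it."""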
        self.rest_client = BucketHelper(self.cluster.master)
        dgm = self.rest_client.fetch_bucket_stats(
            bucket.name)["op"]["samples"]["vb_active_resident_items_ratio"][-1]
        self.log.info("Active Resident Threshold of {0} is {1}".format(
            bucket.name, dgm))
        return dgm

    def _induce_error(self, error_condition, nodes=None):
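        """Inject the given failure (stop server, firewall, memcached/erlang
        kill or reboot) on the target nodes; defaults to the master node."""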
        nodes = nodes or [self.cluster.master]
        for node in nodes:
            if error_condition == "stop_server":
                self.cluster_util.stop_server(node)
            elif error_condition == "enable_firewall":
                self.cluster_util.start_firewall_on_node(node)
            elif error_condition == "kill_memcached":
                shell = RemoteMachineShellConnection(node)
                shell.kill_memcached()
                shell.disconnect()
            elif error_condition == "reboot_server":
                shell = RemoteMachineShellConnection(node)
                shell.reboot_node()
            elif error_condition == "kill_erlang":
                shell = RemoteMachineShellConnection(node)
                shell.kill_erlang()
                shell.disconnect()
            else:
                self.fail("Invalid error induce option")

    def _recover_from_error(self, error_condition):
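        """Undo the injected failure, then wait for warmup and for ns_server
        to be reachable on every KV node plus the master."""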
        for node in self.cluster.nodes_in_cluster:
            if error_condition == "stop_server" or error_condition == "kill_erlang":
                self.cluster_util.start_server(node)
            elif error_condition == "enable_firewall":
                self.cluster_util.stop_firewall_on_node(node)

        for node in self.cluster.kv_nodes + [self.cluster.master]:
            self.check_warmup_complete(node)
            result = self.cluster_util.wait_for_ns_servers_or_assert(
                [node], wait_time=1200)
            self.assertTrue(result, "Server warmup failed")

    def rebalance(self,
                  nodes_in=0,
                  nodes_out=0,
                  services=None,
                  retry_get_process_num=3000):
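        """Pick nodes_in spare servers to add and nodes_out cluster nodes to
        remove for each requested service, update the per-service node lists
        and available_servers, and return the async rebalance task."""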
        self.servs_in = list()
        self.nodes_cluster = self.cluster.nodes_in_cluster[:]
        self.nodes_cluster.remove(self.cluster.master)
        self.servs_out = list()
        services = services or ["kv"]
        print "KV nodes in cluster: %s" % [
            server.ip for server in self.cluster.kv_nodes
        ]
        print "CBAS nodes in cluster: %s" % [
            server.ip for server in self.cluster.cbas_nodes
        ]
        print "INDEX nodes in cluster: %s" % [
            server.ip for server in self.cluster.index_nodes
        ]
        print "FTS nodes in cluster: %s" % [
            server.ip for server in self.cluster.fts_nodes
        ]
        print "QUERY nodes in cluster: %s" % [
            server.ip for server in self.cluster.query_nodes
        ]
        print "EVENTING nodes in cluster: %s" % [
            server.ip for server in self.cluster.eventing_nodes
        ]
        print "AVAILABLE nodes for cluster: %s" % [
            server.ip for server in self.available_servers
        ]
        if nodes_out:
            if "cbas" in services:
                servers = random.sample(self.cluster.cbas_nodes, nodes_out)
                self.servs_out.extend(servers)
                for server in servers:
                    self.cluster.cbas_nodes.remove(server)
            if "index" in services:
                servers = random.sample(self.cluster.index_nodes, nodes_out)
                self.servs_out.extend(servers)
                for server in servers:
                    self.cluster.index_nodes.remove(server)
            if "fts" in services:
                servers = random.sample(self.cluster.fts_nodes, nodes_out)
                self.servs_out.extend(servers)
                for server in servers:
                    self.cluster.fts_nodes.remove(server)
            if "query" in services:
                servers = random.sample(self.cluster.query_nodes, nodes_out)
                self.servs_out.extend(servers)
                for server in servers:
                    self.cluster.query_nodes.remove(server)
            if "eventing" in services:
                servers = random.sample(self.cluster.eventing_nodes, nodes_out)
                self.servs_out.extend(servers)
                for server in servers:
                    self.cluster.eventing_nodes.remove(server)
            if "kv" in services:
                nodes = [
                    node for node in self.cluster.kv_nodes
                    if node.ip != self.cluster.master.ip
                ]
                servers = random.sample(nodes, nodes_out)
                self.servs_out.extend(servers)
                for server in servers:
                    self.cluster.kv_nodes.remove(server)

        if nodes_in:
            if "cbas" in services:
                servers = random.sample(self.available_servers, nodes_in)
                self.servs_in.extend(servers)
                self.cluster.cbas_nodes.extend(servers)
                self.available_servers = [
                    servs for servs in self.available_servers
                    if servs not in servers
                ]
            if "index" in services:
                servers = random.sample(self.available_servers, nodes_in)
                self.servs_in.extend(servers)
                self.cluster.index_nodes.extend(servers)
                self.available_servers = [
                    servs for servs in self.available_servers
                    if servs not in servers
                ]
            if "fts" in services:
                servers = random.sample(self.available_servers, nodes_in)
                self.servs_in.extend(servers)
                self.cluster.fts_nodes.extend(servers)
                self.available_servers = [
                    servs for servs in self.available_servers
                    if servs not in servers
                ]
            if "query" in services:
                servers = random.sample(self.available_servers, nodes_in)
                self.servs_in.extend(servers)
                self.cluster.query_nodes.extend(servers)
                self.available_servers = [
                    servs for servs in self.available_servers
                    if servs not in servers
                ]
            if "eventing" in services:
                servers = random.sample(self.available_servers, nodes_in)
                self.servs_in.extend(servers)
                self.cluster.eventing_nodes.extend(servers)
                self.available_servers = [
                    servs for servs in self.available_servers
                    if servs not in servers
                ]
            if "kv" in services:
                servers = random.sample(self.available_servers, nodes_in)
                self.servs_in.extend(servers)
                self.cluster.kv_nodes.extend(servers)
                self.available_servers = [
                    servs for servs in self.available_servers
                    if servs not in servers
                ]

        print "Servers coming in : %s with services: %s" % (
            [server.ip for server in self.servs_in], services)
        print "Servers going out : %s" % (
            [server.ip for server in self.servs_out])
        self.available_servers.extend(self.servs_out)
        print "NEW AVAILABLE nodes for cluster: %s" % (
            [server.ip for server in self.available_servers])
        if nodes_in == nodes_out:
            self.vbucket_check = False

        rebalance_task = self.task.async_rebalance(
            self.cluster,
            self.servs_in,
            self.servs_out,
            services=services,
            check_vbucket_shuffling=self.vbucket_check,
            retry_get_process_num=retry_get_process_num)

        return rebalance_task

    def generate_docs(self,
                      doc_ops=None,
                      create_end=None,
                      create_start=None,
                      update_end=None,
                      update_start=None,
                      delete_end=None,
                      delete_start=None,
                      expire_end=None,
                      expire_start=None,
                      read_end=None,
                      read_start=None):
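        """Compute the key ranges for the requested doc_ops (create, read,
        update, delete, expiry) based on mutation_perc and adjust
        self.final_items to the expected item count after the load."""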
        self.get_memory_footprint()
        self.create_end = 0
        self.create_start = 0
        self.read_end = 0
        self.read_start = 0
        self.update_end = 0
        self.update_start = 0
        self.delete_end = 0
        self.delete_start = 0
        self.expire_end = 0
        self.expire_start = 0
        self.initial_items = self.final_items

        doc_ops = doc_ops or self.doc_ops
        self.mutations_to_validate = doc_ops

        if "read" in doc_ops:
            if read_start is not None:
                self.read_start = read_start
            else:
                self.read_start = 0
            if read_end is not None:
                self.read_end = read_end
            else:
                self.read_end = self.num_items * self.mutation_perc / 100

        if "update" in doc_ops:
            if update_start is not None:
                self.update_start = update_start
            else:
                self.update_start = 0
            if update_end is not None:
                self.update_end = update_end
            else:
                self.update_end = self.num_items * self.mutation_perc / 100
            self.mutate += 1

        if "delete" in doc_ops:
            if delete_start is not None:
                self.delete_start = delete_start
            else:
                self.delete_start = self.start
            if delete_end is not None:
                self.delete_end = delete_end
            else:
                self.delete_end = self.start + self.num_items * self.mutation_perc / 100
            self.final_items -= (self.delete_end - self.delete_start
                                 ) * self.num_collections * self.num_scopes

        if "expiry" in doc_ops:
            if self.maxttl == 0:
                self.maxttl = self.input.param("maxttl", 10)
            if expire_start is not None:
                self.expire_start = expire_start
            else:
                self.expire_start = self.delete_end
            if expire_end is not None:
                self.expire_end = expire_end
            else:
                self.expire_end = self.expire_start + self.num_items * self.mutation_perc / 100
            self.final_items -= (self.expire_end - self.expire_start
                                 ) * self.num_collections * self.num_scopes

        if "create" in doc_ops:
            if create_start is not None:
                self.create_start = create_start
            else:
                self.create_start = self.end
            self.start = self.create_start

            if create_end is not None:
                self.create_end = create_end
            else:
                self.create_end = self.end + (
                    self.expire_end - self.expire_start) + (self.delete_end -
                                                            self.delete_start)
            self.end = self.create_end

            self.final_items += (abs(self.create_end - self.create_start)
                                 ) * self.num_collections * self.num_scopes

        print "Read Start: %s" % self.read_start
        print "Read End: %s" % self.read_end
        print "Update Start: %s" % self.update_start
        print "Update End: %s" % self.update_end
        print "Expiry Start: %s" % self.expire_start
        print "Expiry End: %s" % self.expire_end
        print "Delete Start: %s" % self.delete_start
        print "Delete End: %s" % self.delete_end
        print "Create Start: %s" % self.create_start
        print "Create End: %s" % self.create_end
        print "Final Start: %s" % self.start
        print "Final End: %s" % self.end

    def _loader_dict(self, cmd=None):
        cmd = cmd or dict()
        self.loader_map = dict()
        for bucket in self.cluster.buckets:
            for scope in bucket.scopes.keys():
                for collection in bucket.scopes[scope].collections.keys():
                    if collection == "_default" and scope == "_default":
                        continue
                    ws = WorkLoadSettings(
                        cmd.get("keyPrefix", self.key),
                        cmd.get("keySize", self.key_size),
                        cmd.get("docSize", self.doc_size),
                        cmd.get("cr", self.create_perc),
                        cmd.get("rd", self.read_perc),
                        cmd.get("up", self.update_perc),
                        cmd.get("dl", self.delete_perc),
                        cmd.get("ex", self.expiry_perc),
                        cmd.get("workers", self.process_concurrency),
                        cmd.get("ops", self.ops_rate),
                        cmd.get("loadType", None), cmd.get("keyType", None),
                        cmd.get("valueType", None), cmd.get("validate", False),
                        cmd.get("gtm", False), cmd.get("deleted", False),
                        cmd.get("mutated", 0))
                    hm = HashMap()
                    hm.putAll({
                        DRConstants.create_s: self.create_start,
                        DRConstants.create_e: self.create_end,
                        DRConstants.update_s: self.update_start,
                        DRConstants.update_e: self.update_end,
                        DRConstants.expiry_s: self.expire_start,
                        DRConstants.expiry_e: self.expire_end,
                        DRConstants.delete_s: self.delete_start,
                        DRConstants.delete_e: self.delete_end,
                        DRConstants.read_s: self.read_start,
                        DRConstants.read_e: self.read_end
                    })
                    dr = DocRange(hm)
                    ws.dr = dr
                    dg = DocumentGenerator(ws, self.key_type, self.val_type)
                    self.loader_map.update(
                        {bucket.name + scope + collection: dg})

    def wait_for_doc_load_completion(self, tasks, wait_for_stats=True):
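        """Wait for all submitted loader tasks, retry their failed mutations
        once, then optionally wait for stats to settle and verify the
        expected item counts across all buckets."""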
        self.doc_loading_tm.getAllTaskResult()
        self.get_memory_footprint()
        for task in tasks:
            task.result = True
            unique_str = "{}:{}:{}:".format(task.sdk.bucket, task.sdk.scope,
                                            task.sdk.collection)
            for optype, failures in task.failedMutations.items():
                for failure in failures:
                    if failure is not None:
                        print("Test Retrying {}: {}{} -> {}".format(
                            optype, unique_str, failure.id(),
                            failure.err().getClass().getSimpleName()))
                        try:
                            if optype == "create":
                                task.docops.insert(failure.id(),
                                                   failure.document(),
                                                   task.sdk.connection,
                                                   task.setOptions)
                            if optype == "update":
                                task.docops.upsert(failure.id(),
                                                   failure.document(),
                                                   task.sdk.connection,
                                                   task.upsertOptions)
                            if optype == "delete":
                                task.docops.delete(failure.id(),
                                                   task.sdk.connection,
                                                   task.removeOptions)
                        except (ServerOutOfMemoryException,
                                TimeoutException) as e:
                            print("Retry {} failed for key: {} - {}".format(
                                optype, failure.id(), e))
                            task.result = False
                        except (DocumentNotFoundException,
                                DocumentExistsException) as e:
                            pass
            try:
                task.sdk.disconnectCluster()
            except Exception as e:
                print(e)
            self.assertTrue(task.result,
                            "Task Failed: {}".format(task.taskName))
        if wait_for_stats:
            try:
                self.bucket_util._wait_for_stats_all_buckets(
                    self.cluster, self.cluster.buckets, timeout=14400)
                if self.track_failures:
                    self.bucket_util.verify_stats_all_buckets(self.cluster,
                                                              self.final_items,
                                                              timeout=14400)
            except Exception as e:
                if not self.cluster.cloud_cluster:
                    self.get_gdb()
                raise e

    def get_gdb(self):
        for node in self.cluster.nodes_in_cluster:
            gdb_shell = RemoteMachineShellConnection(node)
            gdb_out = gdb_shell.execute_command(
                'gdb -p `(pidof memcached)` -ex "thread apply all bt" -ex detach -ex quit'
            )[0]
            print node.ip
            print gdb_out
            gdb_shell.disconnect()

    def data_validation(self):
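        """Re-read the mutated key ranges with validation enabled (deleted
        ranges are checked as missing) using fresh SDK workload tasks and
        assert that every validation task passed."""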
        self.get_memory_footprint()
        doc_ops = self.mutations_to_validate
        pc = min(self.process_concurrency, 20)
        tasks = list()
        if self._data_validation:
            self.log.info("Validating Active/Replica Docs")
            cmd = dict()
            self.ops_rate = self.input.param("ops_rate", 2000)
            master = Server(self.cluster.master.ip, self.cluster.master.port,
                            self.cluster.master.rest_username,
                            self.cluster.master.rest_password,
                            str(self.cluster.master.memcached_port))
            self.loader_map = dict()
            for bucket in self.cluster.buckets:
                for scope in bucket.scopes.keys():
                    for collection in bucket.scopes[scope].collections.keys():
                        if collection == "_default" and scope == "_default":
                            continue
                        for op_type in doc_ops:
                            cmd.update({"deleted": False})
                            hm = HashMap()
                            if op_type == "create":
                                hm.putAll({
                                    DRConstants.read_s: self.create_start,
                                    DRConstants.read_e: self.create_end
                                })
                            elif op_type == "update":
                                hm.putAll({
                                    DRConstants.read_s: self.update_start,
                                    DRConstants.read_e: self.update_end
                                })
                            elif op_type == "delete":
                                hm.putAll({
                                    DRConstants.read_s: self.delete_start,
                                    DRConstants.read_e: self.delete_end
                                })
                                cmd.update({"deleted": True})
                            else:
                                continue
                            dr = DocRange(hm)
                            ws = WorkLoadSettings(
                                cmd.get("keyPrefix", self.key),
                                cmd.get("keySize", self.key_size),
                                cmd.get("docSize", self.doc_size),
                                cmd.get("cr", 0), cmd.get("rd", 100),
                                cmd.get("up", 0), cmd.get("dl", 0),
                                cmd.get("ex", 0), cmd.get("workers", pc),
                                cmd.get("ops", self.ops_rate),
                                cmd.get("loadType", None),
                                cmd.get("keyType", None),
                                cmd.get("valueType", None),
                                cmd.get("validate",
                                        True), cmd.get("gtm", False),
                                cmd.get("deleted", False),
                                cmd.get("mutated", 0))
                            ws.dr = dr
                            dg = DocumentGenerator(ws, self.key_type,
                                                   self.val_type)
                            self.loader_map.update({
                                bucket.name + scope + collection + op_type:
                                dg
                            })

            tasks = list()
            i = pc
            while i > 0:
                for bucket in self.cluster.buckets:
                    for scope in bucket.scopes.keys():
                        for collection in bucket.scopes[
                                scope].collections.keys():
                            if collection == "_default" and scope == "_default":
                                continue
                            for op_type in doc_ops:
                                if op_type not in [
                                        "create", "update", "delete"
                                ]:
                                    continue
                                client = NewSDKClient(master, bucket.name,
                                                      scope, collection)
                                client.initialiseSDK()
                                self.sleep(1)
                                taskName = "Validate_%s_%s_%s_%s_%s_%s" % (
                                    bucket.name, scope, collection, op_type,
                                    str(i), time.time())
                                task = WorkLoadGenerate(
                                    taskName,
                                    self.loader_map[bucket.name + scope +
                                                    collection + op_type],
                                    client, "NONE", self.maxttl,
                                    self.time_unit, self.track_failures, 0)
                                tasks.append(task)
                                self.doc_loading_tm.submit(task)
                                i -= 1
        self.doc_loading_tm.getAllTaskResult()
        for task in tasks:
            try:
                task.sdk.disconnectCluster()
            except Exception as e:
                print(e)
        for task in tasks:
            self.assertTrue(task.result,
                            "Validation Failed for: %s" % task.taskName)
        self.get_memory_footprint()

    def print_crud_stats(self):
        self.table = TableView(self.log.info)
        self.table.set_headers([
            "Initial Items", "Current Items", "Items Updated", "Items Created",
            "Items Deleted", "Items Expired"
        ])
        self.table.add_row([
            str(self.initial_items),
            str(self.final_items),
            str(abs(self.update_start)) + "-" + str(abs(self.update_end)),
            str(abs(self.create_start)) + "-" + str(abs(self.create_end)),
            str(abs(self.delete_start)) + "-" + str(abs(self.delete_end)),
            str(abs(self.expire_start)) + "-" + str(abs(self.expire_end))
        ])
        self.table.display("Docs statistics")

    def perform_load(self,
                     crash=False,
                     num_kills=1,
                     wait_for_load=True,
                     validate_data=True):
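        """Spawn WorkLoadGenerate tasks for every collection from the current
        loader map, then optionally wait for completion, kill memcached,
        validate the data, print stats and fail on coredumps or critical log
        messages. Returns the tasks immediately when wait_for_load is False."""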
        self.get_memory_footprint()
        self._loader_dict()
        master = Server(self.cluster.master.ip, self.cluster.master.port,
                        self.cluster.master.rest_username,
                        self.cluster.master.rest_password,
                        str(self.cluster.master.memcached_port))
        tasks = list()
        i = self.process_concurrency
        while i > 0:
            for bucket in self.cluster.buckets:
                for scope in bucket.scopes.keys():
                    for collection in bucket.scopes[scope].collections.keys():
                        if collection == "_default" and scope == "_default":
                            continue
                        client = NewSDKClient(master, bucket.name, scope,
                                              collection)
                        client.initialiseSDK()
                        self.sleep(1)
                        self.get_memory_footprint()
                        taskName = "Loader_%s_%s_%s_%s_%s" % (
                            bucket.name, scope, collection, str(i),
                            time.time())
                        task = WorkLoadGenerate(
                            taskName,
                            self.loader_map[bucket.name + scope + collection],
                            client, self.durability_level, self.maxttl,
                            self.time_unit, self.track_failures, 0)
                        tasks.append(task)
                        self.doc_loading_tm.submit(task)
                        i -= 1

        if wait_for_load:
            self.wait_for_doc_load_completion(tasks)
            self.get_memory_footprint()
        else:
            return tasks

        if crash:
            self.kill_memcached(num_kills=num_kills)

        if validate_data:
            self.data_validation()

        self.print_stats()

        if self.cluster.cloud_cluster:
            return

        result = self.check_coredump_exist(self.cluster.nodes_in_cluster)
        if result:
            self.PrintStep("CRASH | CRITICAL | WARN messages found in cb_logs")
            if self.assert_crashes_on_load:
                self.task_manager.abort_all_tasks()
                self.doc_loading_tm.abortAllTasks()
                self.assertFalse(result)

    def get_magma_disk_usage(self, bucket=None):
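        """Return the cumulative disk usage in MB of the magma kvstore, wal,
        key tree and seq tree directories across all nodes, warning when a
        kvstore directory holds 300 or more files."""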
        if bucket is None:
            bucket = self.bucket
        servers = self.cluster.nodes_in_cluster
        kvstore = 0
        wal = 0
        keyTree = 0
        seqTree = 0
        data_files = 0

        for server in servers:
            shell = RemoteMachineShellConnection(server)
            bucket_path = os.path.join(
                RestConnection(server).get_data_path(), bucket.name)
            kvstore += int(
                shell.execute_command("du -cm %s | tail -1 | awk '{print $1}'\
            " % os.path.join(bucket_path, "magma.*/kv*"))[0][0].split('\n')[0])
            wal += int(
                shell.execute_command("du -cm %s | tail -1 | awk '{print $1}'\
            " % os.path.join(bucket_path, "magma.*/wal"))[0][0].split('\n')[0])
            keyTree += int(
                shell.execute_command("du -cm %s | tail -1 | awk '{print $1}'\
            " % os.path.join(bucket_path,
                             "magma.*/kv*/rev*/key*"))[0][0].split('\n')[0])
            seqTree += int(
                shell.execute_command("du -cm %s | tail -1 | awk '{print $1}'\
            " % os.path.join(bucket_path,
                             "magma.*/kv*/rev*/seq*"))[0][0].split('\n')[0])

            cmd = 'find ' + bucket_path + '/magma*/ -maxdepth 1 -type d \
            -print0 | while read -d "" -r dir; do files=("$dir"/*/*/*); \
            printf "%d,%s\n" "${#files[@]}" "$dir"; done'

            data_files = shell.execute_command(cmd)[0]
            for files in data_files:
                if "kvstore" in files and int(files.split(",")[0]) >= 300:
                    self.log.warn("Number of files in {}--{} is {}".format(
                        server.ip,
                        files.split(",")[1].rstrip(),
                        files.split(",")[0]))
            shell.disconnect()
        self.log.debug("Total Disk usage for kvstore is {}MB".format(kvstore))
        self.log.debug("Total Disk usage for wal is {}MB".format(wal))
        self.log.debug("Total Disk usage for keyTree is {}MB".format(keyTree))
        self.log.debug("Total Disk usage for seqTree is {}MB".format(seqTree))
        return kvstore, wal, keyTree, seqTree

    def print_stats(self):
        self.bucket_util.print_bucket_stats(self.cluster)
        self.cluster_util.print_cluster_stats(self.cluster)
        self.print_crud_stats()
        for bucket in self.cluster.buckets:
            self.get_bucket_dgm(bucket)
            if bucket.storageBackend == Bucket.StorageBackend.magma and not self.cluster.cloud_cluster:
                self.get_magma_disk_usage(bucket)
                self.check_fragmentation_using_magma_stats(bucket)
                self.check_fragmentation_using_kv_stats(bucket)

    def PrintStep(self, msg=None):
        print "\n"
        print "\t", "#" * 60
        print "\t", "#"
        print "\t", "#  %s" % msg
        print "\t", "#"
        print "\t", "#" * 60
        print "\n"

    def check_fragmentation_using_kv_stats(self, bucket, servers=None):
        result = dict()
        if servers is None:
            servers = self.cluster.kv_nodes + [self.cluster.master]
        if type(servers) is not list:
            servers = [servers]
        for server in servers:
            frag_val = self.bucket_util.get_fragmentation_kv(
                self.cluster, bucket, server)
            self.log.debug("Current Fragmentation for node {} is {} \
            ".format(server.ip, frag_val))
            result.update({server.ip: frag_val})
        self.log.info("KV stats fragmentation values {}".format(result))

    def dump_magma_stats(self, server, bucket, shard, kvstore):
        if bucket.storageBackend != Bucket.StorageBackend.magma or self.cluster.cloud_cluster:
            return
        shell = RemoteMachineShellConnection(server)
        data_path = RestConnection(server).get_data_path()
        while not self.stop_stats:
            for bucket in self.cluster.buckets:
                self.log.info(
                    self.get_magma_stats(bucket, server, "rw_0:magma"))
                self.dump_seq_index(shell, data_path, bucket.name, shard,
                                    kvstore)
            self.sleep(600)
        shell.disconnect()

    def dump_seq_index(self, shell, data_path, bucket, shard, kvstore):
        magma_path = os.path.join(data_path, bucket, "magma.{}")
        magma = magma_path.format(shard)
        cmd = '/opt/couchbase/bin/magma_dump {}'.format(magma)
        cmd += ' --kvstore {} --tree seq'.format(kvstore)
        result = shell.execute_command(cmd)[0]
        self.log.info("Seq Tree for {}:{}:{}:{}: \n{}".format(
            shell.ip, bucket, shard, kvstore, result))

    def check_fragmentation_using_magma_stats(self, bucket, servers=None):
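        """Collect per-shard magma fragmentation stats from every KV node and
        return True only if the worst value stays below the configured
        fragmentation threshold."""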
        result = dict()
        stats = list()
        if servers is None:
            servers = self.cluster.kv_nodes + [self.cluster.master]
        if type(servers) is not list:
            servers = [servers]
        for server in servers:
            fragmentation_values = list()
            shell = RemoteMachineShellConnection(server)
            output = shell.execute_command(
                "lscpu | grep 'CPU(s)' | head -1 | awk '{print $2}'"
            )[0][0].split('\n')[0]
            shell.disconnect()
            self.log.debug("machine: {} - core(s): {}".format(
                server.ip, output))
            for i in range(min(int(output), 64)):
                grep_field = "rw_{}:magma".format(i)
                _res = self.get_magma_stats(bucket, server)
                fragmentation_values.append(
                    json.loads(_res[server.ip][grep_field])["Fragmentation"])
                stats.append(_res)
            result.update({server.ip: fragmentation_values})
        self.log.info(stats[0])
        res = list()
        for value in result.values():
            res.append(max(value))
        self.log.info("magma stats fragmentation result {}".format(result))
        return max(res) < float(self.fragmentation) / 100

    def get_magma_stats(self, bucket, server=None):
        magma_stats_for_all_servers = dict()
        cbstat_obj = Cbstats(server)
        result = cbstat_obj.magma_stats(bucket.name)
        magma_stats_for_all_servers[server.ip] = result
        return magma_stats_for_all_servers

    def pause_rebalance(self):
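        """Stop and restart the running rebalance at each 20% progress mark
        and return the task of the last restarted rebalance."""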
        rest = RestConnection(self.cluster.master)
        i = 1
        rebalance_task = None
        self.sleep(10, "Let the rebalance begin!")
        expected_progress = 20
        while expected_progress < 100:
            expected_progress = 20 * i
            reached = self.cluster_util.rebalance_reached(
                rest, expected_progress)
            self.assertTrue(
                reached, "Rebalance failed or did not reach {0}%".format(
                    expected_progress))
            if not self.cluster_util.is_cluster_rebalanced(rest):
                self.log.info("Stop the rebalance")
                stopped = rest.stop_rebalance(wait_timeout=self.wait_timeout /
                                              3)
                self.assertTrue(stopped, msg="Unable to stop rebalance")
                rebalance_task = self.task.async_rebalance(
                    self.cluster, [], [], retry_get_process_num=3000)
                self.sleep(
                    10, "Rebalance % ={}. Let the rebalance begin!".format(
                        expected_progress))
            i += 1
        return rebalance_task

    def abort_rebalance(self, rebalance, error_type="kill_memcached"):
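        """Interrupt the running rebalance at successive 20% progress marks by
        injecting error_type on all nodes, recover, and restart the rebalance
        whenever it actually failed; return the last restarted task, or None
        if the rebalance completed despite the error."""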
        self.sleep(30, "Let the rebalance begin!")
        rest = RestConnection(self.cluster.master)
        i = 1
        expected_progress = 20
        rebalance_task = rebalance
        while expected_progress < 80:
            expected_progress = 20 * i
            reached = self.cluster_util.rebalance_reached(rest,
                                                          expected_progress,
                                                          wait_step=10,
                                                          num_retry=3600)
            self.assertTrue(
                reached, "Rebalance failed or did not reach {0}%".format(
                    expected_progress))

            if not self.cluster_util.is_cluster_rebalanced(rest):
                self.log.info("Abort rebalance")
                self._induce_error(error_type, self.cluster.nodes_in_cluster)
                result = self.check_coredump_exist(
                    self.cluster.nodes_in_cluster)
                if result:
                    self.task_manager.abort_all_tasks()
                    self.doc_loading_tm.abortAllTasks()
                    self.assertFalse(
                        result,
                        "CRASH | CRITICAL | WARN messages found in cb_logs")
                self.sleep(60, "Sleep after error introduction")
                self._recover_from_error(error_type)
                result = self.check_coredump_exist(
                    self.cluster.nodes_in_cluster)
                if result:
                    self.task_manager.abort_all_tasks()
                    self.doc_loading_tm.abortAllTasks()
                    self.assertFalse(
                        result,
                        "CRASH | CRITICAL | WARN messages found in cb_logs")
                try:
                    self.task_manager.get_task_result(rebalance_task)
                except RebalanceFailedException:
                    pass
                if rebalance_task.result:
                    self.log.error(
                        "Rebalance passed/finished which is not expected")
                    self.log.info(
                        "Rebalance % after rebalance finished = {}".format(
                            expected_progress))
                    return None
                else:
                    self.log.info(
                        "Restarting Rebalance after killing at {}".format(
                            expected_progress))
                    rebalance_task = self.task.async_rebalance(
                        self.cluster, [],
                        self.servs_out,
                        retry_get_process_num=3000)
                    self.sleep(120, "Let the rebalance begin after abort")
                    self.log.info("Rebalance % = {}".format(
                        self.rest._rebalance_progress()))
            i += 1
        return rebalance_task

    def crash_memcached(self, nodes=None, num_kills=1, graceful=False):
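        """Repeatedly kill memcached on the given nodes at random 60-120s
        intervals until self.crashes iterations are done or self.stop_crash
        is set."""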
        self.stop_crash = False
        self.crash_count = 0
        if not nodes:
            nodes = self.cluster.kv_nodes + [self.cluster.master]

        while not self.stop_crash:
            self.get_memory_footprint()
            sleep = random.randint(60, 120)
            self.sleep(
                sleep, "Iteration:{} waiting to kill memc on all nodes".format(
                    self.crash_count))
            self.kill_memcached(nodes,
                                num_kills=num_kills,
                                graceful=graceful,
                                wait=True)
            self.crash_count += 1
            if self.crash_count > self.crashes:
                self.stop_crash = True
        self.sleep(300)

    def kill_memcached(self,
                       servers=None,
                       num_kills=1,
                       graceful=False,
                       wait=True):
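        """Kill (or gracefully restart) memcached num_kills times on each
        server, fail fast if coredumps or critical log messages appear, and
        optionally wait for bucket warmup on every server."""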
        if not servers:
            servers = self.cluster.kv_nodes + [self.cluster.master]

        for server in servers:
            for _ in xrange(num_kills):
                if num_kills > 1:
                    self.sleep(
                        2,
                        "Sleep for 2 seconds b/w cont memc kill on same node.")
                shell = RemoteMachineShellConnection(server)
                if graceful:
                    shell.restart_couchbase()
                else:
                    shell.kill_memcached()
                shell.disconnect()
            self.sleep(
                5, "Sleep for 5 seconds before killing memc on next node.")

        result = self.check_coredump_exist(self.cluster.nodes_in_cluster)
        if result:
            self.stop_crash = True
            self.task_manager.abort_all_tasks()
            self.doc_loading_tm.abortAllTasks()
            self.assertFalse(
                result, "CRASH | CRITICAL | WARN messages found in cb_logs")

        if wait:
            for server in servers:
                self.check_warmup_complete(server)

    def check_warmup_complete(self, server):
        for bucket in self.cluster.buckets:
            start_time = time.time()
            result = self.bucket_util._wait_warmup_completed(
                [server], bucket, wait_time=self.wait_timeout * 20)
            if not result:
                self.stop_crash = True
                self.task_manager.abort_all_tasks()
                self.doc_loading_tm.abortAllTasks()
                self.assertTrue(
                    result,
                    "Warm-up failed in %s seconds" % (self.wait_timeout * 20))
            else:
                self.log.info("Bucket:%s warm-up completed in %s." %
                              (bucket.name, str(time.time() - start_time)))

    def set_num_writer_and_reader_threads(self,
                                          num_writer_threads="default",
                                          num_reader_threads="default",
                                          num_storage_threads="default"):
        bucket_helper = BucketHelper(self.cluster.master)
        bucket_helper.update_memcached_settings(
            num_writer_threads=num_writer_threads,
            num_reader_threads=num_reader_threads,
            num_storage_threads=num_storage_threads)