def test_chain_rebalance_out_cc(self):
    self.setup_for_test(skip_data_loading=True)
    self.ingestion_in_progress()

    total_cbas_nodes = len(self.otpNodes)
    while total_cbas_nodes > 1:
        cc_ip = self.cbas_util.retrieve_cc_ip(shell=self.shell)
        for otpnode in self.otpNodes:
            if otpnode.ip == cc_ip:
                self.cluster_util.remove_node(self.cluster, [otpnode],
                                              wait_for_rebalance=True)
                # Re-point the CBAS connection to a surviving NC node
                for server in self.cluster.cbas_nodes:
                    if cc_ip != server.ip:
                        self.cbas_util.closeConn()
                        self.cbas_util = CbasUtil(self.cluster.master, server)
                        self.cbas_util.createConn("default")
                        self.cbas_node = server
                        break
                # items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name)
                # self.log.info("Items before service restart: %s" % items_in_cbas_bucket)
                items_in_cbas_bucket = 0
                start_time = time.time()
                while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) \
                        and time.time() < start_time + 60:
                    try:
                        items_in_cbas_bucket, _ = \
                            self.cbas_util.get_num_items_in_cbas_dataset(
                                self.cbas_dataset_name)
                    except:
                        pass
                    self.sleep(1)
                self.log.info("After rebalance operation docs in CBAS bucket : %s"
                              % items_in_cbas_bucket)

                if self.num_items < items_in_cbas_bucket < self.num_items * 2:
                    self.log.info("Data ingestion was interrupted successfully")
                elif items_in_cbas_bucket < self.num_items:
                    self.log.info("Data ingestion was interrupted and is "
                                  "restarting from 0")
                else:
                    self.log.info("Data ingestion was not interrupted; it "
                                  "completed before the rebalance operation")

                query = "select count(*) from {0};".format(self.cbas_dataset_name)
                self.cbas_util._run_concurrent_queries(query, "immediate", 10)
                break
        total_cbas_nodes -= 1

    if not self.cbas_util.validate_cbas_dataset_items_count(
            self.cbas_dataset_name, self.num_items * 2):
        self.fail("No. of items in CBAS dataset do not match that in the CB bucket")
    self.ingest_more_data()
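# The 60-second polling loop above (and the similar loops in the network-stop
# and service-restart tests further down) could be factored into one helper.
# A minimal sketch, assuming get_num_items_in_cbas_dataset() returns a
# (count, _) tuple and may raise while the analytics service is recovering;
# the helper name and signature are illustrative, not part of CbasUtil.
import time

def wait_for_cbas_item_count(cbas_util, dataset_name, timeout=60, poll_interval=1):
    """Poll the dataset until a positive item count is observed or the
    timeout expires. Returns the last observed count (-1 if none was seen)."""
    end_time = time.time() + timeout
    items = -1
    while items <= 0 and time.time() < end_time:
        try:
            items, _ = cbas_util.get_num_items_in_cbas_dataset(dataset_name)
        except Exception:
            # CBAS may still be rebalancing/recovering; retry until the timeout
            pass
        time.sleep(poll_interval)
    return items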
def test_rebalance_swap_multiple_cbas_on_a_busy_system(self):
    '''
    1. We have a 4 node cluster with 1 KV and 3 CBAS. Assume the IPs end
       with 101(KV), 102(CBAS), 103(CBAS), 104(CBAS).
    2. Post initial setup - 101 running KV and 102 running CBAS as CC node.
    3. As part of the test, add an extra NC node that we will swap rebalance
       later - adding 103 and rebalance.
    4. If swap rebalancing an NC - select the node added in #3 for removal
       and 104 to add during the swap.
    5. If swap rebalancing the CC - select the CC node for removal and 104
       to add during the swap.
    '''
    self.log.info('Read service input param')
    node_services = []
    node_services.append(self.input.param('service', "cbas"))

    self.log.info("Rebalance in a CBAS node, this node will be removed during swap")
    self.cluster_util.add_node(node=self.rebalanceServers[1],
                               services=node_services)

    self.log.info("Setup CBAS")
    self.setup_for_test(skip_data_loading=True)

    self.log.info("Run KV ops in async while rebalance is in progress")
    json_generator = JsonGenerator()
    generators = json_generator.generate_docs_simple(
        docs_per_day=self.num_items, start=0)
    tasks = self.bucket_util._async_load_all_buckets(self.cluster, generators,
                                                     "create", 0)

    self.log.info("Run concurrent queries to simulate busy system")
    statement = "select sleep(count(*),50000) from {0} where mutated=0;"\
        .format(self.cbas_dataset_name)
    handles = self.cbas_util._run_concurrent_queries(
        statement, self.mode, self.num_concurrent_queries)

    self.log.info("Fetch node to remove during rebalance")
    self.rebalance_cc = self.input.param("rebalance_cc", False)
    out_nodes = []
    nodes = self.rest.node_statuses()
    reinitialize_cbas_util = False
    for node in nodes:
        if self.rebalance_cc and (node.ip == self.cbas_node.ip):
            out_nodes.append(node)
            reinitialize_cbas_util = True
        elif not self.rebalance_cc and node.ip == self.rebalanceServers[1].ip:
            out_nodes.append(node)

    self.log.info("Swap rebalance CBAS nodes")
    self.cluster_util.add_node(node=self.rebalanceServers[3],
                               services=node_services, rebalance=False)
    self.remove_node([out_nodes[0]], wait_for_rebalance=True)

    self.log.info("Get KV ops result")
    for task in tasks:
        self.task_manager.get_task_result(task)

    if reinitialize_cbas_util is True:
        self.cbas_util = CbasUtil(self.cluster.master,
                                  self.rebalanceServers[3], self.task)
        self.cbas_util.createConn("default")

    self.log.info("Log concurrent query status")
    self.cbas_util.log_concurrent_query_outcome(self.cluster.master, handles)

    count_n1ql = self.rest.query_tool(
        'select count(*) from %s' % self.cb_bucket_name)['results'][0]['$1']
    if not self.cbas_util.validate_cbas_dataset_items_count(
            self.cbas_dataset_name, count_n1ql, 0):
        self.fail("No. of items in CBAS dataset do not match that in the CB bucket")
def test_rebalance_out_multiple_cbas_on_a_busy_system(self): node_services = [] node_services.append(self.input.param('service',"cbas")) self.log.info("Rebalance in CBAS nodes") self.cluster_util.add_node(node=self.rebalanceServers[1], services=node_services) self.cluster_util.add_node(node=self.rebalanceServers[3], services=node_services) self.log.info("Setup CBAS") self.setup_for_test(skip_data_loading=True) self.log.info("Run KV ops in async while rebalance is in progress") json_generator = JsonGenerator() generators = json_generator.generate_docs_simple(docs_per_day=self.num_items, start=0) tasks = self.bucket_util._async_load_all_buckets(self.cluster, generators, "create", 0) self.log.info("Run concurrent queries to simulate busy system") statement = "select sleep(count(*),50000) from {0} where mutated=0;".format(self.cbas_dataset_name) handles = self.cbas_util._run_concurrent_queries(statement, self.mode, self.num_concurrent_queries) self.log.info("Fetch and remove nodes to rebalance out") self.rebalance_cc = self.input.param("rebalance_cc", False) out_nodes = [] nodes = self.rest.node_statuses() if self.rebalance_cc: for node in nodes: if node.ip == self.cbas_node.ip or node.ip == self.servers[1].ip: out_nodes.append(node) self.cbas_util.closeConn() self.log.info("Reinitialize CBAS utils with ip %s, since CC node is rebalanced out" %self.servers[3].ip) self.cbas_util = CbasUtil(self.cluster.master, self.servers[3], self.task) self.cbas_util.createConn("default") else: for node in nodes: if node.ip == self.servers[3].ip or node.ip == self.servers[1].ip: out_nodes.append(node) self.log.info("Rebalance out CBAS nodes %s %s" % (out_nodes[0].ip, out_nodes[1].ip)) self.remove_all_nodes_then_rebalance([out_nodes[0],out_nodes[1]]) self.log.info("Get KV ops result") for task in tasks: self.task_manager.get_task_result(task) self.log.info("Log concurrent query status") self.cbas_util.log_concurrent_query_outcome(self.cluster.master, handles) if not self.cbas_util.validate_cbas_dataset_items_count(self.cbas_dataset_name, self.num_items, 0): self.fail("No. of items in CBAS dataset do not match that in the CB bucket")
def setUp(self):
    super(MemCompressionUpgradeTests, self).setUp()
    self.cbas_util = CbasUtil(self.task)
    self.cbas_spec_name = self.input.param("cbas_spec", "local_datasets")
    self.rebalance_util = CBASRebalanceUtil(
        self.cluster_util, self.bucket_util, self.task,
        vbucket_check=True, cbas_util=self.cbas_util)
    cbas_cc_node_ip = None
    retry = 0
    self.cluster.cbas_nodes = \
        self.cluster_util.get_nodes_from_services_map(
            self.cluster, service_type="cbas", get_all_nodes=True,
            servers=self.cluster.nodes_in_cluster)
def test_swap_rebalance_cb_cbas_together(self): self.log.info("Creates cbas buckets and dataset") wait_for_rebalance = self.input.param("wait_for_rebalance", True) dataset_count_query = "select count(*) from {0};".format(self.cbas_dataset_name) self.setup_for_test() self.log.info("Add KV node and don't rebalance") self.cluster_util.add_node(node=self.rebalanceServers[1], rebalance=False) self.log.info("Add cbas node and don't rebalance") self.cluster_util.add_node(node=self.rebalanceServers[3], rebalance=False) otpnodes = [] nodes = self.rest.node_statuses() for node in nodes: if node.ip == self.rebalanceServers[0].ip or node.ip == self.rebalanceServers[2].ip: otpnodes.append(node) self.log.info("Remove master node") self.remove_node(otpnode=otpnodes, wait_for_rebalance=wait_for_rebalance) self.cluster.master = self.rebalanceServers[1] self.log.info("Create instances pointing to new master nodes") c_utils = CbasUtil(self.rebalanceServers[1], self.rebalanceServers[3], self.task) c_utils.createConn(self.cb_bucket_name) self.log.info("Create reference to SDK client") client = SDKClient(scheme="couchbase", hosts=[self.rebalanceServers[1].ip], bucket=self.cb_bucket_name, password=self.rebalanceServers[1].rest_password) self.log.info("Add more document to default bucket") documents = ['{"name":"value"}'] * (self.num_items//10) document_id_prefix = "custom-id-" client.insert_custom_json_documents(document_id_prefix, documents) self.log.info( "Run queries as rebalance is in progress : Rebalance state:%s" % self.rest._rebalance_progress_status()) handles = c_utils._run_concurrent_queries(dataset_count_query, "immediate", 2000, batch_size=self.concurrent_batch_size) self.log.info("Log concurrent query status") self.cbas_util.log_concurrent_query_outcome(self.cluster.master, handles) if not c_utils.validate_cbas_dataset_items_count(self.cbas_dataset_name, self.num_items + (self.num_items//10) , 0): self.fail("No. of items in CBAS dataset do not match that in the CB bucket")
class MultiNodeFailOver(CBASBaseTest):
    """
    Class contains test cases for multiple analytics node failures
    [CC+NC, NC+NC].
    """

    def setUp(self):
        super(MultiNodeFailOver, self).setUp()

        self.log.info("Read the input params")
        self.nc_nc_fail_over = self.input.param("nc_nc_fail_over", True)
        self.create_secondary_indexes = self.input.param(
            "create_secondary_indexes", False)
        # In this fail over we fail the first 3 added cbas nodes
        # [CC + first NC + second NC]
        self.meta_data_node_failure = self.input.param(
            "meta_data_node_failure", False)

        self.log.info("Add CBAS nodes to cluster")
        self.assertIsNotNone(
            self.cluster_util.add_node(self.cluster.cbas_nodes[0],
                                       services=["cbas"], rebalance=False),
            msg="Add node failed")
        self.assertIsNotNone(
            self.cluster_util.add_node(self.cluster.cbas_nodes[1],
                                       services=["cbas"], rebalance=True),
            msg="Add node failed")
        # This node won't be failed over
        if self.meta_data_node_failure:
            self.assertIsNotNone(
                self.cluster_util.add_node(self.cluster.cbas_nodes[2],
                                           services=["cbas"], rebalance=True),
                msg="Add node failed")

        self.log.info("Create connection")
        self.cbas_util.createConn(self.cb_bucket_name)

        self.log.info("Load documents in KV bucket")
        self.perform_doc_ops_in_all_cb_buckets("create", 0, self.num_items)

        self.log.info("Create dataset")
        self.cbas_util.create_dataset_on_bucket(self.cb_bucket_name,
                                                self.cbas_dataset_name)

        self.log.info("Create secondary index")
        if self.create_secondary_indexes:
            self.index_fields = "profession:string,number:bigint"
            create_idx_statement = "create index {0} on {1}({2});".format(
                self.index_name, self.cbas_dataset_name, self.index_fields)
            status, metrics, errors, results, _ = \
                self.cbas_util.execute_statement_on_cbas_util(
                    create_idx_statement)
            self.assertTrue(status == "success", "Create Index query failed")
            self.assertTrue(self.cbas_util.verify_index_created(
                self.index_name, self.index_fields.split(","),
                self.cbas_dataset_name)[0])

        self.log.info("Connect Local link")
        self.cbas_util.connect_link()

        self.log.info("Validate dataset count")
        self.cbas_util.validate_cbas_dataset_items_count(
            self.cbas_dataset_name, self.num_items)

        self.log.info("Pick nodes to fail over")
        self.fail_over_nodes = []
        if self.nc_nc_fail_over:
            self.log.info("This is an NC+NC fail over")
            self.fail_over_nodes.append(self.cluster.cbas_nodes[0])
            self.fail_over_nodes.append(self.cluster.cbas_nodes[1])
            self.neglect_failures = False
        else:
            self.log.info("This is an NC+CC fail over")
            self.fail_over_nodes.append(self.cluster.cbas_nodes[0])
            self.fail_over_nodes.append(self.cbas_node)

            self.cbas_util.closeConn()
            self.cbas_util = CbasUtil(self.cluster.master,
                                      self.cluster.cbas_nodes[1], self.task)
            if self.meta_data_node_failure:
                self.fail_over_nodes.append(self.cluster.cbas_nodes[1])
                self.cbas_util = CbasUtil(self.cluster.master,
                                          self.cluster.cbas_nodes[2],
                                          self.task)
            self.cbas_util.createConn(self.cb_bucket_name)
            self.neglect_failures = True

    def test_cbas_multi_node_fail_over(self):
        self.log.info("Fail over the nodes")
        fail_over_task = self._cb_cluster.async_failover(self.input.servers,
                                                         self.fail_over_nodes)
        self.assertTrue(self.task_manager.get_task_result(fail_over_task),
                        msg="Fail over of nodes failed")

        self.log.info("Rebalance remaining nodes")
        result = self.cluster_util.rebalance()
        self.assertTrue(result, "Rebalance operation failed")

        self.log.info("Validate dataset count")
        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(
            self.cbas_dataset_name, self.num_items),
            msg="Document count mismatch")

    def test_cbas_multi_node_fail_over_busy_system(self):
        self.log.info("Perform doc operations async")
        tasks = self.perform_doc_ops_in_all_cb_buckets(
            "create", start_key=self.num_items,
            end_key=self.num_items + (self.num_items / 4), _async=True)

        self.log.info("Run concurrent queries to simulate busy system")
        statement = "select sleep(count(*),50000) from {0} where mutated=0;"\
            .format(self.cbas_dataset_name)
        try:
            self.cbas_util._run_concurrent_queries(statement, "async", 10,
                                                   batch_size=10)
        except Exception as e:
            if self.neglect_failures:
                self.log.info("Neglecting failed queries, to handle fail over"
                              " of the CC node")
            else:
                raise e

        self.log.info("Fail over the nodes")
        fail_over_task = self._cb_cluster.async_failover(self.input.servers,
                                                         self.fail_over_nodes)
        self.assertTrue(self.task_manager.get_task_result(fail_over_task),
                        msg="Fail over of nodes failed")

        self.log.info("Rebalance remaining nodes")
        result = self.cluster_util.rebalance()
        self.assertTrue(result, "Rebalance operation failed")

        for task in tasks:
            self.log.info(self.task_manager.get_task_result(task))

        self.log.info("Validate dataset count")
        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(
            self.cbas_dataset_name, self.num_items + self.num_items / 4),
            msg="Document count mismatch")

    def tearDown(self):
        super(MultiNodeFailOver, self).tearDown()
def test_auto_retry_failed_rebalance(self): # Auto-retry rebalance settings body = {"enabled": "true", "afterTimePeriod": self.retry_time, "maxAttempts": self.num_retries} rest = RestConnection(self.cluster.master) rest.set_retry_rebalance_settings(body) result = rest.get_retry_rebalance_settings() self.log.info("Pick the incoming and outgoing nodes during rebalance") self.rebalance_type = self.input.param("rebalance_type", "in") nodes_to_add = [self.rebalanceServers[1]] nodes_to_remove = [] reinitialize_cbas_util = False if self.rebalance_type == 'out': nodes_to_remove.append(self.rebalanceServers[1]) self.cluster_util.add_node(self.rebalanceServers[1]) nodes_to_add = [] elif self.rebalance_type == 'swap': self.cluster_util.add_node(nodes_to_add[0], rebalance=False) nodes_to_remove.append(self.cbas_node) reinitialize_cbas_util = True self.log.info("Incoming nodes - %s, outgoing nodes - %s. For rebalance type %s " % ( nodes_to_add, nodes_to_remove, self.rebalance_type)) self.log.info("Creates cbas buckets and dataset") dataset_count_query = "select count(*) from {0};".format(self.cbas_dataset_name) self.setup_for_test() self.log.info("Perform async doc operations on KV") json_generator = JsonGenerator() generators = json_generator.generate_docs_simple(docs_per_day=self.num_items * 3 / 2, start=self.num_items) kv_task = self.bucket_util._async_load_all_buckets(self.cluster, generators, "create", 0, batch_size=5000) self.log.info("Run concurrent queries on CBAS") handles = self.cbas_util._run_concurrent_queries(dataset_count_query, "async", self.num_concurrent_queries) self.log.info("Fetch the server to restart couchbase on") restart_couchbase_on_incoming_or_outgoing_node = self.input.param( "restart_couchbase_on_incoming_or_outgoing_node", True) if not restart_couchbase_on_incoming_or_outgoing_node: node = self.cbas_node else: node = self.rebalanceServers[1] shell = RemoteMachineShellConnection(node) try: self.log.info("Rebalance nodes") self.task.async_rebalance(self.servers, nodes_to_add, nodes_to_remove) self.sleep(10, message="Restarting couchbase after 10s on node %s" % node.ip) shell.restart_couchbase() self.sleep(30, message="Waiting for service to be back again...") self.sleep(self.retry_time, "Wait for retry time to complete and then check the rebalance results") reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.log.info("Rebalance status : {0}".format(reached)) self.sleep(20) self._check_retry_rebalance_succeeded() if reinitialize_cbas_util is True: self.cbas_util = CbasUtil(self.cluster.master, self.rebalanceServers[1], self.task) self.cbas_util.createConn("default") self.cbas_util.wait_for_cbas_to_recover() self.log.info("Get KV ops result") for task in kv_task: self.task_manager.get_task_result(task) self.log.info("Log concurrent query status") self.cbas_util.log_concurrent_query_outcome(self.cluster.master, handles) self.log.info("Validate dataset count on CBAS") if not self.cbas_util.validate_cbas_dataset_items_count(self.cbas_dataset_name, self.num_items * 3 / 2, 0): self.fail("No. of items in CBAS dataset do not match that in the CB bucket") except Exception as e: self.fail("Some exception occurred : {0}".format(e.message)) finally: body = {"enabled": "false"} rest.set_retry_rebalance_settings(body)
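# The enable/try/finally-disable pattern around the auto-retry rebalance
# settings in the test above can be wrapped in a context manager so the
# settings are always restored. A minimal sketch, reusing only the
# RestConnection calls already used above (set_retry_rebalance_settings,
# get_retry_rebalance_settings); the helper itself is illustrative.
from contextlib import contextmanager

@contextmanager
def auto_retry_rebalance(rest, after_time_period, max_attempts):
    """Temporarily enable auto-retry of failed rebalances via REST and
    disable it again on exit, even if the test body raises."""
    rest.set_retry_rebalance_settings({"enabled": "true",
                                       "afterTimePeriod": after_time_period,
                                       "maxAttempts": max_attempts})
    try:
        yield rest.get_retry_rebalance_settings()
    finally:
        rest.set_retry_rebalance_settings({"enabled": "false"})

# Hypothetical usage inside the test:
#   with auto_retry_rebalance(rest, self.retry_time, self.num_retries):
#       ...rebalance, restart couchbase, verify the retry succeeded...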
def test_logging_configurations_are_shared_across_cbas_node(self): self.log.info("Add a cbas node") result = self.cluster_util.add_node(self.cluster.cbas_nodes[0], services=["cbas"], rebalance=True) self.assertTrue(result, msg="Failed to add CBAS node") self.log.info("Delete all loggers") self.cbas_util.delete_all_loggers_on_cbas() self.log.info( "Set the logging level using json object from default logger config dictionary on master cbas node" ) status, content, response = self.cbas_util.set_log_level_on_cbas( CbasLogging.DEFAULT_LOGGER_CONFIG_DICT) self.assertTrue(status, msg="Response status incorrect for SET request") self.log.info("Verify logging configuration that we set on cbas Node") for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items(): status, content, response = self.cbas_util.get_specific_cbas_log_level( name) self.assertTrue(status, msg="Response status incorrect for GET request") self.assertEquals(content, level, msg="Logger configuration mismatch for logger " + name) self.sleep( timeout=10, message= "Waiting for logger configuration to be copied across cbas nodes") self.log.info("Verify logging configuration on other cbas node") for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items(): status, content, response = CbasUtil( self.cluster.master, self.cluster.cbas_nodes[0]).get_specific_cbas_log_level(name) self.assertTrue(status, msg="Response status incorrect for GET request") self.assertEquals(content, level, msg="Logger configuration mismatch for logger " + name) self.log.info("Update logging configuration on other cbas node") logger_level = self.input.param("logger_level", "FATAL") logger_name = self.input.param("logger_name", "org.apache.asterix") status, content, response = CbasUtil( self.cluster.master, self.cluster.cbas_nodes[0]).set_specific_log_level_on_cbas( logger_name, logger_level) self.assertTrue(status, msg="Status mismatch for SET") self.sleep( timeout=10, message= "Waiting for logger configuration to be copied across cbas nodes") self.log.info("Assert log level on master cbas node") status, content, response = self.cbas_util.get_specific_cbas_log_level( logger_name) self.assertTrue(status, msg="Status mismatch for GET") self.assertEquals(content, logger_level, msg="Logger configuration mismatch for " + logger_name)
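# The test above reads logger names and levels from
# CbasLogging.DEFAULT_LOGGER_CONFIG_DICT. A minimal sketch of the shape that
# dictionary is assumed to have (logger name -> level); the entries below are
# illustrative, only "org.apache.asterix" and the "FATAL"/level parameters are
# taken from the test above.
EXAMPLE_LOGGER_CONFIG_DICT = {
    "org.apache.asterix": "DEBUG",   # assumed logger name/level pair
    "org.apache.hyracks": "INFO",    # assumed logger name/level pair
}

# Hypothetical usage mirroring the calls in the test:
#   status, content, response = cbas_util.set_log_level_on_cbas(EXAMPLE_LOGGER_CONFIG_DICT)
#   status, content, response = cbas_util.get_specific_cbas_log_level("org.apache.asterix")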
class CBASBaseTest(BaseTestCase): def setUp(self, add_default_cbas_node=True): super(CBASBaseTest, self).setUp() if self._testMethodDoc: self.log.info("Starting Test: %s - %s" % (self._testMethodName, self._testMethodDoc)) else: self.log.info("Starting Test: %s" % self._testMethodName) invalid_ip = '10.111.151.109' self.cb_bucket_name = self.input.param('cb_bucket_name', 'travel-sample') self.cbas_bucket_name = self.input.param('cbas_bucket_name', 'travel') self.cb_bucket_password = self.input.param('cb_bucket_password', None) self.cb_server_ip = self.input.param("cb_server_ip", None) self.cb_server_ip = \ self.cb_server_ip.replace('INVALID_IP', invalid_ip) \ if self.cb_server_ip is not None else None self.cbas_dataset_name = self.input.param("cbas_dataset_name", 'travel_ds') self.cbas_bucket_name_invalid = \ self.input.param('cbas_bucket_name_invalid', self.cbas_bucket_name) self.cbas_dataset2_name = self.input.param('cbas_dataset2_name', None) self.skip_create_dataset = self.input.param('skip_create_dataset', False) self.disconnect_if_connected = \ self.input.param('disconnect_if_connected', False) self.cbas_dataset_name_invalid = \ self.input.param('cbas_dataset_name_invalid', self.cbas_dataset_name) self.skip_drop_connection = self.input.param('skip_drop_connection', False) self.skip_drop_dataset = self.input.param('skip_drop_dataset', False) self.query_id = self.input.param('query_id', None) self.mode = self.input.param('mode', None) self.num_concurrent_queries = self.input.param('num_queries', 5000) self.concurrent_batch_size = self.input.param('concurrent_batch_size', 100) self.compiler_param = self.input.param('compiler_param', None) self.compiler_param_val = self.input.param('compiler_param_val', None) self.expect_reject = self.input.param('expect_reject', False) self.expect_failure = self.input.param('expect_failure', False) self.compress_dataset = self.input.param('compress_dataset', False) self.index_name = self.input.param('index_name', "NoName") self.index_fields = self.input.param('index_fields', None) if self.index_fields: self.index_fields = self.index_fields.split("-") self.retry_time = self.input.param("retry_time", 300) self.num_retries = self.input.param("num_retries", 1) self.sample_bucket_dict = { TravelSample().name: TravelSample(), BeerSample().name: BeerSample() } self.sample_bucket = None self.flush_enabled = Bucket.FlushBucket.ENABLED self.test_abort_snapshot = self.input.param("test_abort_snapshot", False) self.cbas_spec_name = self.input.param("cbas_spec", None) self._cb_cluster = self.get_clusters() self.expected_error = self.input.param("error", None) self.bucket_spec = self.input.param("bucket_spec", None) self.doc_spec_name = self.input.param("doc_spec_name", "initial_load") self.set_cbas_memory_from_available_free_memory = self.input.param( 'set_cbas_memory_from_available_free_memory', False) self.parallel_load_percent = int( self.input.param("parallel_load_percent", 0)) self.cbas_kill_count = self.input.param("cbas_kill_count", 0) self.memcached_kill_count = self.input.param("memcached_kill_count", 0) self.tamper_links_count = self.input.param("tamper_links_count", 0) self.cbas_node = None self.cbas_memory_quota_percent = int( self.input.param("cbas_memory_quota_percent", 100)) self.bucket_size = self.input.param("bucket_size", 100) services = None nodes_init = None # Single cluster support if len(self._cb_cluster) == 1: self._cb_cluster = self._cb_cluster[0] self.cluster.nodes_in_cluster.extend([self.cluster.master]) if self.services_init and 
self.nodes_init >= 3: if len(self.cluster.servers) < self.nodes_init or \ len(self.services_init.split("-")) != self.nodes_init: self.fail("Configuration error. Re-check nodes_init, " "services_init in .conf file and servers " "available in .ini " "file") services = list() for service in self.services_init.split( "-")[1:self.nodes_init]: services.append(service.replace(":", ",")) # Initialize cluster using given nodes nodes_init = list( filter(lambda node: node.ip != self.cluster.master.ip, self.cluster.servers[1:self.nodes_init])) for node, services_init in map(None, nodes_init, services): if services_init is None: services.append("kv") if not self.cbas_node and "cbas" in services_init: self.cbas_node = node self.cbas_node.services = services_init idx = self.cluster.servers.index(node) self.cluster.servers[idx].services = services_init for server in self.cluster.servers: if "cbas" in server.services: self.cluster.cbas_nodes.append(server) if "kv" in server.services: self.cluster.kv_nodes.append(server) rest = RestConnection(server) rest.set_data_path(data_path=server.data_path, index_path=server.index_path, cbas_path=server.cbas_path) if self.expected_error: self.expected_error = \ self.expected_error.replace("INVALID_IP", invalid_ip) self.expected_error = \ self.expected_error.replace("PORT", self.cluster.master.port) self.otpNodes = [] self.cbas_path = server.cbas_path self.rest = RestConnection(self.cluster.master) if not self.set_cbas_memory_from_available_free_memory: self.log.info( "Setting the min possible memory quota so that adding " "more nodes to the cluster wouldn't be a problem.") self.rest.set_service_mem_quota({ CbServer.Settings.KV_MEM_QUOTA: MIN_KV_QUOTA, CbServer.Settings.FTS_MEM_QUOTA: FTS_QUOTA, CbServer.Settings.INDEX_MEM_QUOTA: INDEX_QUOTA }) self.set_cbas_memory_from_available_free_memory = \ self.input.param( 'set_cbas_memory_from_available_free_memory', False) self.log.info("Setting %d memory quota for CBAS" % CBAS_QUOTA) self.cbas_memory_quota = CBAS_QUOTA self.rest.set_service_mem_quota( {CbServer.Settings.CBAS_MEM_QUOTA: CBAS_QUOTA}) if self.expected_error: self.expected_error = \ self.expected_error.replace("INVALID_IP", invalid_ip) self.expected_error = \ self.expected_error.replace("PORT", self.cluster.master.port) self.cbas_util = None if self.cluster.cbas_nodes: if not self.cbas_node: available_cbas_nodes = list( filter(lambda node: node.ip != self.cluster.master.ip, self.cluster.cbas_nodes)) self.cbas_node = available_cbas_nodes[0] if self.set_cbas_memory_from_available_free_memory: self.set_memory_for_services(self.rest, self.cluster_util, self.cbas_node, self.cbas_node.services) self.cbas_util = CbasUtil(self.cluster.master, self.cbas_node) self.cbas_util_v2 = CbasUtilV2(self.cluster.master, self.cbas_node, self.task) if "cbas" in self.cluster.master.services: self.cleanup_cbas() if add_default_cbas_node: if self.cluster.master.ip != self.cbas_node.ip: self.otpNodes.append( self.cluster_util.add_node(self.cbas_node)) self.cluster.nodes_in_cluster.append(self.cbas_node) if nodes_init: idx = nodes_init.index(self.cbas_node) services.pop(idx) nodes_init.remove(self.cbas_node) else: self.otpNodes = self.rest.node_statuses() ''' This cbas cleanup is actually not needed. 
When a node is added to the cluster, it is automatically cleaned-up.''' self.cleanup_cbas() self.cluster.cbas_nodes.remove(self.cbas_node) if nodes_init: self.task.rebalance([self.cluster.master], nodes_init, [], services=services) self.cluster.nodes_in_cluster.extend(nodes_init) if self.bucket_spec is not None: try: self.collectionSetUp(self.cluster, self.bucket_util, self.cluster_util) except Java_base_exception as exception: self.handle_collection_setup_exception(exception) except Exception as exception: self.handle_collection_setup_exception(exception) else: if self.default_bucket: self.bucket_util.create_default_bucket( self.cluster, bucket_type=self.bucket_type, ram_quota=self.bucket_size, replica=self.num_replicas, conflict_resolution=self. bucket_conflict_resolution_type, replica_index=self.bucket_replica_index, storage=self.bucket_storage, eviction_policy=self.bucket_eviction_policy, flush_enabled=self.flush_enabled) elif self.cb_bucket_name in self.sample_bucket_dict.keys(): self.sample_bucket = \ self.sample_bucket_dict[self.cb_bucket_name] elif len(self._cb_cluster) > 1: # Multi Cluster Support for cluster in self._cb_cluster: for server in cluster.servers: if CbServer.Services.CBAS in server.services: cluster.cbas_nodes.append(server) if CbServer.Services.KV in server.services: cluster.kv_nodes.append(server) rest = RestConnection(server) rest.set_data_path(data_path=server.data_path, index_path=server.index_path, cbas_path=server.cbas_path) if self.expected_error: cluster.expected_error = \ self.expected_error.replace("INVALID_IP", invalid_ip) cluster.expected_error = \ self.expected_error.replace("PORT", cluster.master.port) cluster.otpNodes = list() cluster.cbas_path = server.cbas_path cluster.rest = RestConnection(cluster.master) if not self.set_cbas_memory_from_available_free_memory: self.log.info( "Setting the min possible memory quota so that adding " "more nodes to the cluster wouldn't be a problem.") cluster.rest.set_service_mem_quota({ CbServer.Settings.KV_MEM_QUOTA: MIN_KV_QUOTA, CbServer.Settings.FTS_MEM_QUOTA: FTS_QUOTA, CbServer.Settings.INDEX_MEM_QUOTA: INDEX_QUOTA }) cluster.set_cbas_memory_from_available_free_memory = \ self.input.param( 'set_cbas_memory_from_available_free_memory', False) self.log.info("Setting %d memory quota for CBAS" % CBAS_QUOTA) cluster.cbas_memory_quota = CBAS_QUOTA cluster.rest.set_service_mem_quota( {CbServer.Settings.CBAS_MEM_QUOTA: CBAS_QUOTA}) cluster.cbas_util = None # Drop any existing buckets and datasets if cluster.cbas_nodes: cluster.cbas_node = cluster.cbas_nodes[0] if self.set_cbas_memory_from_available_free_memory: self.set_memory_for_services( cluster.rest, cluster.cluster_util, cluster.cbas_node, cluster.cbas_node.services) cluster.cbas_util = CbasUtil(cluster.master, cluster.cbas_node, self.task) cluster.cbas_util_v2 = CbasUtilV2(cluster.master, cluster.cbas_node) if "cbas" in cluster.master.services: self.cleanup_cbas(cluster.cbas_util) if add_default_cbas_node: if cluster.master.ip != cluster.cbas_node.ip: cluster.otpNodes.append( cluster.cluster_util.add_node( cluster, cluster.cbas_node)) else: cluster.otpNodes = cluster.rest.node_statuses() """ This cbas cleanup is actually not needed. When a node is added to the cluster, it is automatically cleaned-up. 
""" self.cleanup_cbas(cluster.cbas_util) cluster.cbas_nodes.remove(cluster.cbas_node) if self.bucket_spec is not None: try: self.collectionSetUp(cluster, cluster.bucket_util, cluster.cluster_util) except Java_base_exception as exception: self.handle_collection_setup_exception(exception) except Exception as exception: self.handle_collection_setup_exception(exception) else: if self.default_bucket: cluster.bucket_util.create_default_bucket( self.cluster, bucket_type=self.bucket_type, ram_quota=self.bucket_size, replica=self.num_replicas, conflict_resolution=self. bucket_conflict_resolution_type, replica_index=self.bucket_replica_index, storage=self.bucket_storage, eviction_policy=self.bucket_eviction_policy, flush_enabled=self.flush_enabled) elif self.cb_bucket_name in self.sample_bucket_dict.keys(): self.sample_bucket = self.sample_bucket_dict[ self.cb_bucket_name] cluster.bucket_util.add_rbac_user(self.cluster.master) else: self.fail("No cluster is available") self.log.info( "=== CBAS_BASE setup was finished for test #{0} {1} ===".format( self.case_number, self._testMethodName)) def tearDown(self): if len(self.get_clusters()) == 1: self.cbas_util.closeConn() elif len(self.get_clusters()) > 1: for cluster in self._cb_cluster: if cluster.cbas_util: cluster.cbas_util.closeConn() super(CBASBaseTest, self).tearDown() def cbas_logger(self, msg, type="INFO"): if type.upper() == "INFO": self.log.info("*" * 10 + msg + "*" * 10) if type.upper() == "DEBUG": self.log.debug("*" * 10 + msg + "*" * 10) if type.upper() == "ERROR": self.log.error("*" * 10 + msg + "*" * 10) def cleanup_cbas(self, cbas_util=None): """ Drops all connections, datasets and buckets from CBAS :param cbas_util: CbasUtil object. """ if not cbas_util: cbas_util = self.cbas_util try: # Disconnect from all connected buckets cmd_get_buckets = "select Name from Metadata.`Bucket`;" status, metrics, errors, results, _ = cbas_util.execute_statement_on_cbas_util( cmd_get_buckets) if (results is not None) & (len(results) > 0): for row in results: cbas_util.disconnect_from_bucket( row['Name'], disconnect_if_connected=True) self.cbas_logger("Disconnected all buckets") else: self.cbas_logger("No buckets to disconnect") # Drop all datasets cmd_get_datasets = "select DatasetName from Metadata.`Dataset` " \ "where DataverseName != \"Metadata\";" status, metrics, errors, results, _ = \ cbas_util.execute_statement_on_cbas_util(cmd_get_datasets) if (results is not None) & (len(results) > 0): for row in results: cbas_util.drop_dataset("`" + row['DatasetName'] + "`") self.cbas_logger("Dropped all datasets") else: self.cbas_logger("No datasets to drop") # Drop all buckets status, metrics, errors, results, _ = \ cbas_util.execute_statement_on_cbas_util(cmd_get_buckets) if (results is not None) & (len(results) > 0): for row in results: cbas_util.drop_cbas_bucket("`" + row['Name'] + "`") self.cbas_logger("Dropped all buckets") else: self.cbas_logger("No buckets to drop") self.log.info("Drop Dataverse other than Default and Metadata") cmd_get_dataverse = 'select DataverseName from Metadata.`Dataverse` where DataverseName != "Metadata" and DataverseName != "Default";' status, metrics, errors, results, _ = \ cbas_util.execute_statement_on_cbas_util(cmd_get_dataverse) if (results is not None) & (len(results) > 0): for row in results: cbas_util.disconnect_link("`" + row['DataverseName'] + "`" + ".Local") cbas_util.drop_dataverse_on_cbas( dataverse_name="`" + row['DataverseName'] + "`") self.cbas_logger( "Dropped all dataverse except Default and Metadata") else: 
self.cbas_logger("No dataverse to drop") except Exception as e: self.log.info(e.message) def perform_doc_ops_in_all_cb_buckets(self, operation, start_key=0, end_key=1000, batch_size=10, exp=0, _async=False, durability="", mutation_num=0, cluster=None, buckets=[], key=None): """ Create/Update/Delete docs in all cb buckets :param operation: String - "create","update","delete" :param start_key: Doc Key to start the operation with :param end_key: Doc Key to end the operation with :param batch_size: Batch size of doc_ops :param exp: MaxTTL used for doc operations :param _async: Boolean to decide whether to start ops in parallel :param durability: Durability level to use for doc operation :param mutation_num: Mutation count to keep track per doc_loading :param cluster: cluster object for cluster on which this doc load operation has to be performed. :param buckets: list of buckets on which doc load operation has to be performed. :param key: key for the generated docs :return: """ first = ['james', 'sharon', 'dave', 'bill', 'mike', 'steve'] profession = ['doctor', 'lawyer'] template_obj = JsonObject.create() template_obj.put("number", 0) template_obj.put("first_name", "") template_obj.put("profession", "") template_obj.put("mutated", mutation_num) template_obj.put("mutation_type", "ADD") if not key: key = "test_docs" doc_gen = DocumentGenerator(key, template_obj, start=start_key, end=end_key, randomize=False, first_name=first, profession=profession, number=range(70)) if cluster: bucket_util = cluster.bucket_util else: cluster = self.cluster bucket_util = self.bucket_util try: if _async: if buckets: for bucket in buckets: return bucket_util.async_load_bucket( cluster, bucket, doc_gen, operation, exp, durability=durability, batch_size=batch_size, suppress_error_table=True) else: return bucket_util._async_load_all_buckets( cluster, doc_gen, operation, exp, durability=durability, batch_size=batch_size, suppress_error_table=True) else: bucket_util.sync_load_all_buckets(cluster, doc_gen, operation, exp, durability=durability, batch_size=batch_size, suppress_error_table=True) except Exception as e: self.log.error(e.message) def remove_node(self, cluster, otpnode=None, wait_for_rebalance=True, rest=None): """ Method to remove nodes from a cluster. :param otpnode: list of nodes to be removed. :param wait_for_rebalance: boolean, wait for rebalance to finish after removing the nodes. :param rest: RestConnection object """ if not rest: rest = RestConnection(cluster.master) nodes = rest.node_statuses() '''This is the case when master node is running cbas service as well''' if len(nodes) <= len(otpnode): return helper = RestHelper(rest) try: removed = helper.remove_nodes( knownNodes=[node.id for node in nodes], ejectedNodes=[node.id for node in otpnode], wait_for_rebalance=wait_for_rebalance) except Exception: self.sleep(5, "Rebalance failed on Removal. Retry.. THIS IS A BUG") removed = helper.remove_nodes( knownNodes=[node.id for node in nodes], ejectedNodes=[node.id for node in otpnode], wait_for_rebalance=wait_for_rebalance) if wait_for_rebalance: self.assertTrue( removed, "Rebalance operation failed while removing %s" % otpnode) def create_dataverse_link_map(self, cbas_util, dataverse=0, link=0): """ This function creates a hash map, depicting links in different dataverses. :param dataverse: Number of dataverses to be created. Default value of 0 will not create any dataverse, and any link if present will be associated with the "Default" dataverse. :param link: total number of links to be created. 
:returns hash map with dataverse names as keys and associated links as values. Sample dataverse map: Note - Default dataverse will always be present Note - 2 different dataverses can have links with same name. dataverse_map = { "dataverse1": { "link_1" : { "link_property_1": "value", "link_property_2": "value", ... }, "link_2" : { "link_property_1": "value", "link_property_2": "value", ... } }, "Default": { "link_1" : { "link_property_1": "value", "link_property_2": "value", ... } } } """ dataverse_map = dict() dataverse_map["Default"] = dict() link_created = 0 for i in range(1, dataverse + 1): dataverse_name = "dataverse_{0}".format(str(i)) if cbas_util.create_dataverse_on_cbas( dataverse_name=dataverse_name): dataverse_map[dataverse_name] = dict() if link and (link_created < link): for j in range(1, random.randint(0, link - link_created) + 1): link_name = "link_{0}".format(str(j)) dataverse_map[dataverse_name][link_name] = dict() link_created += 1 else: self.log.error("Creation of dataverse %s failed." % dataverse_name) for key in dataverse_map.keys(): if key != "Dafault": cbas_util.drop_dataverse_on_cbas(dataverse_name=key) del dataverse_map[key] raise Exception("Dataverse creation failed") while link_created < link: dataverse_map["Default"]["link_{0}".format( str(link_created))] = dict() link_created += 1 return dataverse_map def create_or_delete_users(self, rbac_util, rbac_users_created, delete=False): """ Creates all types of rbac users. """ if delete: for user in rbac_users_created: try: rbac_util._drop_user(user) del (rbac_users_created[user]) except: pass else: for role in RbacUtils.cb_server_roles: if "[*]" in role: user = role.replace("[*]", "") else: user = role rbac_users_created[user] = role rbac_util._create_user_and_grant_role(user, role) def create_testcase_for_rbac_user(self, description, rbac_users_created): testcases = [] for user in rbac_users_created: if user in ["admin", "analytics_admin", self.analytics_username]: test_params = { "description": description.format(user), "validate_error_msg": False } elif user in [ "security_admin_local", "security_admin_external", "query_external_access", "query_system_catalog", "replication_admin", "ro_admin", "bucket_full_access", "replication_target", "mobile_sync_gateway", "data_reader", "data_writer", "data_dcp_reader", "data_monitoring", "views_admin", "views_reader", "query_delete", "query_insert", "query_manage_index", "query_select", "query_update", "fts_admin", "fts_searcher", "cluster_admin", "bucket_admin", "analytics_manager", "data_backup", "analytics_select", "analytics_reader" ]: test_params = { "description": description.format(user), "validate_error_msg": True, "expected_error": "User must have permission", } else: test_params = { "description": description.format(user), "validate_error_msg": True, "expected_error": "Unauthorized user" } test_params["username"] = user testcases.append(test_params) return testcases def remove_and_return_new_list(self, itemlist, item_to_remove): try: itemlist.remove(item_to_remove) except Exception: pass finally: return itemlist def set_primary_index(self, rest, bucket_name): query = "CREATE PRIMARY INDEX ON `{0}`;".format(bucket_name) result = rest.query_tool(query) if result["status"] == "success": return True else: return False def convert_string_to_bool(self, value): if isinstance(value, str) or isinstance(value, unicode): if value.lower() == "true": return True elif value.lower() == "false": return False else: return value def handle_collection_setup_exception(self, 
exception_obj): if self.sdk_client_pool is not None: self.sdk_client_pool.shutdown() traceback.print_exc() raise exception_obj def collectionSetUp(self, cluster, bucket_util, cluster_util, load_data=True, buckets_spec=None, doc_loading_spec=None): """ Setup the buckets, scopes and collecitons based on the spec passed. """ self.over_ride_spec_params = self.input.param("override_spec_params", "").split(";") self.remove_default_collection = self.input.param( "remove_default_collection", False) # Create bucket(s) and add rbac user bucket_util.add_rbac_user(self.cluster.master) if not buckets_spec: buckets_spec = bucket_util.get_bucket_template_from_package( self.bucket_spec) # Process params to over_ride values if required self.over_ride_bucket_template_params(buckets_spec) bucket_util.create_buckets_using_json_data(self.cluster, buckets_spec) bucket_util.wait_for_collection_creation_to_complete(self.cluster) # Prints bucket stats before doc_ops bucket_util.print_bucket_stats(self.cluster) # Init sdk_client_pool if not initialized before if self.sdk_client_pool is None: self.init_sdk_pool_object() self.log.info("Creating required SDK clients for client_pool") CollectionBase.create_sdk_clients(self.task_manager.number_of_threads, cluster.master, bucket_util.buckets, self.sdk_client_pool, self.sdk_compression) cluster_util.print_cluster_stats(self.cluster) if load_data: self.load_data_into_buckets(cluster, bucket_util, doc_loading_spec) def load_data_into_buckets(self, cluster, bucket_util, doc_loading_spec=None): """ Loads data into buckets using the data spec """ if not doc_loading_spec: doc_loading_spec = bucket_util.get_crud_template_from_package( self.doc_spec_name) self.over_ride_doc_loading_template_params(doc_loading_spec) # MB-38438, adding CollectionNotFoundException in retry exception doc_loading_spec[MetaCrudParams.RETRY_EXCEPTIONS].append( SDKException.CollectionNotFoundException) doc_loading_task = bucket_util.run_scenario_from_spec( self.task, cluster, bucket_util.buckets, doc_loading_spec, mutation_num=0, batch_size=self.batch_size) if doc_loading_task.result is False: self.fail("Initial reloading failed") ttl_buckets = [ "multi_bucket.buckets_for_rebalance_tests_with_ttl", "multi_bucket.buckets_all_membase_for_rebalance_tests_with_ttl", "volume_templates.buckets_for_volume_tests_with_ttl" ] # Verify initial doc load count bucket_util._wait_for_stats_all_buckets(self.cluster, self.cluster.buckets) if self.bucket_spec not in ttl_buckets: bucket_util.validate_docs_per_collections_all_buckets(self.cluster) def over_ride_bucket_template_params(self, bucket_spec): for over_ride_param in self.over_ride_spec_params: if over_ride_param == "replicas": bucket_spec[Bucket.replicaNumber] = self.num_replicas elif over_ride_param == "remove_default_collection": bucket_spec[MetaConstants.REMOVE_DEFAULT_COLLECTION] = \ self.remove_default_collection elif over_ride_param == "enable_flush": if self.input.param("enable_flush", False): bucket_spec[ Bucket.flushEnabled] = Bucket.FlushBucket.ENABLED else: bucket_spec[ Bucket.flushEnabled] = Bucket.FlushBucket.DISABLED elif over_ride_param == "num_buckets": bucket_spec[MetaConstants.NUM_BUCKETS] = int( self.input.param("num_buckets", 1)) elif over_ride_param == "bucket_size": if self.bucket_size == "auto": cluster_info = self.rest.get_nodes_self() kv_quota = cluster_info.__getattribute__( CbServer.Settings.KV_MEM_QUOTA) self.bucket_size = kv_quota // bucket_spec[ MetaConstants.NUM_BUCKETS] bucket_spec[Bucket.ramQuotaMB] = self.bucket_size elif 
over_ride_param == "num_scopes": bucket_spec[MetaConstants.NUM_SCOPES_PER_BUCKET] = int( self.input.param("num_scopes", 1)) elif over_ride_param == "num_collections": bucket_spec[MetaConstants.NUM_COLLECTIONS_PER_SCOPE] = int( self.input.param("num_collections", 1)) elif over_ride_param == "num_items": bucket_spec[MetaConstants.NUM_ITEMS_PER_COLLECTION] = \ self.num_items def over_ride_doc_loading_template_params(self, target_spec): for over_ride_param in self.over_ride_spec_params: if over_ride_param == "durability": target_spec[MetaCrudParams.DURABILITY_LEVEL] = \ self.durability_level elif over_ride_param == "sdk_timeout": target_spec[MetaCrudParams.SDK_TIMEOUT] = self.sdk_timeout elif over_ride_param == "doc_size": target_spec[MetaCrudParams.DocCrud.DOC_SIZE] = self.doc_size def set_memory_for_services(self, master_rest, cluster_util, server, services): services = services.split(",") if len(services) > 0: service_mem_dict = { "kv": [CbServer.Settings.KV_MEM_QUOTA, MIN_KV_QUOTA], "fts": [CbServer.Settings.FTS_MEM_QUOTA, FTS_QUOTA], "index": [CbServer.Settings.INDEX_MEM_QUOTA, INDEX_QUOTA], "cbas": [CbServer.Settings.CBAS_MEM_QUOTA, CBAS_QUOTA] } if "n1ql" in services: services.remove("n1ql") # Get all services that are already running in cluster cluster_services = cluster_util.get_services_map(self.cluster) cluster_info = master_rest.get_nodes_self() rest = RestConnection(server) info = rest.get_nodes_self() memory_quota_available = info.mcdMemoryReserved if len(services) == 1: service = services[0] if service in cluster_services: if service is not "kv": self.log.info( "Setting {0} memory quota for {1}".format( memory_quota_available, service)) property_name = service_mem_dict[service][0] service_mem_in_cluster = cluster_info.__getattribute__( property_name) # If service is already in cluster, # we cannot increase the RAM allocation, # but we can reduce the RAM allocation if needed. if memory_quota_available < service_mem_in_cluster: if memory_quota_available > \ service_mem_dict[service][1]: memory_quota = memory_quota_available if service == "cbas": memory_quota = \ memory_quota_available * \ self.cbas_memory_quota_percent / 100 if memory_quota < service_mem_dict[ service][1]: memory_quota = \ service_mem_dict[service][1] master_rest.set_service_mem_quota( {property_name: memory_quota}) else: self.fail("Error while setting service memory " "quota {0} for {1}".format( service_mem_dict[service][1], service)) else: self.log.info("Setting {0} memory quota for {1}".format( memory_quota_available, service)) if memory_quota_available > service_mem_dict[service][1]: memory_quota = memory_quota_available if service == "cbas": memory_quota = \ memory_quota_available * \ self.cbas_memory_quota_percent / 100 if memory_quota < service_mem_dict[service][1]: memory_quota = service_mem_dict[service][1] master_rest.set_service_mem_quota( {service_mem_dict[service][0]: memory_quota}) else: self.fail( "Error while setting service mem quota %s for %s" % (service_mem_dict[service][1], service)) else: # if KV is present, then don't change the KV memory quota # Assuming that KV node will always be present in the master if "kv" in services: services.remove("kv") memory_quota_available -= cluster_info\ .__getattribute__(CbServer.Settings.KV_MEM_QUOTA) set_cbas_mem = False if "cbas" in services: services.remove("cbas") set_cbas_mem = True for service in services: # setting minimum possible memory for other services. 
self.log.info("Setting {0} memory quota for {1}".format( service_mem_dict[service][1], service)) if memory_quota_available >= service_mem_dict[service][1]: master_rest.set_service_mem_quota({ service_mem_dict[service][0]: service_mem_dict[service][1] }) memory_quota_available -= service_mem_dict[service][1] else: self.fail( "Error while setting service mem quota %s for %s" % (service_mem_dict[service][1], service)) if set_cbas_mem and memory_quota_available >= \ service_mem_dict["cbas"][1]: if "cbas" in cluster_services: if cluster_info.__getattribute__( CbServer.Settings.CBAS_MEM_QUOTA ) >= memory_quota_available: self.log.info( "Setting {0} memory quota for CBAS".format( memory_quota_available)) master_rest.set_service_mem_quota({ CbServer.Settings.CBAS_MEM_QUOTA: memory_quota_available }) else: self.log.info( "Setting {0} memory quota for CBAS".format( memory_quota_available)) master_rest.set_service_mem_quota({ CbServer.Settings.CBAS_MEM_QUOTA: memory_quota_available }) else: self.fail("Error while setting service memory quota {0} " "for CBAS".format(memory_quota_available))
def test_stop_network_ingest_data(self):
    self.setup_for_test()
    self.cbas_node_type = self.input.param('cbas_node_type', None)

    query = "select sleep(count(*),50000) from {0};".format(
        self.cbas_dataset_name)
    handles = self.cbas_util._run_concurrent_queries(query, "async", 10)
    self.ingestion_in_progress()

    # Pick the node whose network will be stopped
    if self.cbas_node_type:
        if self.cbas_node_type == "CC":
            node_in_test = self.cbas_node
            self.cbas_util = CbasUtil(self.cluster.master,
                                      self.cluster.cbas_nodes[0])
            self.cbas_util.createConn("default")
        else:
            node_in_test = self.cluster.cbas_nodes[0]
    else:
        # Stop network on KV node to mimic n/w partition on KV
        node_in_test = self.cluster.master

    items_in_cbas_bucket_before, _ = \
        self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name)
    self.log.info("Items before network down: %s"
                  % items_in_cbas_bucket_before)

    RemoteMachineShellConnection(node_in_test).stop_network("30")
    # self.sleep(40, "Wait for network to come up.")

    items_in_cbas_bucket = 0
    start_time = time.time()
    while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) \
            and time.time() < start_time + 60:
        try:
            items_in_cbas_bucket, _ = \
                self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
        except:
            pass
    # items_in_cbas_bucket_after, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name)
    self.log.info("Items after network is up: %s" % items_in_cbas_bucket)
    # start_time = time.time()
    # while items_in_cbas_bucket_after <= 0 and time.time() < start_time + 60:
    #     items_in_cbas_bucket_after, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name)
    #     self.sleep(1)
    # items_in_cbas_bucket = items_in_cbas_bucket_after

    if self.num_items < items_in_cbas_bucket < self.num_items * 3:
        self.log.info("Data ingestion was interrupted successfully")
    elif items_in_cbas_bucket < self.num_items:
        self.log.info("Data ingestion was interrupted and is restarting from 0")
    else:
        self.log.info("Data ingestion was not interrupted; it completed "
                      "before the network was stopped")

    run_count = 0
    fail_count = 0
    success_count = 0
    aborted_count = 0
    shell = RemoteMachineShellConnection(node_in_test)
    for handle in handles:
        status, hand = self.cbas_util.retrieve_request_status_using_handle(
            node_in_test, handle, shell)
        if status == "running":
            run_count += 1
            self.log.info("Query with handle %s is running." % handle)
        elif status == "failed":
            fail_count += 1
            self.log.info("Query with handle %s failed." % handle)
        elif status == "success":
            success_count += 1
            self.log.info("Query with handle %s succeeded." % handle)
        else:
            aborted_count += 1
            self.log.info("Queued job was deleted: %s" % status)

    self.log.info("After network interruption %s queued jobs are Running."
                  % run_count)
    self.log.info("After network interruption %s queued jobs are Failed."
                  % fail_count)
    self.log.info("After network interruption %s queued jobs are Successful."
                  % success_count)
    self.log.info("After network interruption %s queued jobs are Aborted."
                  % aborted_count)

    if self.cbas_node_type == "NC":
        self.assertTrue(fail_count + aborted_count == 0,
                        "Some queries failed/aborted")

    query = "select count(*) from {0};".format(self.cbas_dataset_name)
    self.cbas_util._run_concurrent_queries(query, "immediate", 100)

    if not self.cbas_util.validate_cbas_dataset_items_count(
            self.cbas_dataset_name, self.num_items * 3):
        self.fail("No. of items in CBAS dataset do not match that in the CB bucket")
def test_stop_start_service_ingest_data(self):
    self.setup_for_test()
    self.cbas_node_type = self.input.param('cbas_node_type', None)

    query = "select sleep(count(*),50000) from {0};".format(
        self.cbas_dataset_name)
    handles = self.cbas_util._run_concurrent_queries(query, "async", 10)
    self.ingestion_in_progress()

    if self.cbas_node_type == "CC":
        node_in_test = self.cbas_node
        self.cbas_util.closeConn()
        self.cbas_util = CbasUtil(self.cluster.master,
                                  self.cluster.cbas_nodes[0])
        self.cbas_util.createConn("default")
    else:
        node_in_test = self.cluster.cbas_nodes[0]

    items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
        self.cbas_dataset_name)
    self.log.info("Items before service restart: %s" % items_in_cbas_bucket)

    self.log.info("Gracefully stopping service on node %s" % node_in_test)
    NodeHelper.stop_couchbase(node_in_test)
    NodeHelper.start_couchbase(node_in_test)
    NodeHelper.wait_service_started(node_in_test)
    # self.sleep(10, "wait for service to come up.")
    #
    # items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name)
    # self.log.info("After graceful STOPPING/STARTING service docs in CBAS bucket : %s" % items_in_cbas_bucket)
    #
    # start_time = time.time()
    # while items_in_cbas_bucket <= 0 and time.time() < start_time + 60:
    #     items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name)
    #     self.sleep(1)

    items_in_cbas_bucket = 0
    start_time = time.time()
    while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) \
            and time.time() < start_time + 60:
        try:
            items_in_cbas_bucket, _ = \
                self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
        except:
            pass

    if self.num_items < items_in_cbas_bucket < self.num_items * 3:
        self.log.info("Data ingestion was interrupted successfully")
    elif items_in_cbas_bucket < self.num_items:
        self.log.info("Data ingestion was interrupted and is restarting from 0")
    else:
        self.log.info("Data ingestion was not interrupted; it completed "
                      "before the service restart")

    run_count = 0
    fail_count = 0
    success_count = 0
    aborted_count = 0
    shell = RemoteMachineShellConnection(node_in_test)
    for handle in handles:
        status, hand = self.cbas_util.retrieve_request_status_using_handle(
            node_in_test, handle, shell)
        if status == "running":
            run_count += 1
            self.log.info("Query with handle %s is running." % handle)
        elif status == "failed":
            fail_count += 1
            self.log.info("Query with handle %s failed." % handle)
        elif status == "success":
            success_count += 1
            self.log.info("Query with handle %s succeeded." % handle)
        else:
            aborted_count += 1
            self.log.info("Queued job was deleted: %s" % status)

    self.log.info("After service restart %s queued jobs are Running."
                  % run_count)
    self.log.info("After service restart %s queued jobs are Failed."
                  % fail_count)
    self.log.info("After service restart %s queued jobs are Successful."
                  % success_count)
    self.log.info("After service restart %s queued jobs are Aborted."
                  % aborted_count)

    if self.cbas_node_type == "NC":
        self.assertTrue(fail_count + aborted_count == 0,
                        "Some queries failed/aborted")

    query = "select count(*) from {0};".format(self.cbas_dataset_name)
    self.cbas_util._run_concurrent_queries(query, "immediate", 100)

    if not self.cbas_util.validate_cbas_dataset_items_count(
            self.cbas_dataset_name, self.num_items * 3):
        self.fail("No. of items in CBAS dataset do not match that in the CB bucket")
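# The run/failed/success/aborted tally over query handles appears verbatim in
# both the network-stop and the service-restart tests above. A minimal sketch
# of a shared helper, assuming retrieve_request_status_using_handle() returns
# a (status, handle) pair as it is used above; the helper name is illustrative.
def tally_handle_statuses(cbas_util, node, handles, shell):
    """Return a dict with counts of running/failed/success/aborted handles."""
    counts = {"running": 0, "failed": 0, "success": 0, "aborted": 0}
    for handle in handles:
        status, _ = cbas_util.retrieve_request_status_using_handle(
            node, handle, shell)
        if status in counts:
            counts[status] += 1
        else:
            # Queued job was deleted or reported an unknown state
            counts["aborted"] += 1
    return counts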
def setUp(self): super(UpgradeTests, self).setUp() self.cbas_util = CbasUtil(self.task) self.cbas_spec_name = self.input.param("cbas_spec", "local_datasets") self.rebalance_util = CBASRebalanceUtil(self.cluster_util, self.bucket_util, self.task, vbucket_check=True, cbas_util=self.cbas_util) if self.input.param("n2n_encryption", False): CbServer.use_https = True trust_all_certs() self.security_util = SecurityUtils(self.log) rest = RestConnection(self.cluster.master) self.log.info("Disabling Auto-Failover") if not rest.update_autofailover_settings(False, 120): self.fail("Disabling Auto-Failover failed") self.log.info("Setting node to node encryption level to all") self.security_util.set_n2n_encryption_level_on_nodes( self.cluster.nodes_in_cluster, level="all") CbServer.use_https = True self.log.info("Enabling Auto-Failover") if not rest.update_autofailover_settings(True, 300): self.fail("Enabling Auto-Failover failed") cbas_cc_node_ip = None retry = 0 self.cluster.cbas_nodes = \ self.cluster_util.get_nodes_from_services_map( self.cluster, service_type="cbas", get_all_nodes=True, servers=self.cluster.nodes_in_cluster) while True and retry < 60: cbas_cc_node_ip = self.cbas_util.retrieve_cc_ip_from_master( self.cluster) if cbas_cc_node_ip: break else: self.sleep(10, "Waiting for CBAS service to come up") retry += 1 if not cbas_cc_node_ip: self.fail("CBAS service did not come up even after 10 " "mins.") for server in self.cluster.cbas_nodes: if server.ip == cbas_cc_node_ip: self.cluster.cbas_cc_node = server break if not self.cbas_util.wait_for_cbas_to_recover(self.cluster, timeout=300): self.fail("Analytics service failed to start post adding cbas " "nodes to cluster") self.pre_upgrade_setup() self.log_setup_status(self.__class__.__name__, "Finished", stage=self.setUp.__name__)
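# Illustrative sketch only: the upgrade setUp above brackets the node-to-node
# encryption change with "disable auto-failover ... re-enable auto-failover".
# A context manager makes that ordering hard to get wrong. `disable`/`enable`
# are assumed callables wrapping rest.update_autofailover_settings(...).
from contextlib import contextmanager


@contextmanager
def autofailover_paused(disable, enable):
    if not disable():
        raise RuntimeError("Disabling Auto-Failover failed")
    try:
        yield
    finally:
        if not enable():
            raise RuntimeError("Enabling Auto-Failover failed")

# Usage sketch (timeouts mirror the values used above; names are assumptions):
# with autofailover_paused(lambda: rest.update_autofailover_settings(False, 120),
#                          lambda: rest.update_autofailover_settings(True, 300)):
#     security_util.set_n2n_encryption_level_on_nodes(nodes, level="all")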
def test_rebalance_kv_rollback_create_ops(self): self.setup_for_test() items_before_persistence_stop = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name)[0] self.log.info("Items in CBAS before persistence stop: %s" % items_before_persistence_stop) # Stop Persistence on Node A & Node B self.log.info("Stopping persistence on NodeA") mem_client = MemcachedClientHelper.direct_client( self.cluster.master, self.cb_bucket_name) mem_client.stop_persistence() # Perform Create, Update, Delete ops in the CB bucket self.log.info("Performing Mutations") self.perform_doc_ops_in_all_cb_buckets("create", self.num_items, self.num_items * 3 / 2) kv_nodes = self.get_kv_nodes(self.servers, self.cluster.master) items_in_cb_bucket = 0 if self.where_field and self.where_value: items_in_cb_bucket = RestConnection( self.cluster.master).query_tool( 'select count(*) from %s where %s = "%s"' % (self.cb_bucket_name, self.where_field, self.where_value))['results'][0]['$1'] else: for node in kv_nodes: items_in_cb_bucket += self.get_item_count_mc( node, self.cb_bucket_name) # Validate no. of items in CBAS dataset self.assertTrue( self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, items_in_cb_bucket, 0), "No. of items in CBAS dataset do not match that in the CB bucket") # Count no. of items in CB & CBAS Buckets items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) self.log.info( "Before Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s", items_in_cb_bucket, items_in_cbas_bucket) self.assertTrue( items_in_cb_bucket == items_in_cbas_bucket, "Before Rollback : # Items in CBAS bucket does not match that in the CB bucket" ) if self.CC: self.cluster_util.remove_node([self.otpNodes[0]], wait_for_rebalance=False) self.cbas_util.closeConn() self.cbas_util = CbasUtil(self.cluster.master, self.cluster.cbas_nodes[0]) self.cbas_util.createConn("default") else: self.cluster_util.remove_node([self.otpNodes[1]], wait_for_rebalance=False) # Kill memcached on Node A so that Node B becomes master self.log.info("Kill Memcached process on NodeA") shell = RemoteMachineShellConnection(self.cluster.master) shell.kill_memcached() self.sleep(2, "Wait for 2 secs for DCP rollback sent to CBAS.") curr = time.time() while items_in_cbas_bucket == -1 or ( items_in_cbas_bucket != 0 and items_in_cbas_bucket > items_before_persistence_stop): try: if curr + 120 < time.time(): break items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) self.log.info("Items in CBAS: %s" % items_in_cbas_bucket) except: self.log.info( "Probably rebalance is in progress and the reason for queries being failing." ) pass self.assertTrue(items_in_cbas_bucket <= items_before_persistence_stop, "Roll-back did not happen.") self.log.info("#######BINGO########\nROLLBACK HAPPENED") items_in_cb_bucket = 0 curr = time.time() while items_in_cb_bucket != items_in_cbas_bucket or items_in_cb_bucket == 0: items_in_cb_bucket = 0 items_in_cbas_bucket = 0 if self.where_field and self.where_value: try: items_in_cb_bucket = RestConnection( self.cluster.master).query_tool( 'select count(*) from %s where %s = "%s"' % (self.cb_bucket_name, self.where_field, self.where_value))['results'][0]['$1'] except: self.log.info( "Indexer in rollback state. Query failed. Pass and move ahead." 
) pass else: for node in kv_nodes: items_in_cb_bucket += self.get_item_count_mc( node, self.cb_bucket_name) self.log.info("Items in CB bucket after rollback: %s" % items_in_cb_bucket) try: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) except: pass if curr + 120 < time.time(): break str_time = time.time() while self.rest._rebalance_progress_status( ) == "running" and time.time() < str_time + 300: self.sleep(1) self.log.info("Waiting for rebalance to complete") self.log.info( "After Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s", items_in_cb_bucket, items_in_cbas_bucket) self.assertTrue( items_in_cb_bucket == items_in_cbas_bucket, "After Rollback : # Items in CBAS bucket does not match that in the CB bucket" )
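# Illustrative sketch only: once the rollback is detected, the test above waits
# for the KV-side item count and the CBAS-side dataset count to converge before
# asserting. `fetch_kv_count` and `fetch_cbas_count` are assumed callables, not
# framework APIs; transient query failures are tolerated while the indexer and
# rebalance settle.
import time


def wait_for_kv_cbas_convergence(fetch_kv_count, fetch_cbas_count,
                                 timeout=120, interval=2):
    """Return (kv, cbas) once the two counts agree, or the last pair seen on timeout."""
    deadline = time.time() + timeout
    kv = cbas = -1
    while time.time() < deadline:
        try:
            kv = fetch_kv_count()
            cbas = fetch_cbas_count()
        except Exception:
            pass                # query service or indexer may still be rolling back
        if kv == cbas and kv > 0:
            break
        time.sleep(interval)
    return kv, cbas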
class PartialRollback_CBAS(CBASBaseTest): def setUp(self): self.input = TestInputSingleton.input self.input.test_params.update({"default_bucket": False}) super(PartialRollback_CBAS, self).setUp() ''' Considering all the scenarios where: 1. There can be 1 KV and multiple cbas nodes(and tests wants to add all cbas into cluster.) 2. There can be 1 KV and multiple cbas nodes(and tests wants only 1 cbas node) 3. There can be only 1 node running KV,CBAS service. NOTE: Cases pending where there are nodes which are running only cbas. For that service check on nodes is needed. ''' if "add_all_cbas_nodes" in self.input.test_params and self.input.test_params[ "add_all_cbas_nodes"] and len(self.cluster.cbas_nodes) > 0: self.otpNodes.extend( self.add_all_nodes_then_rebalance(self.cluster.cbas_nodes)) '''Create default bucket''' self.bucket_util.create_default_bucket(storage=self.bucket_storage) self.cbas_util.createConn("default") self.merge_policy = self.input.param('merge_policy', None) self.max_mergable_component_size = self.input.param( 'max_mergable_component_size', 16384) self.max_tolerance_component_count = self.input.param( 'max_tolerance_component_count', 2) self.create_index = self.input.param('create_index', False) self.where_field = self.input.param('where_field', None) self.where_value = self.input.param('where_value', None) self.CC = self.input.param('CC', False) def setup_for_test(self, skip_data_loading=False): if not skip_data_loading: # Load Couchbase bucket first. self.perform_doc_ops_in_all_cb_buckets("create", 0, self.num_items, batch_size=1000) # Create dataset on the CBAS bucket if self.merge_policy == None: self.cbas_util.create_dataset_on_bucket( cbas_bucket_name=self.cb_bucket_name, where_field=self.where_field, where_value=self.where_value, cbas_dataset_name=self.cbas_dataset_name) else: self.cbas_util.create_dataset_on_bucket_merge_policy( cbas_bucket_name=self.cb_bucket_name, where_field=self.where_field, where_value=self.where_value, cbas_dataset_name=self.cbas_dataset_name, merge_policy=self.merge_policy, max_mergable_component_size=self.max_mergable_component_size, max_tolerance_component_count=self. max_tolerance_component_count) if self.create_index: create_idx_statement = "create index {0} on {1}({2});".format( self.index_name, self.cbas_dataset_name, "profession:string") status, metrics, errors, results, _ = self.cbas_util.execute_statement_on_cbas_util( create_idx_statement) # Connect to Bucket self.cbas_util.connect_to_bucket( cbas_bucket_name=self.cbas_bucket_name, cb_bucket_password=self.cb_bucket_password) if not skip_data_loading: result = RestConnection(self.cluster.master).query_tool( "CREATE INDEX {0} ON {1}({2})".format(self.index_name, self.cb_bucket_name, "profession")) self.sleep(20, "wait for index creation.") self.assertTrue(result['status'] == "success") if self.where_field and self.where_value: items = RestConnection(self.cluster.master).query_tool( 'select count(*) from %s where %s = "%s"' % (self.cb_bucket_name, self.where_field, self.where_value))['results'][0]['$1'] else: items = self.num_items # Validate no. of items in CBAS dataset if not self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, items): self.fail( "No. 
of items in CBAS dataset do not match that in the CB bucket" ) def tearDown(self): super(PartialRollback_CBAS, self).tearDown() def test_ingestion_after_kv_rollback_create_ops(self): self.setup_for_test() items_before_persistence_stop = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name)[0] self.log.info("Items in CBAS before persistence stop: %s" % items_before_persistence_stop) # Stop Persistence on Node A & Node B self.log.info("Stopping persistence on NodeA") mem_client = MemcachedClientHelper.direct_client( self.cluster.master, self.cb_bucket_name) mem_client.stop_persistence() # Perform Create, Update, Delete ops in the CB bucket self.log.info("Performing Mutations") self.perform_doc_ops_in_all_cb_buckets("create", self.num_items, self.num_items * 3 / 2) kv_nodes = self.get_kv_nodes(self.servers, self.cluster.master) items_in_cb_bucket = 0 if self.where_field and self.where_value: items_in_cb_bucket = RestConnection( self.cluster.master).query_tool( 'select count(*) from %s where %s = "%s"' % (self.cb_bucket_name, self.where_field, self.where_value))['results'][0]['$1'] else: for node in kv_nodes: items_in_cb_bucket += self.get_item_count_mc( node, self.cb_bucket_name) # Validate no. of items in CBAS dataset self.assertTrue( self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, items_in_cb_bucket, 0), "No. of items in CBAS dataset do not match that in the CB bucket") # Count no. of items in CB & CBAS Buckets items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) self.log.info( "Before Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s", items_in_cb_bucket, items_in_cbas_bucket) self.assertTrue( items_in_cb_bucket == items_in_cbas_bucket, "Before Rollback : # Items in CBAS bucket does not match that in the CB bucket" ) # Kill memcached on Node A so that Node B becomes master self.log.info("Kill Memcached process on NodeA") shell = RemoteMachineShellConnection(self.cluster.master) shell.kill_memcached() self.sleep(2, "Wait for 2 secs for DCP rollback sent to CBAS.") curr = time.time() while items_in_cbas_bucket != 0 and items_in_cbas_bucket > items_before_persistence_stop: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) if curr + 120 < time.time(): break self.assertTrue(items_in_cbas_bucket <= items_before_persistence_stop, "Roll-back did not happen.") self.log.info("#######BINGO########\nROLLBACK HAPPENED") items_in_cb_bucket = 0 curr = time.time() while items_in_cb_bucket != items_in_cbas_bucket: items_in_cb_bucket = 0 items_in_cbas_bucket = 0 if self.where_field and self.where_value: try: items_in_cb_bucket = RestConnection( self.cluster.master).query_tool( 'select count(*) from %s where %s = "%s"' % (self.cb_bucket_name, self.where_field, self.where_value))['results'][0]['$1'] except: self.log.info( "Indexer in rollback state. Query failed. Pass and move ahead." 
) pass else: for node in kv_nodes: items_in_cb_bucket += self.get_item_count_mc( node, self.cb_bucket_name) self.log.info("Items in CB bucket after rollback: %s" % items_in_cb_bucket) items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) if curr + 120 < time.time(): break self.log.info( "After Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s", items_in_cb_bucket, items_in_cbas_bucket) self.assertTrue( items_in_cb_bucket == items_in_cbas_bucket, "After Rollback : # Items in CBAS bucket does not match that in the CB bucket" ) def test_ingestion_after_kv_rollback_create_ops_MB29860(self): self.setup_for_test() items_before_persistence_stop = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name)[0] self.log.info("Items in CBAS before persistence stop: %s" % items_before_persistence_stop) # Stop Persistence on Node A & Node B self.log.info("Stopping persistence on NodeA") mem_client = MemcachedClientHelper.direct_client( self.cluster.master, self.cb_bucket_name) mem_client.stop_persistence() # Perform Create, Update, Delete ops in the CB bucket self.log.info("Performing Mutations") self.perform_doc_ops_in_all_cb_buckets("create", self.num_items, self.num_items * 3 / 2) kv_nodes = self.get_kv_nodes(self.servers, self.cluster.master) items_in_cb_bucket = 0 if self.where_field and self.where_value: items_in_cb_bucket = RestConnection( self.cluster.master).query_tool( 'select count(*) from %s where %s = "%s"' % (self.cb_bucket_name, self.where_field, self.where_value))['results'][0]['$1'] else: for node in kv_nodes: items_in_cb_bucket += self.get_item_count_mc( node, self.cb_bucket_name) # Validate no. of items in CBAS dataset self.assertTrue( self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, items_in_cb_bucket, 0), "No. of items in CBAS dataset do not match that in the CB bucket") # Count no. 
of items in CB & CBAS Buckets items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) self.log.info( "Before Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s", items_in_cb_bucket, items_in_cbas_bucket) self.assertTrue( items_in_cb_bucket == items_in_cbas_bucket, "Before Rollback : # Items in CBAS bucket does not match that in the CB bucket" ) self.cbas_util.disconnect_from_bucket(self.cbas_bucket_name) # Kill memcached on Node A so that Node B becomes master self.log.info("Kill Memcached process on NodeA") shell = RemoteMachineShellConnection(self.cluster.master) shell.kill_memcached() if self.input.param('kill_cbas', False): shell = RemoteMachineShellConnection(self.cbas_node) shell.kill_process("/opt/couchbase/lib/cbas/runtime/bin/java", "java") shell.kill_process("/opt/couchbase/bin/cbas", "cbas") tries = 60 result = False while tries > 0 and not result: try: result = self.cbas_util.connect_to_bucket( self.cbas_bucket_name) tries -= 1 except: pass self.sleep(2) self.assertTrue( result, "CBAS connect bucket failed after memcached killed on KV node.") self.sleep(2, "Wait for 2 secs for DCP rollback sent to CBAS.") curr = time.time() while items_in_cbas_bucket != 0 and items_in_cbas_bucket > items_before_persistence_stop: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) if curr + 120 < time.time(): break self.assertTrue(items_in_cbas_bucket <= items_before_persistence_stop, "Roll-back did not happen.") self.log.info("#######BINGO########\nROLLBACK HAPPENED") items_in_cb_bucket = 0 curr = time.time() while items_in_cb_bucket != items_in_cbas_bucket: items_in_cb_bucket = 0 items_in_cbas_bucket = 0 if self.where_field and self.where_value: try: items_in_cb_bucket = RestConnection( self.cluster.master).query_tool( 'select count(*) from %s where %s = "%s"' % (self.cb_bucket_name, self.where_field, self.where_value))['results'][0]['$1'] except: self.log.info( "Indexer in rollback state. Query failed. Pass and move ahead." ) pass else: for node in kv_nodes: items_in_cb_bucket += self.get_item_count_mc( node, self.cb_bucket_name) self.log.info("Items in CB bucket after rollback: %s" % items_in_cb_bucket) items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) if curr + 120 < time.time(): break self.log.info( "After Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s", items_in_cb_bucket, items_in_cbas_bucket) self.assertTrue( items_in_cb_bucket == items_in_cbas_bucket, "After Rollback : # Items in CBAS bucket does not match that in the CB bucket" ) def test_ingestion_after_kv_rollback_delete_ops(self): self.setup_for_test() # Stop Persistence on Node A & Node B self.log.info("Stopping persistence on NodeA") mem_client = MemcachedClientHelper.direct_client( self.cluster.master, self.cb_bucket_name) mem_client.stop_persistence() # Perform Create, Update, Delete ops in the CB bucket self.log.info("Performing Mutations") self.perform_doc_ops_in_all_cb_buckets("delete", 0, self.num_items / 2) kv_nodes = self.get_kv_nodes(self.servers, self.cluster.master) items_in_cb_bucket = 0 if self.where_field and self.where_value: items_in_cb_bucket = RestConnection( self.cluster.master).query_tool( 'select count(*) from %s where %s = "%s"' % (self.cb_bucket_name, self.where_field, self.where_value))['results'][0]['$1'] else: for node in kv_nodes: items_in_cb_bucket += self.get_item_count_mc( node, self.cb_bucket_name) # Validate no. 
of items in CBAS dataset self.assertTrue( self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, items_in_cb_bucket, 0), "No. of items in CBAS dataset do not match that in the CB bucket") # Count no. of items in CB & CBAS Buckets items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) items_before_rollback = items_in_cbas_bucket self.log.info( "Before Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s", items_in_cb_bucket, items_in_cbas_bucket) self.assertTrue( items_in_cb_bucket == items_in_cbas_bucket, "Before Rollback : # Items in CBAS bucket does not match that in the CB bucket" ) # Kill memcached on Node A so that Node B becomes master self.log.info("Kill Memcached process on NodeA") shell = RemoteMachineShellConnection(self.cluster.master) shell.kill_memcached() self.sleep(2, "Wait for 2 secs for DCP rollback sent to CBAS.") curr = time.time() while items_in_cbas_bucket != 0 and items_in_cbas_bucket <= items_before_rollback: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) if curr + 120 < time.time(): break self.assertTrue(items_in_cbas_bucket > items_before_rollback, "Roll-back did not happen.") self.log.info("#######BINGO########\nROLLBACK HAPPENED") items_in_cb_bucket = 0 curr = time.time() while items_in_cb_bucket != items_in_cbas_bucket: items_in_cb_bucket = 0 items_in_cbas_bucket = 0 if self.where_field and self.where_value: try: items_in_cb_bucket = RestConnection( self.cluster.master).query_tool( 'select count(*) from %s where %s = "%s"' % (self.cb_bucket_name, self.where_field, self.where_value))['results'][0]['$1'] except: self.log.info( "Indexer in rollback state. Query failed. Pass and move ahead." ) pass else: for node in kv_nodes: items_in_cb_bucket += self.get_item_count_mc( node, self.cb_bucket_name) self.log.info("Items in CB bucket after rollback: %s" % items_in_cb_bucket) items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) if curr + 120 < time.time(): break self.log.info( "After Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s", items_in_cb_bucket, items_in_cbas_bucket) self.assertTrue( items_in_cb_bucket == items_in_cbas_bucket, "After Rollback : # Items in CBAS bucket does not match that in the CB bucket" ) def test_ingestion_after_kv_rollback_cbas_disconnected(self): self.setup_for_test() # Stop Persistence on Node A & Node B self.log.info("Stopping persistence on NodeA") mem_client = MemcachedClientHelper.direct_client( self.cluster.master, self.cb_bucket_name) mem_client.stop_persistence() # Perform Create, Update, Delete ops in the CB bucket self.log.info("Performing Mutations") self.perform_doc_ops_in_all_cb_buckets("delete", 0, self.num_items / 2) # Count no. 
of items in CB & CBAS Buckets kv_nodes = self.get_kv_nodes(self.servers, self.cluster.master) items_in_cb_bucket = 0 for node in kv_nodes: items_in_cb_bucket += self.get_item_count_mc( node, self.cb_bucket_name) items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) items_before_rollback = items_in_cbas_bucket self.log.info( "Before Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s", items_in_cb_bucket, items_in_cbas_bucket) self.assertTrue( items_in_cb_bucket == items_in_cbas_bucket, "Before Rollback : # Items in CBAS bucket does not match that in the CB bucket" ) self.cbas_util.disconnect_from_bucket(self.cbas_bucket_name) # Kill memcached on Node A so that Node B becomes master self.log.info("Kill Memcached process on NodeA") shell = RemoteMachineShellConnection(self.cluster.master) shell.kill_memcached() # self.sleep(10,"Wait for 10 secs for memcached restarts.") if self.input.param('kill_cbas', False): shell = RemoteMachineShellConnection(self.cbas_node) shell.kill_process("/opt/couchbase/lib/cbas/runtime/bin/java", "java") shell.kill_process("/opt/couchbase/bin/cbas", "cbas") tries = 60 result = False while tries > 0 and not result: try: result = self.cbas_util.connect_to_bucket( self.cbas_bucket_name) tries -= 1 except: pass self.sleep(2) self.assertTrue( result, "CBAS connect bucket failed after memcached killed on KV node.") curr = time.time() while items_in_cbas_bucket != 0 and items_in_cbas_bucket <= items_before_rollback: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) if curr + 120 < time.time(): break self.assertTrue(items_in_cbas_bucket > items_before_rollback, "Roll-back did not happen.") self.log.info("#######BINGO########\nROLLBACK HAPPENED") curr = time.time() while items_in_cb_bucket != items_in_cbas_bucket: items_in_cb_bucket = 0 items_in_cbas_bucket = 0 if self.where_field and self.where_value: try: items_in_cb_bucket = RestConnection( self.cluster.master).query_tool( 'select count(*) from %s where %s = "%s"' % (self.cb_bucket_name, self.where_field, self.where_value))['results'][0]['$1'] except: self.log.info( "Indexer in rollback state. Query failed. Pass and move ahead." ) pass else: for node in kv_nodes: items_in_cb_bucket += self.get_item_count_mc( node, self.cb_bucket_name) self.log.info("Items in CB bucket after rollback: %s" % items_in_cb_bucket) items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) if curr + 120 < time.time(): break # Count no. 
of items in CB & CBAS Buckets items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) self.log.info( "After Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s", items_in_cb_bucket, items_in_cbas_bucket) self.assertTrue( items_in_cb_bucket == items_in_cbas_bucket, "After Rollback : # Items in CBAS bucket does not match that in the CB bucket" ) def test_rebalance_kv_rollback_create_ops(self): self.setup_for_test() items_before_persistence_stop = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name)[0] self.log.info("Items in CBAS before persistence stop: %s" % items_before_persistence_stop) # Stop Persistence on Node A & Node B self.log.info("Stopping persistence on NodeA") mem_client = MemcachedClientHelper.direct_client( self.cluster.master, self.cb_bucket_name) mem_client.stop_persistence() # Perform Create, Update, Delete ops in the CB bucket self.log.info("Performing Mutations") self.perform_doc_ops_in_all_cb_buckets("create", self.num_items, self.num_items * 3 / 2) kv_nodes = self.get_kv_nodes(self.servers, self.cluster.master) items_in_cb_bucket = 0 if self.where_field and self.where_value: items_in_cb_bucket = RestConnection( self.cluster.master).query_tool( 'select count(*) from %s where %s = "%s"' % (self.cb_bucket_name, self.where_field, self.where_value))['results'][0]['$1'] else: for node in kv_nodes: items_in_cb_bucket += self.get_item_count_mc( node, self.cb_bucket_name) # Validate no. of items in CBAS dataset self.assertTrue( self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, items_in_cb_bucket, 0), "No. of items in CBAS dataset do not match that in the CB bucket") # Count no. of items in CB & CBAS Buckets items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) self.log.info( "Before Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s", items_in_cb_bucket, items_in_cbas_bucket) self.assertTrue( items_in_cb_bucket == items_in_cbas_bucket, "Before Rollback : # Items in CBAS bucket does not match that in the CB bucket" ) if self.CC: self.cluster_util.remove_node([self.otpNodes[0]], wait_for_rebalance=False) self.cbas_util.closeConn() self.cbas_util = CbasUtil(self.cluster.master, self.cluster.cbas_nodes[0]) self.cbas_util.createConn("default") else: self.cluster_util.remove_node([self.otpNodes[1]], wait_for_rebalance=False) # Kill memcached on Node A so that Node B becomes master self.log.info("Kill Memcached process on NodeA") shell = RemoteMachineShellConnection(self.cluster.master) shell.kill_memcached() self.sleep(2, "Wait for 2 secs for DCP rollback sent to CBAS.") curr = time.time() while items_in_cbas_bucket == -1 or ( items_in_cbas_bucket != 0 and items_in_cbas_bucket > items_before_persistence_stop): try: if curr + 120 < time.time(): break items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) self.log.info("Items in CBAS: %s" % items_in_cbas_bucket) except: self.log.info( "Probably rebalance is in progress and the reason for queries being failing." 
) pass self.assertTrue(items_in_cbas_bucket <= items_before_persistence_stop, "Roll-back did not happen.") self.log.info("#######BINGO########\nROLLBACK HAPPENED") items_in_cb_bucket = 0 curr = time.time() while items_in_cb_bucket != items_in_cbas_bucket or items_in_cb_bucket == 0: items_in_cb_bucket = 0 items_in_cbas_bucket = 0 if self.where_field and self.where_value: try: items_in_cb_bucket = RestConnection( self.cluster.master).query_tool( 'select count(*) from %s where %s = "%s"' % (self.cb_bucket_name, self.where_field, self.where_value))['results'][0]['$1'] except: self.log.info( "Indexer in rollback state. Query failed. Pass and move ahead." ) pass else: for node in kv_nodes: items_in_cb_bucket += self.get_item_count_mc( node, self.cb_bucket_name) self.log.info("Items in CB bucket after rollback: %s" % items_in_cb_bucket) try: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) except: pass if curr + 120 < time.time(): break str_time = time.time() while self.rest._rebalance_progress_status( ) == "running" and time.time() < str_time + 300: self.sleep(1) self.log.info("Waiting for rebalance to complete") self.log.info( "After Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s", items_in_cb_bucket, items_in_cbas_bucket) self.assertTrue( items_in_cb_bucket == items_in_cbas_bucket, "After Rollback : # Items in CBAS bucket does not match that in the CB bucket" )
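# Illustrative sketch only: the rollback assertions in this class look for the
# CBAS count moving in opposite directions depending on which un-persisted
# mutations were lost. For creates, rollback pulls the count back down to at
# most the pre-stop baseline; for deletes, rollback pushes it back up above it.
def rollback_detected(count_now, baseline, mutation_type):
    if mutation_type == "create":
        return count_now <= baseline
    if mutation_type == "delete":
        return count_now > baseline
    raise ValueError("unexpected mutation_type: %s" % mutation_type)

# e.g. rollback_detected(items_in_cbas_bucket, items_before_persistence_stop, "create")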
class CBASBaseTest(BaseTestCase): def setUp(self, add_default_cbas_node=True): super(CBASBaseTest, self).setUp() if self._testMethodDoc: self.log.info("Starting Test: %s - %s" % (self._testMethodName, self._testMethodDoc)) else: self.log.info("Starting Test: %s" % self._testMethodName) for server in self.cluster.servers: if "cbas" in server.services: self.cluster.cbas_nodes.append(server) if "kv" in server.services: self.cluster.kv_nodes.append(server) rest = RestConnection(server) rest.set_data_path(data_path=server.data_path, index_path=server.index_path, cbas_path=server.cbas_path) invalid_ip = '10.111.151.109' self._cb_cluster = self.task self.cb_bucket_name = self.input.param('cb_bucket_name', 'travel-sample') self.sample_bucket_dict = {TravelSample().name: TravelSample(), BeerSample().name: BeerSample()} self.sample_bucket = None self.cbas_bucket_name = self.input.param('cbas_bucket_name', 'travel') self.cb_bucket_password = self.input.param('cb_bucket_password', None) self.expected_error = self.input.param("error", None) if self.expected_error: self.expected_error = self.expected_error.replace("INVALID_IP", invalid_ip) self.expected_error = \ self.expected_error.replace("PORT", self.cluster.master.port) self.cb_server_ip = self.input.param("cb_server_ip", None) self.cb_server_ip = \ self.cb_server_ip.replace('INVALID_IP', invalid_ip) \ if self.cb_server_ip is not None else None self.cbas_dataset_name = self.input.param("cbas_dataset_name", 'travel_ds') self.cbas_bucket_name_invalid = \ self.input.param('cbas_bucket_name_invalid', self.cbas_bucket_name) self.cbas_dataset2_name = self.input.param('cbas_dataset2_name', None) self.skip_create_dataset = self.input.param('skip_create_dataset', False) self.disconnect_if_connected = \ self.input.param('disconnect_if_connected', False) self.cbas_dataset_name_invalid = \ self.input.param('cbas_dataset_name_invalid', self.cbas_dataset_name) self.skip_drop_connection = self.input.param('skip_drop_connection', False) self.skip_drop_dataset = self.input.param('skip_drop_dataset', False) self.query_id = self.input.param('query_id', None) self.mode = self.input.param('mode', None) self.num_concurrent_queries = self.input.param('num_queries', 5000) self.concurrent_batch_size = self.input.param('concurrent_batch_size', 100) self.compiler_param = self.input.param('compiler_param', None) self.compiler_param_val = self.input.param('compiler_param_val', None) self.expect_reject = self.input.param('expect_reject', False) self.expect_failure = self.input.param('expect_failure', False) self.compress_dataset = self.input.param('compress_dataset', False) self.index_name = self.input.param('index_name', "NoName") self.index_fields = self.input.param('index_fields', None) self.retry_time = self.input.param("retry_time", 300) self.num_retries = self.input.param("num_retries", 1) self.flush_enabled = Bucket.FlushBucket.ENABLED self.test_abort_snapshot = self.input.param("test_abort_snapshot", False) if self.index_fields: self.index_fields = self.index_fields.split("-") self.otpNodes = list() self.cbas_path = server.cbas_path self.rest = RestConnection(self.cluster.master) self.log.info("Setting the min possible memory quota so that adding " "more nodes to the cluster wouldn't be a problem.") self.rest.set_service_memoryQuota(service='memoryQuota', memoryQuota=MIN_KV_QUOTA) self.rest.set_service_memoryQuota(service='ftsMemoryQuota', memoryQuota=FTS_QUOTA) self.rest.set_service_memoryQuota(service='indexMemoryQuota', memoryQuota=INDEX_QUOTA) 
self.set_cbas_memory_from_available_free_memory = \ self.input.param('set_cbas_memory_from_available_free_memory', False) if self.set_cbas_memory_from_available_free_memory: info = self.rest.get_nodes_self() self.cbas_memory_quota = int((info.memoryFree // 1024 ** 2) * 0.9) self.log.info("Setting %d memory quota for CBAS" % self.cbas_memory_quota) self.rest.set_service_memoryQuota( service='cbasMemoryQuota', memoryQuota=self.cbas_memory_quota) else: self.log.info("Setting %d memory quota for CBAS" % CBAS_QUOTA) self.cbas_memory_quota = CBAS_QUOTA self.rest.set_service_memoryQuota(service='cbasMemoryQuota', memoryQuota=CBAS_QUOTA) self.cbas_util = None # Drop any existing buckets and datasets if self.cluster.cbas_nodes: self.cbas_node = self.cluster.cbas_nodes[0] self.cbas_util = CbasUtil(self.cluster.master, self.cbas_node, self.task) if "cbas" in self.cluster.master.services: self.cleanup_cbas() if add_default_cbas_node: if self.cluster.master.ip != self.cbas_node.ip: self.otpNodes.append( self.cluster_util.add_node(self.cbas_node)) else: self.otpNodes = self.rest.node_statuses() """ This cbas cleanup is actually not needed. When a node is added to the cluster, it is automatically cleaned-up. """ self.cleanup_cbas() self.cluster.cbas_nodes.remove(self.cbas_node) if self.default_bucket: self.bucket_util.create_default_bucket( bucket_type=self.bucket_type, ram_quota=self.bucket_size, replica=self.num_replicas, conflict_resolution=self.bucket_conflict_resolution_type, replica_index=self.bucket_replica_index, storage=self.bucket_storage, eviction_policy=self.bucket_eviction_policy, flush_enabled=self.flush_enabled) elif self.cb_bucket_name in self.sample_bucket_dict.keys(): self.sample_bucket = self.sample_bucket_dict[self.cb_bucket_name] self.bucket_util.add_rbac_user() self.log.info("=== CBAS_BASE setup was finished for test #{0} {1} ===" .format(self.case_number, self._testMethodName)) def tearDown(self): super(CBASBaseTest, self).tearDown() def cleanup_cbas(self): """ Drops all connections, datasets and buckets from CBAS """ try: # Disconnect from all connected buckets cmd_get_buckets = "select Name from Metadata.`Bucket`;" status, metrics, errors, results, _ = \ self.cbas_util.execute_statement_on_cbas_util(cmd_get_buckets) if (results is not None) & (len(results) > 0): for row in results: self.cbas_util.disconnect_from_bucket( row['Name'], disconnect_if_connected=True) self.log.info("******* Disconnected all buckets *******") else: self.log.info("******* No buckets to disconnect *******") # Drop all datasets cmd_get_datasets = "select DatasetName from Metadata.`Dataset` where DataverseName != \"Metadata\";" status, metrics, errors, results, _ = \ self.cbas_util.execute_statement_on_cbas_util(cmd_get_datasets) if (results is not None) & (len(results) > 0): for row in results: self.cbas_util.drop_dataset(row['DatasetName']) self.log.info("********* Dropped all datasets *********") else: self.log.info("********* No datasets to drop *********") # Drop all buckets status, metrics, errors, results, _ = \ self.cbas_util.execute_statement_on_cbas_util(cmd_get_buckets) if (results is not None) & (len(results) > 0): for row in results: self.cbas_util.drop_cbas_bucket(row['Name']) self.log.info("********* Dropped all buckets *********") else: self.log.info("********* No buckets to drop *********") self.log.info("Drop Dataverse other than Default and Metadata") cmd_get_dataverse = 'select DataverseName from Metadata.`Dataverse` where DataverseName != "Metadata" and DataverseName != "Default";' 
status, metrics, errors, results, _ = self.cbas_util.execute_statement_on_cbas_util(cmd_get_dataverse) if (results != None) & (len(results) > 0): for row in results: self.cbas_util.disconnect_link("`" + row['DataverseName'] + "`" + ".Local") self.cbas_util.drop_dataverse_on_cbas(dataverse_name="`" + row['DataverseName'] + "`") self.log.info("********* Dropped all dataverse except Default and Metadata *********") else: self.log.info("********* No dataverse to drop *********") except Exception as e: self.log.info(e.message) def perform_doc_ops_in_all_cb_buckets(self, operation, start_key=0, end_key=1000, batch_size=10, exp=0, _async=False, durability=""): """ Create/Update/Delete docs in all cb buckets :param operation: String - "create","update","delete" :param start_key: Doc Key to start the operation with :param end_key: Doc Key to end the operation with :param batch_size: Batch size of doc_ops :param exp: MaxTTL used for doc operations :param _async: Boolean to decide whether to start ops in parallel :param durability: Durability level to use for doc operation :return: """ first = ['james', 'sharon', 'dave', 'bill', 'mike', 'steve'] profession = ['doctor', 'lawyer'] template_obj = JsonObject.create() template_obj.put("number", 0) template_obj.put("first_name", "") template_obj.put("profession", "") template_obj.put("mutated", 0) if operation == "update": template_obj.put("mutated", 1) template_obj.put("mutation_type", "ADD") doc_gen = DocumentGenerator('test_docs', template_obj, start=start_key, end=end_key, randomize=True, first_name=first, profession=profession, number=range(70)) try: if _async: return self.bucket_util._async_load_all_buckets( self.cluster, doc_gen, operation, exp, durability=durability, batch_size=batch_size, suppress_error_table=True) else: self.bucket_util.sync_load_all_buckets( self.cluster, doc_gen, operation, exp, durability=durability, batch_size=batch_size, suppress_error_table=True) except Exception as e: self.log.error(e.message) def remove_node(self, otpnode=None, wait_for_rebalance=True): nodes = self.rest.node_statuses() '''This is the case when master node is running cbas service as well''' if len(nodes) <= len(otpnode): return helper = RestHelper(self.rest) try: removed = helper.remove_nodes( knownNodes=[node.id for node in nodes], ejectedNodes=[node.id for node in otpnode], wait_for_rebalance=wait_for_rebalance) except Exception: self.sleep(5, "Rebalance failed on Removal. Retry.. THIS IS A BUG") removed = helper.remove_nodes( knownNodes=[node.id for node in nodes], ejectedNodes=[node.id for node in otpnode], wait_for_rebalance=wait_for_rebalance) if wait_for_rebalance: self.assertTrue(removed, "Rebalance operation failed while removing %s" % otpnode)
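# Illustrative sketch only: a plain-dict version of the document template that
# perform_doc_ops_in_all_cb_buckets() feeds to DocumentGenerator, handy when
# reasoning about the "profession:string, number:bigint" indexes created in the
# tests above. Field names and value ranges are taken from the method itself.
import random


def sample_doc(mutated=False):
    first = ['james', 'sharon', 'dave', 'bill', 'mike', 'steve']
    profession = ['doctor', 'lawyer']
    doc = {
        "number": random.randrange(70),          # matches number=range(70)
        "first_name": random.choice(first),
        "profession": random.choice(profession),
        "mutated": 1 if mutated else 0,
    }
    if mutated:
        doc["mutation_type"] = "ADD"             # only added on update operations
    return doc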
def test_all_cbas_node_running_queries(self): ''' Description: Test that all the cbas nodes are capable of serving queries. Steps: 1. Perform doc operation on the KV node. 2. Add 1 cbas node and setup cbas. 3. Add all other cbas nodes. 4. Verify that all cbas nodes are able to serve queries. Author: Ritesh Agarwal ''' set_up_cbas = False query = "select count(*) from {0};".format(self.cbas_dataset_name) self.bucket_util.create_default_bucket(self.cluster, storage=self.bucket_storage) self.perform_doc_ops_in_all_cb_buckets("create", 0, self.num_items) if self.cbas_node.ip == self.cluster.master.ip: set_up_cbas = self.setup_cbas_bucket_dataset_connect("default", self.num_items) temp_cbas_util = CbasUtil(self.cluster.master, self.cbas_node, self.task) temp_cbas_util.createConn("default") self.cbas_util._run_concurrent_queries(query, None, 1000, temp_cbas_util) temp_cbas_util.closeConn() for node in self.cluster.cbas_nodes: if node.ip != self.cluster.master.ip: self.cluster_util.add_node(node=node) if not set_up_cbas: set_up_cbas = self.setup_cbas_bucket_dataset_connect("default", self.num_items) # Connect to the newly added node so that it is the one serving the queries temp_cbas_util = CbasUtil(self.cluster.master, node, self.task) temp_cbas_util.createConn("default") self.cbas_util._run_concurrent_queries(query, None, 1000, temp_cbas_util, batch_size=self.concurrent_batch_size) temp_cbas_util.closeConn()
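# Illustrative sketch only: the intent of the test above reduced to its core --
# open a connection to each Analytics node in turn and run the same count query
# through it. `connect_to` and `run_query` are assumed callables (e.g. a
# per-node CbasUtil plus createConn()), not the framework's API.
def verify_every_cbas_node_serves_queries(cbas_nodes, connect_to, run_query,
                                          query, expected_count):
    for node in cbas_nodes:
        conn = connect_to(node)          # per-node connection
        try:
            count = run_query(conn, query)
            assert count == expected_count, (
                "node %s returned %s, expected %s" % (node, count, expected_count))
        finally:
            conn.close()                 # mirror of closeConn() above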
def test_logging_configurations_are_restored_post_service_restarts(self): self.log.info("Add a cbas node") result = self.cluster_util.add_node(self.cluster.cbas_nodes[0], services=["cbas"], rebalance=True) self.assertTrue(result, msg="Failed to add CBAS node") self.log.info("Delete all loggers") self.cbas_util.delete_all_loggers_on_cbas() self.log.info("Set the logging level using the json object") status, content, response = self.cbas_util.set_log_level_on_cbas( CbasLogging.DEFAULT_LOGGER_CONFIG_DICT) self.assertTrue(status, msg="Response status incorrect for SET request") self.log.info("Delete specific logger") logger_name = self.input.param("logger_name_to_delete", "com.couchbase.client.core.node") status, content, response = self.cbas_util.delete_specific_cbas_log_level( logger_name) self.assertTrue(status, msg="Status mismatch for DELETE") del CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[logger_name] self.log.info("Update specific logger") logger_name = self.input.param("logger_name_to_update", "org.apache.hyracks") logger_level_to_update = self.input.param("logger_level_to_update", "FATAL") status, response, content = self.cbas_util.set_specific_log_level_on_cbas( logger_name, logger_level_to_update) self.assertTrue(status, msg="Status mismatch for SET") CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[ logger_name] = logger_level_to_update self.log.info("Add a new logger") logger_name = self.input.param("logger_name_to_add", "org.apache.hyracks123") logger_level_to_add = self.input.param("logger_level_to_add", "ALL") status, response, content = self.cbas_util.set_specific_log_level_on_cbas( logger_name, logger_level_to_add) self.assertTrue(status, msg="Status mismatch for SET") CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[ logger_name] = logger_level_to_add self.log.info("Verify logging configuration that we set on cbas Node") for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items(): status, content, response = self.cbas_util.get_specific_cbas_log_level( name) self.assertTrue(status, msg="Response status incorrect for GET request") self.assertEquals(content, level, msg="Logger configuration mismatch for logger " + name) self.sleep( timeout=10, message= "Waiting for logger configuration to be copied across cbas nodes") self.log.info("Verify logging configuration on other cbas node") for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items(): status, content, response = CbasUtil( self.cluster.master, self.cluster.cbas_nodes[0]).get_specific_cbas_log_level(name) self.assertTrue(status, msg="Response status incorrect for GET request") self.assertEquals(content, level, msg="Logger configuration mismatch for logger " + name) self.log.info("Read input params") process_name = self.input.param('process_name', None) service_name = self.input.param('service_name', None) restart_couchbase = self.input.param('restart_couchbase', False) reboot = self.input.param('reboot', False) kill_services = self.input.param('kill_services', False) self.log.info("Establish a remote connection") shell_cc = RemoteMachineShellConnection(self.cbas_node) shell_nc = RemoteMachineShellConnection(self.cluster.cbas_nodes[0]) if kill_services: self.log.info("Kill the %s service on CC cbas node" % service_name) shell_cc.kill_process(process_name, service_name) self.log.info("Kill the %s service on other cbas node" % service_name) shell_nc.kill_process(process_name, service_name) if restart_couchbase: self.log.info("Restart couchbase service") status, _, _ = self.cbas_util.restart_analytics_cluster_uri() self.assertTrue(status, 
msg="Failed to restart cbas") if reboot: self.log.info("Reboot couchbase CC node") shell = RemoteMachineShellConnection(self.cbas_node) shell.reboot_server_and_wait_for_cb_run(self.cluster_util, self.cbas_node) shell.disconnect() self.log.info("Reboot couchbase NC node") shell = RemoteMachineShellConnection(self.cluster.cbas_nodes[0]) shell.reboot_server_and_wait_for_cb_run(self.cluster_util, self.cluster.cbas_nodes[0]) shell.disconnect() self.log.info( "Wait for request to complete and cluster to be active: Using private ping() function" ) cluster_recover_start_time = time.time() while time.time() < cluster_recover_start_time + 180: try: status, metrics, _, cbas_result, _ = self.cbas_util.execute_statement_on_cbas_util( "set `import-private-functions` `true`;ping()") if status == "success": break except: self.sleep(3, message="Wait for service to up") self.log.info("Verify logging configuration post service kill") for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items(): status, content, response = self.cbas_util.get_specific_cbas_log_level( name) self.assertTrue(status, msg="Response status incorrect for GET request") self.assertEquals(content, level, msg="Logger configuration mismatch for logger " + name) self.sleep( timeout=10, message= "Waiting for logger configuration to be copied across cbas nodes") self.log.info( "Verify logging configuration on other cbas node post service kill" ) for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items(): status, content, response = CbasUtil( self.cluster.master, self.cluster.cbas_nodes[0]).get_specific_cbas_log_level(name) self.assertTrue(status, msg="Response status incorrect for GET request") self.assertEquals(content, level, msg="Logger configuration mismatch for logger " + name)
class MetadataReplication(CBASBaseTest): def tearDown(self): CBASBaseTest.tearDown(self) def setUp(self): self.input = TestInputSingleton.input self.input.test_params.update({"default_bucket": False}) super(MetadataReplication, self).setUp() self.nc_otpNodes = [] if "add_all_cbas_nodes" in self.input.test_params and self.input.test_params[ "add_all_cbas_nodes"] and len(self.cluster.cbas_nodes) > 0: self.nc_otpNodes = self.add_all_nodes_then_rebalance( self.cluster.cbas_nodes) elif self.input.param("nc_nodes_to_add", 0): self.nc_otpNodes = self.add_all_nodes_then_rebalance( self.cluster.cbas_nodes[:self.input.param("nc_nodes_to_add")]) self.otpNodes += self.nc_otpNodes self.bucket_util.create_default_bucket(self.cluster, storage=self.bucket_storage) self.cbas_util.createConn("default") self.shell = RemoteMachineShellConnection(self.cluster.master) #test for number of partitions: self.partitions_dict = self.cbas_util.get_num_partitions(self.shell) # if self.cluster.master.cbas_path: # for key in self.partitions_dict.keys(): # self.assertTrue(self.partitions_dict[key] == len(ast.literal_eval(self.cluster.master.cbas_path)), "Number of partitions created are incorrect on cbas nodes.") def setup_for_test(self, skip_data_loading=False): if not skip_data_loading: # Load Couchbase bucket first. self.perform_doc_ops_in_all_cb_buckets("create", 0, self.num_items, batch_size=1000) # Create dataset on the CBAS bucket self.cbas_util.create_dataset_on_bucket( cbas_bucket_name=self.cb_bucket_name, cbas_dataset_name=self.cbas_dataset_name) # Create indexes on the CBAS bucket self.create_secondary_indexes = self.input.param( "create_secondary_indexes", False) if self.create_secondary_indexes: self.index_fields = "profession:string,number:bigint" create_idx_statement = "create index {0} on {1}({2});".format( self.index_name, self.cbas_dataset_name, self.index_fields) status, metrics, errors, results, _ = self.cbas_util.execute_statement_on_cbas_util( create_idx_statement) self.assertTrue(status == "success", "Create Index query failed") self.assertTrue( self.cbas_util.verify_index_created( self.index_name, self.index_fields.split(","), self.cbas_dataset_name)[0]) # Connect to Bucket self.cbas_util.connect_to_bucket( cbas_bucket_name=self.cbas_bucket_name, cb_bucket_password=self.cb_bucket_password) if not skip_data_loading: # Validate no. of items in CBAS dataset if not self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, self.num_items): self.fail( "No. of items in CBAS dataset do not match that in the CB bucket" ) def ingestion_in_progress(self): self.cbas_util.disconnect_from_bucket(self.cbas_bucket_name) self.perform_doc_ops_in_all_cb_buckets("create", 0, self.num_items * 2, batch_size=1000) self.cbas_util.connect_to_bucket( cbas_bucket_name=self.cbas_bucket_name, cb_bucket_password=self.cb_bucket_password) def ingest_more_data(self): self.cbas_util.disconnect_from_bucket(self.cbas_bucket_name) self.perform_doc_ops_in_all_cb_buckets("create", self.num_items * 2, self.num_items * 4, batch_size=1000) self.cbas_util.connect_to_bucket( cbas_bucket_name=self.cbas_bucket_name, cb_bucket_password=self.cb_bucket_password) if not self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, self.num_items * 4): self.fail( "No. 
of items in CBAS dataset does not match that in the CB bucket")

    def test_rebalance(self):
        self.setup_for_test(skip_data_loading=True)
        self.rebalance_type = self.input.param('rebalance_type', 'out')
        self.rebalance_node = self.input.param('rebalance_node', 'CC')
        self.how_many = self.input.param('how_many', 1)
        self.restart_rebalance = self.input.param('restart_rebalance', False)
        self.replica_change = self.input.param('replica_change', 0)

        query = "select sleep(count(*),50000) from {0};".format(
            self.cbas_dataset_name)
        handles = self.cbas_util._run_concurrent_queries(query, "async", 10)
        self.ingestion_in_progress()

        otpNodes = []
        if self.rebalance_node == "CC":
            node_in_test = [self.cbas_node]
            otpNodes = [self.otpNodes[0]]
            self.cbas_util.closeConn()
            self.cbas_util = CbasUtil(self.cluster.master,
                                      self.cluster.cbas_nodes[0])
            self.cbas_util.createConn("default")
            self.cbas_node = self.cluster.cbas_nodes[0]
        elif self.rebalance_node == "NC":
            node_in_test = self.cluster.cbas_nodes[:self.how_many]
            otpNodes = self.nc_otpNodes[:self.how_many]
        else:
            node_in_test = [self.cbas_node] + \
                           self.cluster.cbas_nodes[:self.how_many]
            otpNodes = self.otpNodes[:self.how_many + 1]
            self.cbas_util.closeConn()
            self.cbas_util = CbasUtil(self.cluster.master,
                                      self.cluster.cbas_nodes[self.how_many])
            self.cbas_util.createConn("default")

        replicas_before_rebalance = len(
            self.cbas_util.get_replicas_info(self.shell))

        if self.rebalance_type == 'in':
            if self.restart_rebalance:
                self.cluster_util.add_all_nodes_then_rebalance(
                    self.cluster,
                    self.cluster.cbas_nodes[
                        self.input.param("nc_nodes_to_add"):
                        self.how_many + self.input.param("nc_nodes_to_add")],
                    wait_for_completion=False)
                self.sleep(2)
                if self.rest._rebalance_progress_status() == "running":
                    self.assertTrue(self.rest.stop_rebalance(wait_timeout=120),
                                    "Failed while stopping rebalance.")
                    self.sleep(30, "Wait for some time after rebalance is stopped.")
                else:
                    self.fail("Rebalance completed before the test could have "
                              "stopped rebalance.")
                self.rebalance(wait_for_completion=False)
            else:
                self.cluster_util.add_all_nodes_then_rebalance(
                    self.cluster,
                    self.cluster.cbas_nodes[
                        self.input.param("nc_nodes_to_add"):
                        self.how_many + self.input.param("nc_nodes_to_add")],
                    wait_for_completion=False)
            replicas_before_rebalance += self.replica_change
        else:
            if self.restart_rebalance:
                self.cluster_util.remove_node(self.cluster, otpNodes,
                                              wait_for_rebalance=False)
                self.sleep(2)
                if self.rest._rebalance_progress_status() == "running":
                    self.assertTrue(self.rest.stop_rebalance(wait_timeout=120),
                                    "Failed while stopping rebalance.")
                    self.sleep(30, "Wait for some time after rebalance is stopped.")
                else:
                    self.fail("Rebalance completed before the test could have "
                              "stopped rebalance.")
                self.rebalance(wait_for_completion=False,
                               ejected_nodes=[node.id for node in otpNodes])
            else:
                self.cluster_util.remove_node(self.cluster, otpNodes,
                                              wait_for_rebalance=False)
            replicas_before_rebalance -= self.replica_change

        self.sleep(30)
        str_time = time.time()
        while self.rest._rebalance_progress_status() == "running" \
                and time.time() < str_time + 300:
            replicas = self.cbas_util.get_replicas_info(self.shell)
            if replicas:
                for replica in replicas:
                    self.log.info("replica state during rebalance: %s"
                                  % replica['status'])
            self.sleep(2)

        replicas = self.cbas_util.get_replicas_info(self.shell)
        replicas_after_rebalance = len(replicas)
        self.assertEqual(replicas_after_rebalance, replicas_before_rebalance,
                         "%s,%s" % (replicas_after_rebalance,
                                    replicas_before_rebalance))
        for replica in replicas:
            self.log.info("replica state after rebalance: %s"
                          % replica['status'])
            self.assertEqual(replica['status'], "IN_SYNC",
                             "Replica state is incorrect: %s"
                             % replica['status'])

        # items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name)
        # self.log.info("Items before service restart: %s" % items_in_cbas_bucket)
        count = 0
        while self.cbas_util.fetch_analytics_cluster_response()['state'] \
                != "ACTIVE" and count < 60:
            self.sleep(5)
            count += 1

        items_in_cbas_bucket = 0
        start_time = time.time()
        while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) \
                and time.time() < start_time + 60:
            try:
                items_in_cbas_bucket, _ = \
                    self.cbas_util.get_num_items_in_cbas_dataset(
                        self.cbas_dataset_name)
            except:
                # dataset may not be queryable while the cluster settles
                pass
            self.sleep(1)
        self.log.info("Docs in CBAS bucket after rebalance operation: %s"
                      % items_in_cbas_bucket)

        if items_in_cbas_bucket < self.num_items * 2 \
                and items_in_cbas_bucket > self.num_items:
            self.log.info("Data ingestion interrupted successfully")
        elif items_in_cbas_bucket < self.num_items:
            self.log.info("Data ingestion was interrupted and restarted "
                          "from 0.")
        else:
            self.log.info("Data ingestion was not interrupted; it completed "
                          "before the rebalance operation.")

        run_count = 0
        fail_count = 0
        success_count = 0
        aborted_count = 0
        shell = RemoteMachineShellConnection(node_in_test[0])
        for handle in handles:
            status, hand = self.cbas_util.retrieve_request_status_using_handle(
                node_in_test, handle, shell)
            if status == "running":
                run_count += 1
                self.log.info("query with handle %s is running." % handle)
            elif status == "failed":
                fail_count += 1
                self.log.info("query with handle %s failed." % handle)
            elif status == "success":
                success_count += 1
                self.log.info("query with handle %s is successful." % handle)
            else:
                aborted_count += 1
                self.log.info("Queued job is deleted: %s" % status)

        self.log.info("After rebalance %s queued jobs are Running." % run_count)
        self.log.info("After rebalance %s queued jobs are Failed." % fail_count)
        self.log.info("After rebalance %s queued jobs are Successful."
                      % success_count)
        self.log.info("After rebalance %s queued jobs are Aborted."
                      % aborted_count)

        if self.rebalance_node == "NC":
            self.assertTrue(aborted_count == 0, "Some queries aborted")

        query = "select count(*) from {0};".format(self.cbas_dataset_name)
        self.cbas_util._run_concurrent_queries(query, "immediate", 100)

        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, self.num_items * 2):
            self.fail("No. of items in CBAS dataset does not match "
                      "that in the CB bucket")
        self.ingest_more_data()

    def test_cancel_CC_rebalance(self):
        pass

    def test_chain_rebalance_out_cc(self):
        self.setup_for_test(skip_data_loading=True)
        self.ingestion_in_progress()

        total_cbas_nodes = len(self.otpNodes)
        while total_cbas_nodes > 1:
            cc_ip = self.cbas_util.retrieve_cc_ip(shell=self.shell)
            for otpnode in self.otpNodes:
                if otpnode.ip == cc_ip:
                    self.cluster_util.remove_node(self.cluster, [otpnode],
                                                  wait_for_rebalance=True)
                    for server in self.cluster.cbas_nodes:
                        if cc_ip != server.ip:
                            self.cbas_util.closeConn()
                            self.cbas_util = CbasUtil(self.cluster.master,
                                                      server)
                            self.cbas_util.createConn("default")
                            self.cbas_node = server
                            break

                    # items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name)
                    # self.log.info("Items before service restart: %s" % items_in_cbas_bucket)
                    items_in_cbas_bucket = 0
                    start_time = time.time()
                    while (items_in_cbas_bucket == 0
                           or items_in_cbas_bucket == -1) \
                            and time.time() < start_time + 60:
                        try:
                            items_in_cbas_bucket, _ = \
                                self.cbas_util.get_num_items_in_cbas_dataset(
                                    self.cbas_dataset_name)
                        except:
                            pass
                        self.sleep(1)
                    self.log.info("Docs in CBAS bucket after rebalance "
                                  "operation: %s" % items_in_cbas_bucket)

                    if items_in_cbas_bucket < self.num_items * 2 \
                            and items_in_cbas_bucket > self.num_items:
                        self.log.info("Data ingestion interrupted "
                                      "successfully")
                    elif items_in_cbas_bucket < self.num_items:
                        self.log.info("Data ingestion was interrupted and "
                                      "restarted from 0.")
                    else:
                        self.log.info("Data ingestion was not interrupted; "
                                      "it completed before the rebalance "
                                      "operation.")

                    query = "select count(*) from {0};".format(
                        self.cbas_dataset_name)
                    self.cbas_util._run_concurrent_queries(query, "immediate",
                                                           10)
                    break
            total_cbas_nodes -= 1

        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, self.num_items * 2):
            self.fail("No. of items in CBAS dataset does not match "
                      "that in the CB bucket")
        self.ingest_more_data()

    def test_cc_swap_rebalance(self):
        self.restart_rebalance = self.input.param('restart_rebalance', False)
        self.setup_for_test(skip_data_loading=True)

        query = "select sleep(count(*),50000) from {0};".format(
            self.cbas_dataset_name)
        handles = self.cbas_util._run_concurrent_queries(query, "async", 10)
        self.ingestion_in_progress()

        replicas_before_rebalance = len(
            self.cbas_util.get_replicas_info(self.shell))

        self.cluster_util.add_node(node=self.cluster.cbas_nodes[-1],
                                   rebalance=False)
        swap_nc = self.input.param('swap_nc', False)
        if not swap_nc:
            out_nodes = [self.otpNodes[0]]
            self.cbas_util.closeConn()
            self.cbas_util = CbasUtil(self.cluster.master,
                                      self.cluster.cbas_nodes[0])
            self.cbas_util.createConn("default")
            self.cbas_node = self.cluster.cbas_nodes[0]
        else:
            out_nodes = [self.otpNodes[1]]

        self.cluster_util.remove_node(self.cluster, out_nodes,
                                      wait_for_rebalance=False)
        self.sleep(5, "Wait for some time after rebalance started.")
        if self.restart_rebalance:
            if self.rest._rebalance_progress_status() == "running":
                self.assertTrue(self.rest.stop_rebalance(wait_timeout=120),
                                "Failed while stopping rebalance.")
                self.sleep(10)
            else:
                self.fail("Rebalance completed before the test could have "
                          "stopped rebalance.")
            self.rebalance(ejected_nodes=[node.id for node in out_nodes],
                           wait_for_completion=False)
        self.sleep(5)

        str_time = time.time()
        while self.rest._rebalance_progress_status() == "running" \
                and time.time() < str_time + 300:
            replicas = self.cbas_util.get_replicas_info(self.shell)
            if replicas:
                for replica in replicas:
                    self.log.info("replica state during rebalance: %s"
                                  % replica['status'])
            self.sleep(30)

        replicas = self.cbas_util.get_replicas_info(self.shell)
        replicas_after_rebalance = len(replicas)
        self.assertEqual(replicas_after_rebalance, replicas_before_rebalance,
                         "%s,%s" % (replicas_after_rebalance,
                                    replicas_before_rebalance))
        for replica in replicas:
            self.log.info("replica state after rebalance: %s"
                          % replica['status'])
            self.assertEqual(replica['status'], "IN_SYNC",
                             "Replica state is incorrect: %s"
                             % replica['status'])

        # items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name)
        # self.log.info("Items before service restart: %s" % items_in_cbas_bucket)
        items_in_cbas_bucket = 0
        start_time = time.time()
        while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) \
                and time.time() < start_time + 60:
            try:
                items_in_cbas_bucket, _ = \
                    self.cbas_util.get_num_items_in_cbas_dataset(
                        self.cbas_dataset_name)
            except:
                pass
            self.sleep(1)
        self.log.info("Docs in CBAS bucket after rebalance operation: %s"
                      % items_in_cbas_bucket)

        if items_in_cbas_bucket < self.num_items * 2 \
                and items_in_cbas_bucket > self.num_items:
            self.log.info("Data ingestion interrupted successfully")
        elif items_in_cbas_bucket < self.num_items:
            self.log.info("Data ingestion was interrupted and restarted "
                          "from 0.")
        else:
            self.log.info("Data ingestion was not interrupted; it completed "
                          "before the rebalance operation.")

        run_count = 0
        fail_count = 0
        success_count = 0
        aborted_count = 0
        shell = RemoteMachineShellConnection(self.cluster.master)
        for handle in handles:
            status, hand = self.cbas_util.retrieve_request_status_using_handle(
                self.cluster.master, handle, shell)
            if status == "running":
                run_count += 1
                self.log.info("query with handle %s is running." % handle)
            elif status == "failed":
                fail_count += 1
                self.log.info("query with handle %s failed." % handle)
            elif status == "success":
                success_count += 1
                self.log.info("query with handle %s is successful." % handle)
            else:
                aborted_count += 1
                self.log.info("Queued job is deleted: %s" % status)

        self.log.info("After rebalance %s queued jobs are Running." % run_count)
        self.log.info("After rebalance %s queued jobs are Failed." % fail_count)
        self.log.info("After rebalance %s queued jobs are Successful."
                      % success_count)
        self.log.info("After rebalance %s queued jobs are Aborted."
                      % aborted_count)

        query = "select count(*) from {0};".format(self.cbas_dataset_name)
        self.cbas_util._run_concurrent_queries(query, "immediate", 100)

        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, self.num_items * 2):
            self.fail("No. of items in CBAS dataset does not match "
                      "that in the CB bucket")
        self.ingest_more_data()

    def test_reboot_nodes(self):
        # Test for reboot of the CC node and reboot of all nodes.
self.setup_for_test(skip_data_loading=True) self.ingestion_in_progress() self.node_type = self.input.param('node_type', 'CC') replica_nodes_before_reboot = self.cbas_util.get_replicas_info( self.shell) replicas_before_reboot = len( self.cbas_util.get_replicas_info(self.shell)) if self.node_type == "CC": shell = RemoteMachineShellConnection(self.cbas_node) shell.reboot_server_and_wait_for_cb_run(self.cluster_util, self.cbas_node) shell.disconnect() elif self.node_type == "NC": for server in self.cluster.cbas_nodes: shell = RemoteMachineShellConnection(server) shell.reboot_server_and_wait_for_cb_run( self.cluster_util, server) shell.disconnect() else: shell = RemoteMachineShellConnection(self.cbas_node) shell.reboot_server_and_wait_for_cb_run(self.cluster_util, self.cbas_node) shell.disconnect() for server in self.cluster.cbas_nodes: shell = RemoteMachineShellConnection(server) shell.reboot_server_and_wait_for_cb_run( self.cluster_util, server) shell.disconnect() self.sleep(60) replica_nodes_after_reboot = self.cbas_util.get_replicas_info( self.shell) replicas_after_reboot = len(replica_nodes_after_reboot) self.assertTrue( replica_nodes_after_reboot == replica_nodes_before_reboot, "Replica nodes changed after reboot. Before: %s , After : %s" % (replica_nodes_before_reboot, replica_nodes_after_reboot)) self.assertTrue( replicas_after_reboot == replicas_before_reboot, "Number of Replica nodes changed after reboot. Before: %s , After : %s" % (replicas_before_reboot, replicas_after_reboot)) items_in_cbas_bucket = 0 start_time = time.time() while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) and time.time() < start_time + 60: try: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) except: pass self.sleep(1) query = "select count(*) from {0};".format(self.cbas_dataset_name) self.cbas_util._run_concurrent_queries(query, "immediate", 100) if not self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, self.num_items * 2): self.fail( "No. 
of items in CBAS dataset do not match that in the CB bucket" ) for replica in replica_nodes_after_reboot: self.log.info("replica state during rebalance: %s" % replica['status']) self.assertEqual( replica['status'], "IN_SYNC", "Replica state is incorrect: %s" % replica['status']) self.ingest_more_data() def test_failover(self): self.setup_for_test(skip_data_loading=True) self.rebalance_node = self.input.param('rebalance_node', 'CC') self.how_many = self.input.param('how_many', 1) self.restart_rebalance = self.input.param('restart_rebalance', False) self.replica_change = self.input.param('replica_change', 0) self.add_back = self.input.param('add_back', False) query = "select sleep(count(*),50000) from {0};".format( self.cbas_dataset_name) handles = self.cbas_util._run_concurrent_queries(query, "async", 10) self.ingestion_in_progress() if self.rebalance_node == "CC": node_in_test = [self.cbas_node] otpNodes = [self.otpNodes[0]] self.cbas_util.closeConn() self.cbas_util = CbasUtil(self.cluster.master, self.cluster.cbas_nodes[0]) self.cbas_util.createConn("default") self.cbas_node = self.cluster.cbas_nodes[0] elif self.rebalance_node == "NC": node_in_test = self.cluster.cbas_nodes[:self.how_many] otpNodes = self.nc_otpNodes[:self.how_many] else: node_in_test = [self.cbas_node ] + self.cluster.cbas_nodes[:self.how_many] otpNodes = self.otpNodes[:self.how_many + 1] self.cbas_util.closeConn() self.cbas_util = CbasUtil(self.cluster.master, self.cluster.cbas_nodes[self.how_many]) self.cbas_util.createConn("default") replicas_before_rebalance = len( self.cbas_util.get_replicas_info(self.shell)) items_in_cbas_bucket = 0 start_time = time.time() while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) and time.time() < start_time + 60: try: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) except: pass self.sleep(1) self.log.info("Items before failover node: %s" % items_in_cbas_bucket) if self.restart_rebalance: graceful_failover = self.input.param("graceful_failover", False) failover_task = self._cb_cluster.async_failover( self.input.servers, node_in_test, graceful_failover) self.task_manager.get_task_result(failover_task) if self.add_back: for otpnode in otpNodes: self.rest.set_recovery_type('ns_1@' + otpnode.ip, "full") self.rest.add_back_node('ns_1@' + otpnode.ip) self.rebalance(wait_for_completion=False) else: self.rebalance(ejected_nodes=[node.id for node in otpNodes], wait_for_completion=False) self.sleep(2) if self.rest._rebalance_progress_status() == "running": self.assertTrue(self.rest.stop_rebalance(wait_timeout=120), "Failed while stopping rebalance.") if self.add_back: self.rebalance(wait_for_completion=False) else: self.rebalance( ejected_nodes=[node.id for node in otpNodes], wait_for_completion=False) else: self.fail( "Rebalance completed before the test could have stopped rebalance." 
) else: graceful_failover = self.input.param("graceful_failover", False) failover_task = self._cb_cluster.async_failover( self.input.servers, node_in_test, graceful_failover) self.task_manager.get_task_result(failover_task) if self.add_back: for otpnode in otpNodes: self.rest.set_recovery_type('ns_1@' + otpnode.ip, "full") self.rest.add_back_node('ns_1@' + otpnode.ip) self.rebalance(wait_for_completion=False) replicas_before_rebalance -= self.replica_change self.sleep(5) str_time = time.time() while self.rest._rebalance_progress_status( ) == "running" and time.time() < str_time + 300: replicas = self.cbas_util.get_replicas_info(self.shell) if replicas: for replica in replicas: self.log.info("replica state during rebalance: %s" % replica['status']) self.sleep(15) replicas = self.cbas_util.get_replicas_info(self.shell) replicas_after_rebalance = len(replicas) self.assertEqual( replicas_after_rebalance, replicas_before_rebalance, "%s,%s" % (replicas_after_rebalance, replicas_before_rebalance)) for replica in replicas: self.log.info("replica state during rebalance: %s" % replica['status']) self.assertEqual( replica['status'], "IN_SYNC", "Replica state is incorrect: %s" % replica['status']) items_in_cbas_bucket = 0 start_time = time.time() while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) and time.time() < start_time + 60: try: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) except: pass self.sleep(1) self.log.info("After rebalance operation docs in CBAS bucket : %s" % items_in_cbas_bucket) if items_in_cbas_bucket < self.num_items * 2 and items_in_cbas_bucket > self.num_items: self.log.info("Data Ingestion Interrupted successfully") elif items_in_cbas_bucket < self.num_items: self.log.info( "Data Ingestion did interrupted and restarting from 0.") else: self.log.info( "Data Ingestion did not interrupted but complete before rebalance operation." ) run_count = 0 fail_count = 0 success_count = 0 aborted_count = 0 shell = RemoteMachineShellConnection(node_in_test[0]) for handle in handles: status, hand = self.cbas_util.retrieve_request_status_using_handle( node_in_test, handle, shell) if status == "running": run_count += 1 self.log.info("query with handle %s is running." % handle) elif status == "failed": fail_count += 1 self.log.info("query with handle %s is failed." % handle) elif status == "success": success_count += 1 self.log.info("query with handle %s is successful." % handle) else: aborted_count += 1 self.log.info("Queued job is deleted: %s" % status) self.log.info("After service restart %s queued jobs are Running." % run_count) self.log.info("After service restart %s queued jobs are Failed." % fail_count) self.log.info("After service restart %s queued jobs are Successful." % success_count) self.log.info("After service restart %s queued jobs are Aborted." % aborted_count) if self.rebalance_node == "NC": self.assertTrue(aborted_count == 0, "Some queries aborted") query = "select count(*) from {0};".format(self.cbas_dataset_name) self.cbas_util._run_concurrent_queries(query, "immediate", 100) if not self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, self.num_items * 2): self.fail( "No. of items in CBAS dataset do not match that in the CB bucket" ) self.ingest_more_data()
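# The rebalance and failover tests above all repeat the same polling idiom:
# call CbasUtil.get_num_items_in_cbas_dataset() until it returns a usable
# count (anything other than 0 or -1) or a timeout expires. The standalone
# sketch below is added only to make that idiom explicit; it is not part of
# the original suite. The helper name and the use of time.sleep() in place of
# the test's self.sleep() are assumptions made for illustration.
import time


def wait_for_cbas_item_count(cbas_util, dataset_name, timeout=60,
                             poll_interval=1):
    """Poll the dataset item count until it looks valid or the timeout expires.

    Returns the last observed count, which may still be 0 or -1 if the
    timeout is hit before ingestion produces a readable count.
    """
    items = 0
    start = time.time()
    while items in (0, -1) and time.time() < start + timeout:
        try:
            # get_num_items_in_cbas_dataset() returns (count, mutated_count)
            items, _ = cbas_util.get_num_items_in_cbas_dataset(dataset_name)
        except Exception:
            # The dataset may be briefly unqueryable while the cluster settles
            pass
        time.sleep(poll_interval)
    return items

# Example usage (hypothetical), mirroring the post-rebalance checks above:
#   items = wait_for_cbas_item_count(self.cbas_util, self.cbas_dataset_name)
#   if self.num_items < items < self.num_items * 2:
#       self.log.info("Data ingestion interrupted successfully")
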
class UpgradeTests(UpgradeBase): def setUp(self): super(UpgradeTests, self).setUp() self.cbas_util = CbasUtil(self.task) self.cbas_spec_name = self.input.param("cbas_spec", "local_datasets") self.rebalance_util = CBASRebalanceUtil(self.cluster_util, self.bucket_util, self.task, vbucket_check=True, cbas_util=self.cbas_util) if self.input.param("n2n_encryption", False): CbServer.use_https = True trust_all_certs() self.security_util = SecurityUtils(self.log) rest = RestConnection(self.cluster.master) self.log.info("Disabling Auto-Failover") if not rest.update_autofailover_settings(False, 120): self.fail("Disabling Auto-Failover failed") self.log.info("Setting node to node encryption level to all") self.security_util.set_n2n_encryption_level_on_nodes( self.cluster.nodes_in_cluster, level="all") CbServer.use_https = True self.log.info("Enabling Auto-Failover") if not rest.update_autofailover_settings(True, 300): self.fail("Enabling Auto-Failover failed") cbas_cc_node_ip = None retry = 0 self.cluster.cbas_nodes = \ self.cluster_util.get_nodes_from_services_map( self.cluster, service_type="cbas", get_all_nodes=True, servers=self.cluster.nodes_in_cluster) while True and retry < 60: cbas_cc_node_ip = self.cbas_util.retrieve_cc_ip_from_master( self.cluster) if cbas_cc_node_ip: break else: self.sleep(10, "Waiting for CBAS service to come up") retry += 1 if not cbas_cc_node_ip: self.fail("CBAS service did not come up even after 10 " "mins.") for server in self.cluster.cbas_nodes: if server.ip == cbas_cc_node_ip: self.cluster.cbas_cc_node = server break if not self.cbas_util.wait_for_cbas_to_recover(self.cluster, timeout=300): self.fail("Analytics service failed to start post adding cbas " "nodes to cluster") self.pre_upgrade_setup() self.log_setup_status(self.__class__.__name__, "Finished", stage=self.setUp.__name__) def tearDown(self): self.log_setup_status(self.__class__.__name__, "Started", stage=self.tearDown.__name__) self.cluster.master = self.cluster_util.get_kv_nodes(self.cluster)[0] self.cluster_util.cluster_cleanup(self.cluster, self.bucket_util) super(UpgradeTests, self).tearDown() self.log_setup_status(self.__class__.__name__, "Finished", stage=self.tearDown.__name__) def pre_upgrade_setup(self): """ Number of datasets is fixed here, as pre 6.6 default max number of datasets that can be created was 8. 
""" major_version = float(self.initial_version[:3]) if major_version >= 7.0: update_spec = { "no_of_dataverses": self.input.param('pre_update_no_of_dv', 2), "no_of_datasets_per_dataverse": self.input.param('pre_update_ds_per_dv', 4), "no_of_synonyms": self.input.param('pre_update_no_of_synonym', 0), "no_of_indexes": self.input.param('pre_update_no_of_index', 3), "max_thread_count": self.input.param('no_of_threads', 10), } else: update_spec = { "no_of_dataverses": self.input.param('pre_update_no_of_dv', 2), "no_of_datasets_per_dataverse": self.input.param('pre_update_ds_per_dv', 4), "no_of_synonyms": 0, "no_of_indexes": self.input.param('pre_update_no_of_index', 3), "max_thread_count": self.input.param('no_of_threads', 10), "dataverse": { "cardinality": 1, "creation_method": "dataverse" }, "dataset": { "creation_methods": ["cbas_dataset"], "bucket_cardinality": 1 }, "index": { "creation_method": "index" } } if update_spec["no_of_dataverses"] * update_spec[ "no_of_datasets_per_dataverse"] > 8: self.fail("Total number of datasets across all dataverses " "cannot be more than 8 for pre 7.0 builds") if not self.cbas_setup(update_spec): self.fail("Pre Upgrade CBAS setup failed") if major_version >= 7.1: self.replica_num = self.input.param('replica_num', 0) set_result = self.cbas_util.set_replica_number_from_settings( self.cluster.master, replica_num=self.replica_num) if set_result != self.replica_num: self.fail("Error while setting replica for CBAS") self.log.info( "Rebalancing for CBAS replica setting change to take " "effect.") rebalance_task, _ = self.rebalance_util.rebalance( self.cluster, kv_nodes_in=0, kv_nodes_out=0, cbas_nodes_in=0, cbas_nodes_out=0, available_servers=[], exclude_nodes=[]) if not self.rebalance_util.wait_for_rebalance_task_to_complete( rebalance_task, self.cluster): self.fail("Rebalance failed") def cbas_setup(self, update_spec, connect_local_link=True): if self.cbas_spec_name: self.cbas_spec = self.cbas_util.get_cbas_spec(self.cbas_spec_name) self.cbas_util.update_cbas_spec(self.cbas_spec, update_spec) cbas_infra_result = self.cbas_util.create_cbas_infra_from_spec( self.cluster, self.cbas_spec, self.bucket_util, wait_for_ingestion=False) if not cbas_infra_result[0]: self.log.error( "Error while creating infra from CBAS spec -- {0}".format( cbas_infra_result[1])) return False if connect_local_link: for dataverse in self.cbas_util.dataverses: if not self.cbas_util.connect_link( self.cluster, ".".join([dataverse, "Local"])): self.log.error( "Failed to connect Local link for dataverse - {0}". 
format(dataverse)) return False if not self.cbas_util.wait_for_ingestion_all_datasets( self.cluster, self.bucket_util): self.log.error("Data ingestion did not happen in the datasets") return False return True def post_upgrade_validation(self): major_version = float(self.upgrade_version[:3]) # rebalance once again to activate CBAS service self.sleep(180, "Sleep before rebalancing to activate CBAS service") rebalance_task, _ = self.rebalance_util.rebalance(self.cluster, kv_nodes_in=0, kv_nodes_out=0, cbas_nodes_in=0, cbas_nodes_out=0, available_servers=[], exclude_nodes=[]) if not self.rebalance_util.wait_for_rebalance_task_to_complete( rebalance_task, self.cluster): self.log_failure("Rebalance failed") return False rest = RestConnection(self.cluster.master) # Update RAM quota allocated to buckets created before upgrade cluster_info = rest.get_nodes_self() kv_quota = \ cluster_info.__getattribute__(CbServer.Settings.KV_MEM_QUOTA) bucket_size = kv_quota // (self.input.param("num_buckets", 1) + 1) for bucket in self.cluster.buckets: self.bucket_util.update_bucket_property(self.cluster.master, bucket, bucket_size) validation_results = {} self.log.info("Validating pre upgrade cbas infra") results = list() for dataverse in self.cbas_util.dataverses: results.append( self.cbas_util.validate_dataverse_in_metadata( self.cluster, dataverse)) for dataset in self.cbas_util.list_all_dataset_objs( dataset_source="internal"): results.append( self.cbas_util.validate_dataset_in_metadata( self.cluster, dataset_name=dataset.name, dataverse_name=dataset.dataverse_name)) results.append( self.cbas_util.validate_cbas_dataset_items_count( self.cluster, dataset_name=dataset.full_name, expected_count=dataset.num_of_items)) for index in self.cbas_util.list_all_index_objs(): result, _ = self.cbas_util.verify_index_created( self.cluster, index_name=index.name, dataset_name=index.dataset_name, indexed_fields=index.indexed_fields) results.append(result) results.append( self.cbas_util.verify_index_used( self.cluster, statement="SELECT VALUE v FROM {0} v WHERE age > 2".format( index.full_dataset_name), index_used=True, index_name=None)) validation_results["pre_upgrade"] = all(results) if major_version >= 7.1: self.log.info("Enabling replica for analytics") self.replica_num = self.input.param('replica_num', 0) set_result = self.cbas_util.set_replica_number_from_settings( self.cluster.master, replica_num=self.replica_num) if set_result != self.replica_num: self.fail("Error while setting replica for CBAS") self.log.info( "Rebalancing for CBAS replica setting change to take " "effect.") rebalance_task, _ = self.rebalance_util.rebalance( self.cluster, kv_nodes_in=0, kv_nodes_out=0, cbas_nodes_in=0, cbas_nodes_out=0, available_servers=[], exclude_nodes=[]) if not self.rebalance_util.wait_for_rebalance_task_to_complete( rebalance_task, self.cluster): self.fail("Rebalance failed") if not self.cbas_util.wait_for_replication_to_finish(self.cluster): self.fail("Replication could not complete before timeout") if not self.cbas_util.verify_actual_number_of_replicas( self.cluster, len(self.cluster.cbas_nodes) - 1): self.fail("Actual number of replicas is different from what " "was set") self.log.info("Loading docs in default collection of existing buckets") for bucket in self.cluster.buckets: gen_load = doc_generator(self.key, self.num_items, self.num_items * 2, randomize_doc_size=True, randomize_value=True, randomize=True) async_load_task = self.task.async_load_gen_docs( self.cluster, bucket, gen_load, DocLoading.Bucket.DocOps.CREATE, 
active_resident_threshold=self.active_resident_threshold, timeout_secs=self.sdk_timeout, process_concurrency=8, batch_size=500, sdk_client_pool=self.sdk_client_pool) self.task_manager.get_task_result(async_load_task) # Update num_items in case of DGM run if self.active_resident_threshold != 100: self.num_items = async_load_task.doc_index bucket.scopes[CbServer.default_scope].collections[ CbServer.default_collection].num_items = self.num_items * 2 # Verify doc load count self.bucket_util._wait_for_stats_all_buckets( self.cluster, self.cluster.buckets) self.sleep(30, "Wait for num_items to get reflected") current_items = self.bucket_util.get_bucket_current_item_count( self.cluster, bucket) if current_items == self.num_items * 2: validation_results["post_upgrade_data_load"] = True else: self.log.error( "Mismatch in doc_count. Actual: %s, Expected: %s" % (current_items, self.num_items * 2)) validation_results["post_upgrade_data_load"] = False self.bucket_util.print_bucket_stats(self.cluster) if not self.cbas_util.wait_for_ingestion_all_datasets( self.cluster, self.bucket_util): validation_results["post_upgrade_data_load"] = False self.log.error("Data ingestion did not happen in the datasets") else: validation_results["post_upgrade_data_load"] = True self.log.info( "Deleting all the data from default collection of buckets created before upgrade" ) for bucket in self.cluster.buckets: gen_load = doc_generator(self.key, 0, self.num_items * 2, randomize_doc_size=True, randomize_value=True, randomize=True) async_load_task = self.task.async_load_gen_docs( self.cluster, bucket, gen_load, DocLoading.Bucket.DocOps.DELETE, active_resident_threshold=self.active_resident_threshold, timeout_secs=self.sdk_timeout, process_concurrency=8, batch_size=500, sdk_client_pool=self.sdk_client_pool) self.task_manager.get_task_result(async_load_task) # Verify doc load count self.bucket_util._wait_for_stats_all_buckets( self.cluster, self.cluster.buckets) while True: current_items = self.bucket_util.get_bucket_current_item_count( self.cluster, bucket) if current_items == 0: break else: self.sleep(30, "Wait for num_items to get reflected") bucket.scopes[CbServer.default_scope].collections[ CbServer.default_collection].num_items = 0 if major_version >= 7.0: self.log.info("Creating scopes and collections in existing bucket") scope_spec = {"name": self.cbas_util.generate_name()} self.bucket_util.create_scope_object(self.cluster.buckets[0], scope_spec) collection_spec = { "name": self.cbas_util.generate_name(), "num_items": self.num_items } self.bucket_util.create_collection_object(self.cluster.buckets[0], scope_spec["name"], collection_spec) bucket_helper = BucketHelper(self.cluster.master) status, content = bucket_helper.create_scope( self.cluster.buckets[0].name, scope_spec["name"]) if status is False: self.fail("Create scope failed for %s:%s, Reason - %s" % (self.cluster.buckets[0].name, scope_spec["name"], content)) self.bucket.stats.increment_manifest_uid() status, content = bucket_helper.create_collection( self.cluster.buckets[0].name, scope_spec["name"], collection_spec) if status is False: self.fail( "Create collection failed for %s:%s:%s, Reason - %s" % (self.cluster.buckets[0].name, scope_spec["name"], collection_spec["name"], content)) self.bucket.stats.increment_manifest_uid() self.log.info("Creating new buckets with scopes and collections") for i in range(1, self.input.param("num_buckets", 1) + 1): self.bucket_util.create_default_bucket( self.cluster, replica=self.num_replicas, 
compression_mode=self.compression_mode, ram_quota=bucket_size, bucket_type=self.bucket_type, storage=self.bucket_storage, eviction_policy=self.bucket_eviction_policy, bucket_durability=self.bucket_durability_level, bucket_name="bucket_{0}".format(i)) if major_version >= 7.0: self.over_ride_spec_params = self.input.param( "override_spec_params", "").split(";") self.load_data_into_buckets() else: for bucket in self.cluster.buckets[1:]: gen_load = doc_generator(self.key, 0, self.num_items, randomize_doc_size=True, randomize_value=True, randomize=True) async_load_task = self.task.async_load_gen_docs( self.cluster, bucket, gen_load, DocLoading.Bucket.DocOps.CREATE, active_resident_threshold=self.active_resident_threshold, timeout_secs=self.sdk_timeout, process_concurrency=8, batch_size=500, sdk_client_pool=self.sdk_client_pool) self.task_manager.get_task_result(async_load_task) # Update num_items in case of DGM run if self.active_resident_threshold != 100: self.num_items = async_load_task.doc_index bucket.scopes[CbServer.default_scope].collections[ CbServer.default_collection].num_items = self.num_items # Verify doc load count self.bucket_util._wait_for_stats_all_buckets( self.cluster, self.cluster.buckets) self.sleep(30, "Wait for num_items to get reflected") current_items = self.bucket_util.get_bucket_current_item_count( self.cluster, bucket) if current_items == self.num_items: validation_results["post_upgrade_KV_infra"] = True else: self.log.error( "Mismatch in doc_count. Actual: %s, Expected: %s" % (current_items, self.num_items)) validation_results["post_upgrade_KV_infra"] = False self.log.info("Create CBAS infra post upgrade and check for data " "ingestion") if major_version >= 7.0: update_spec = { "no_of_dataverses": self.input.param('no_of_dv', 2), "no_of_datasets_per_dataverse": self.input.param('ds_per_dv', 4), "no_of_synonyms": self.input.param('no_of_synonym', 2), "no_of_indexes": self.input.param('no_of_index', 3), "max_thread_count": self.input.param('no_of_threads', 10), } else: update_spec = { "no_of_dataverses": self.input.param('no_of_dv', 2), "no_of_datasets_per_dataverse": self.input.param('ds_per_dv', 4), "no_of_synonyms": 0, "no_of_indexes": self.input.param('no_of_index', 3), "max_thread_count": self.input.param('no_of_threads', 10), "dataverse": { "cardinality": 1, "creation_method": "dataverse" }, "dataset": { "creation_methods": ["cbas_dataset"], "bucket_cardinality": 1 }, "index": { "creation_method": "index" } } if update_spec["no_of_dataverses"] * update_spec[ "no_of_datasets_per_dataverse"] > 8: self.log_failure("Total number of datasets across all " "dataverses cannot be more than 8 for pre " "7.0 builds") return False if self.cbas_setup(update_spec, False): validation_results["post_upgrade_cbas_infra"] = True else: validation_results["post_upgrade_cbas_infra"] = False if major_version >= 7.1: self.cluster.rest = RestConnection(self.cluster.master) def post_replica_activation_verification(): self.log.info("Verifying doc count accross all datasets") if not self.cbas_util.validate_docs_in_all_datasets( self.cluster, self.bucket_util, timeout=600): self.log_failure( "Docs are missing after replicas become active") validation_results["post_upgrade_replica_verification"] = \ False if update_spec["no_of_indexes"]: self.log.info("Verifying CBAS indexes are working") for idx in self.cbas_util.list_all_index_objs(): statement = "Select * from {0} where age > 5 limit 10".format( idx.full_dataset_name) if not self.cbas_util.verify_index_used( self.cluster, statement, 
index_used=True, index_name=idx.name): self.log.info( "Index {0} on dataset {1} was not used while " "executing query".format( idx.name, idx.full_dataset_name)) self.log.info("Marking one of the CBAS nodes as failed over.") self.available_servers, kv_failover_nodes, cbas_failover_nodes =\ self.rebalance_util.failover( self.cluster, kv_nodes=0, cbas_nodes=1, failover_type="Hard", action=None, timeout=7200, available_servers=[], exclude_nodes=[self.cluster.cbas_cc_node], kv_failover_nodes=None, cbas_failover_nodes=None, all_at_once=False) post_replica_activation_verification() self.available_servers, kv_failover_nodes, cbas_failover_nodes = \ self.rebalance_util.perform_action_on_failed_over_nodes( self.cluster, action=self.input.param('action_on_failover', "FullRecovery"), available_servers=self.available_servers, kv_failover_nodes=kv_failover_nodes, cbas_failover_nodes=cbas_failover_nodes) post_replica_activation_verification() validation_results["post_upgrade_replica_verification"] = True self.log.info("Delete the bucket created before upgrade") if self.bucket_util.delete_bucket(self.cluster, self.cluster.buckets[0], wait_for_bucket_deletion=True): validation_results["bucket_delete"] = True else: validation_results["bucket_delete"] = False if validation_results["bucket_delete"]: self.log.info("Check all datasets created on the deleted bucket " "are empty") results = [] for dataset in self.cbas_util.list_all_dataset_objs( dataset_source="internal"): if dataset.kv_bucket.name == "default": if self.cbas_util.wait_for_ingestion_complete( self.cluster, dataset.full_name, 0, timeout=300): results.append(True) else: results.append(False) validation_results["bucket_delete"] = all(results) for scenario in validation_results: if validation_results[scenario]: self.log.info("{0} : Passed".format(scenario)) else: self.log.info("{0} : Failed".format(scenario)) return validation_results def load_data_into_buckets(self, doc_loading_spec=None): """ Loads data into buckets using the data spec """ self.over_ride_spec_params = self.input.param("override_spec_params", "").split(";") # Init sdk_client_pool if not initialized before if self.sdk_client_pool is None: self.init_sdk_pool_object() self.doc_spec_name = self.input.param("doc_spec", "initial_load") # Create clients in SDK client pool if self.sdk_client_pool: self.log.info("Creating required SDK clients for client_pool") bucket_count = len(self.cluster.buckets) max_clients = self.task_manager.number_of_threads clients_per_bucket = int(ceil(max_clients / bucket_count)) for bucket in self.cluster.buckets: self.sdk_client_pool.create_clients( bucket, [self.cluster.master], clients_per_bucket, compression_settings=self.sdk_compression) if not doc_loading_spec: doc_loading_spec = self.bucket_util.get_crud_template_from_package( self.doc_spec_name) self.over_ride_doc_loading_template_params(doc_loading_spec) # MB-38438, adding CollectionNotFoundException in retry exception doc_loading_spec[MetaCrudParams.RETRY_EXCEPTIONS].append( SDKException.CollectionNotFoundException) doc_loading_task = self.bucket_util.run_scenario_from_spec( self.task, self.cluster, self.cluster.buckets, doc_loading_spec, mutation_num=0, batch_size=self.batch_size) if doc_loading_task.result is False: self.fail("Initial reloading failed") # Verify initial doc load count self.bucket_util._wait_for_stats_all_buckets(self.cluster, self.cluster.buckets) self.bucket_util.validate_docs_per_collections_all_buckets( self.cluster) def over_ride_doc_loading_template_params(self, target_spec): for 
over_ride_param in self.over_ride_spec_params: if over_ride_param == "durability": target_spec[MetaCrudParams.DURABILITY_LEVEL] = \ self.durability_level elif over_ride_param == "sdk_timeout": target_spec[MetaCrudParams.SDK_TIMEOUT] = self.sdk_timeout elif over_ride_param == "doc_size": target_spec[MetaCrudParams.DocCrud.DOC_SIZE] = self.doc_size elif over_ride_param == "num_scopes": target_spec[MetaCrudParams.SCOPES_TO_ADD_PER_BUCKET] = int( self.input.param("num_scopes", 1)) elif over_ride_param == "num_collections": target_spec[ MetaCrudParams.COLLECTIONS_TO_ADD_FOR_NEW_SCOPES] = int( self.input.param("num_collections", 1)) elif over_ride_param == "num_items": target_spec["doc_crud"][MetaCrudParams.DocCrud.NUM_ITEMS_FOR_NEW_COLLECTIONS] = \ self.num_items def test_upgrade(self): self.log.info("Upgrading cluster nodes to target version") node_to_upgrade = self.fetch_node_to_upgrade() while node_to_upgrade is not None: self.log.info("Selected node for upgrade: %s" % node_to_upgrade.ip) if self.upgrade_type == "offline": self.upgrade_function[self.upgrade_type](node_to_upgrade, self.upgrade_version, True) else: self.upgrade_function[self.upgrade_type](node_to_upgrade, self.upgrade_version) self.cluster_util.print_cluster_stats(self.cluster) node_to_upgrade = self.fetch_node_to_upgrade() if not all(self.post_upgrade_validation().values()): self.fail("Post upgrade scenarios failed") def test_upgrade_with_failover(self): self.log.info("Upgrading cluster nodes to target version") node_to_upgrade = self.fetch_node_to_upgrade() while node_to_upgrade is not None: self.log.info("Selected node for upgrade: %s" % node_to_upgrade.ip) rest = RestConnection(node_to_upgrade) services = rest.get_nodes_services() services_on_target_node = services[(node_to_upgrade.ip + ":" + node_to_upgrade.port)] self.log.info( "Selected node services {0}".format(services_on_target_node)) if "cbas" in services_on_target_node: self.upgrade_function["failover_full_recovery"]( node_to_upgrade, False) else: self.upgrade_function[self.upgrade_type](node_to_upgrade) self.cluster_util.print_cluster_stats(self.cluster) node_to_upgrade = self.fetch_node_to_upgrade() if not all(self.post_upgrade_validation().values()): self.fail("Post upgrade scenarios failed")
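# Both pre_upgrade_setup() and post_upgrade_validation() above guard the CBAS
# spec with the same rule: the total number of datasets (dataverses multiplied
# by datasets per dataverse) must not exceed 8, since that was the default cap
# on pre-7.0 builds. The helper below is a minimal sketch of that guard added
# for clarity only; the function name and return convention are assumptions
# and are not part of the original suite.
def exceeds_pre_7_0_dataset_limit(update_spec, limit=8):
    """Return True when the spec would create more datasets than pre-7.0 allows."""
    total_datasets = (update_spec["no_of_dataverses"]
                      * update_spec["no_of_datasets_per_dataverse"])
    return total_datasets > limit

# Example usage (hypothetical), matching the checks in the upgrade tests above:
#   if exceeds_pre_7_0_dataset_limit(update_spec):
#       self.fail("Total number of datasets across all dataverses "
#                 "cannot be more than 8 for pre 7.0 builds")
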
class CBASClusterOperations(CBASBaseTest): def setUp(self): self.input = TestInputSingleton.input self.input.test_params.update({"default_bucket":False}) self.rebalanceServers = None self.nodeType = "KV" self.wait_for_rebalance=True super(CBASClusterOperations, self).setUp() self.num_items = self.input.param("items", 1000) self.bucket_util.create_default_bucket() # self.cbas_util.createConn("default") if 'nodeType' in self.input.test_params: self.nodeType = self.input.test_params['nodeType'] self.rebalance_both = self.input.param("rebalance_cbas_and_kv", False) if not self.rebalance_both: if self.nodeType == "KV": self.rebalanceServers = self.cluster.kv_nodes self.wait_for_rebalance=False elif self.nodeType == "CBAS": self.rebalanceServers = [self.cbas_node] + self.cluster.cbas_nodes else: self.rebalanceServers = self.cluster.kv_nodes + [self.cbas_node] + self.cluster.cbas_nodes self.nodeType = "KV" + "-" +"CBAS" self.assertTrue(len(self.rebalanceServers)>1, "Not enough %s servers to run tests."%self.rebalanceServers) self.log.info("This test will be running in %s context."%self.nodeType) self.load_gen_tasks = [] def setup_for_test(self, skip_data_loading=False): if not skip_data_loading: # Load Couchbase bucket first. self.perform_doc_ops_in_all_cb_buckets("create", 0, self.num_items) self.cbas_util.createConn(self.cb_bucket_name) # Create dataset on the CBAS bucket self.cbas_util.create_dataset_on_bucket(cbas_bucket_name=self.cb_bucket_name, cbas_dataset_name=self.cbas_dataset_name, compress_dataset=self.compress_dataset) # Create indexes on the CBAS bucket self.create_secondary_indexes = self.input.param("create_secondary_indexes",False) if self.create_secondary_indexes: self.index_fields = "profession:string,number:bigint" create_idx_statement = "create index {0} on {1}({2});".format( self.index_name, self.cbas_dataset_name, self.index_fields) status, metrics, errors, results, _ = self.cbas_util.execute_statement_on_cbas_util( create_idx_statement) self.assertTrue(status == "success", "Create Index query failed") self.assertTrue( self.cbas_util.verify_index_created(self.index_name, self.index_fields.split(","), self.cbas_dataset_name)[0]) # Connect to Bucket self.cbas_util.connect_to_bucket(cbas_bucket_name=self.cbas_bucket_name, cb_bucket_password=self.cb_bucket_password) if not skip_data_loading: # Validate no. of items in CBAS dataset if not self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, self.num_items): self.fail( "No. of items in CBAS dataset do not match that in the CB bucket") def test_rebalance_in(self): ''' Description: This will test the rebalance in feature i.e. one node coming in to the cluster. Then Rebalance. Verify that is has no effect on the data ingested to cbas. Steps: 1. Setup cbas. bucket, datasets/shadows, connect. 2. Add a node and rebalance. Don't wait for rebalance completion. 3. During rebalance, do mutations and execute queries on cbas. 
Author: Ritesh Agarwal/Mihir Kamdar Date Created: 18/07/2017 ''' query = "select count(*) from {0};".format(self.cbas_dataset_name) self.setup_for_test() self.cluster_util.add_node(node=self.rebalanceServers[1], rebalance=True, wait_for_rebalance_completion=self.wait_for_rebalance) self.log.info("Rebalance state:%s"%self.rest._rebalance_progress_status()) self.perform_doc_ops_in_all_cb_buckets("create", self.num_items, self.num_items * 2) self.log.info("Rebalance state:%s"%self.rest._rebalance_progress_status()) self.cbas_util._run_concurrent_queries(query,None,2000,batch_size=self.concurrent_batch_size) self.log.info("Rebalance state:%s"%self.rest._rebalance_progress_status()) if not self.cbas_util.validate_cbas_dataset_items_count(self.cbas_dataset_name, self.num_items * 2, 0): self.fail( "No. of items in CBAS dataset do not match that in the CB bucket") def test_rebalance_out(self): ''' Description: This will test the rebalance out feature i.e. one node going out of cluster. Then Rebalance. Steps: 1. Add a node, Rebalance. 2. Setup cbas. bucket, datasets/shadows, connect. 3. Remove a node and rebalance. Don't wait for rebalance completion. 4. During rebalance, do mutations and execute queries on cbas. Author: Ritesh Agarwal/Mihir Kamdar Date Created: 18/07/2017 ''' self.cluster_util.add_node(node=self.rebalanceServers[1]) query = "select count(*) from {0};".format(self.cbas_dataset_name) self.setup_for_test() otpnodes = [] nodes = self.rest.node_statuses() for node in nodes: if node.ip == self.rebalanceServers[1].ip: otpnodes.append(node) self.remove_node(otpnodes, wait_for_rebalance=self.wait_for_rebalance) self.log.info("Rebalance state:%s"%self.rest._rebalance_progress_status()) self.perform_doc_ops_in_all_cb_buckets("create", self.num_items, self.num_items * 2) self.cbas_util._run_concurrent_queries(query,"immediate",2000,batch_size=self.concurrent_batch_size) if not self.cbas_util.validate_cbas_dataset_items_count(self.cbas_dataset_name, self.num_items * 2, 0): self.fail( "No. of items in CBAS dataset do not match that in the CB bucket") def test_swap_rebalance(self): ''' Description: This will test the swap rebalance feature i.e. one node going out and one node coming in cluster. Then Rebalance. Verify that is has no effect on the data ingested to cbas. Steps: 1. Setup cbas. bucket, datasets/shadows, connect. 2. Add a node that is to be swapped against the leaving node. Do not rebalance. 3. Remove a node and rebalance. 4. During rebalance, do mutations and execute queries on cbas. Author: Ritesh Agarwal/Mihir Kamdar Date Created: 20/07/2017 ''' query = "select count(*) from {0};".format(self.cbas_dataset_name) self.setup_for_test() otpnodes=[] nodes = self.rest.node_statuses() if self.nodeType == "KV": service = ["kv"] else: service = ["cbas"] otpnodes.append(self.cluster_util.add_node(node=self.servers[1], services=service)) self.cluster_util.add_node(node=self.servers[3], services=service,rebalance=False) self.remove_node(otpnodes, wait_for_rebalance=self.wait_for_rebalance) self.perform_doc_ops_in_all_cb_buckets("create", self.num_items, self.num_items * 2) self.cbas_util._run_concurrent_queries(query,"immediate",2000,batch_size=self.concurrent_batch_size) if not self.cbas_util.validate_cbas_dataset_items_count(self.cbas_dataset_name, self.num_items * 2, 0): self.fail( "No. 
of items in CBAS dataset do not match that in the CB bucket") def test_failover(self): ''' Description: This will test the node failover both graceful and hard failover based on graceful_failover param in testcase conf file. Steps: 1. Add node to the cluster which will be failed over. 2. Create docs, setup cbas. 3. Mark the node for fail over. 4. Do rebalance asynchronously. During rebalance perform mutations. 5. Run some CBAS queries. 6. Check for correct number of items in CBAS datasets. Author: Ritesh Agarwal/Mihir Kamdar Date Created: 20/07/2017 ''' #Add node which will be failed over later. self.cluster_util.add_node(node=self.rebalanceServers[1]) query = "select count(*) from {0};".format(self.cbas_dataset_name) graceful_failover = self.input.param("graceful_failover", False) self.setup_for_test() failover_task = self._cb_cluster.async_failover(self.input.servers, [self.rebalanceServers[1]], graceful_failover) self.task_manager.get_task_result(failover_task) result = self.cluster_util.rebalance() self.assertTrue(result, "Rebalance operation failed") self.perform_doc_ops_in_all_cb_buckets("create", self.num_items, self.num_items * 3 / 2) self.cbas_util._run_concurrent_queries(query,"immediate",2000,batch_size=self.concurrent_batch_size) if not self.cbas_util.validate_cbas_dataset_items_count(self.cbas_dataset_name, self.num_items * 3 / 2, 0): self.fail( "No. of items in CBAS dataset do not match that in the CB bucket") ''' -i b/resources/4-nodes-template.ini -t cbas.cbas_cluster_operations.CBASClusterOperations.test_rebalance_in_cb_cbas_together,cb_bucket_name=default,cbas_bucket_name=default_bucket,cbas_dataset_name=default_ds,items=10,nodeType=KV,rebalance_cbas_and_kv=True,wait_for_rebalace=False ''' def test_rebalance_in_cb_cbas_together(self): self.log.info("Creates cbas buckets and dataset") dataset_count_query = "select count(*) from {0};".format(self.cbas_dataset_name) self.setup_for_test() self.log.info("Rebalance in KV node") wait_for_rebalace_complete = self.input.param("wait_for_rebalace", False) self.cluster_util.add_node(node=self.rebalanceServers[1], rebalance=False, wait_for_rebalance_completion=wait_for_rebalace_complete) self.log.info("Rebalance in CBAS node") self.cluster_util.add_node(node=self.rebalanceServers[3], rebalance=True, wait_for_rebalance_completion=wait_for_rebalace_complete) self.log.info( "Perform document create as rebalance is in progress : Rebalance state:%s" % self.rest._rebalance_progress_status()) self.perform_doc_ops_in_all_cb_buckets("create", self.num_items, self.num_items * 2) self.log.info( "Run queries as rebalance is in progress : Rebalance state:%s" % self.rest._rebalance_progress_status()) handles = self.cbas_util._run_concurrent_queries(dataset_count_query, None, 2000, batch_size=self.concurrent_batch_size) self.log.info("Log concurrent query status") self.cbas_util.log_concurrent_query_outcome(self.cluster.master, handles) if not self.cbas_util.validate_cbas_dataset_items_count(self.cbas_dataset_name, self.num_items * 2, 0): self.fail("No. 
of items in CBAS dataset do not match that in the CB bucket") ''' -i b/resources/4-nodes-template.ini -t cbas.cbas_cluster_operations.CBASClusterOperations.test_rebalance_out_cb_cbas_together,cb_bucket_name=default,cbas_bucket_name=default_bucket,cbas_dataset_name=default_ds,items=10,nodeType=KV,rebalance_cbas_and_kv=True,wait_for_rebalace=False ''' def test_rebalance_out_cb_cbas_together(self): self.log.info("Rebalance in KV node and wait for rebalance to complete") self.cluster_util.add_node(node=self.rebalanceServers[1]) self.log.info("Rebalance in CBAS node and wait for rebalance to complete") self.cluster_util.add_node(node=self.rebalanceServers[3]) self.log.info("Creates cbas buckets and dataset") dataset_count_query = "select count(*) from {0};".format(self.cbas_dataset_name) self.setup_for_test() self.log.info("Fetch and remove nodes to rebalance out") wait_for_rebalace_complete = self.input.param("wait_for_rebalace", False) otpnodes = [] nodes = self.rest.node_statuses() for node in nodes: if node.ip == self.rebalanceServers[1].ip or node.ip == self.rebalanceServers[3].ip: otpnodes.append(node) for every_node in otpnodes: self.remove_node(otpnodes, wait_for_rebalance=wait_for_rebalace_complete) self.sleep(30, message="Sleep for 30 seconds for remove node to complete") self.log.info( "Perform document create as rebalance is in progress : Rebalance state:%s" % self.rest._rebalance_progress_status()) self.perform_doc_ops_in_all_cb_buckets("create", self.num_items, self.num_items * 2) self.log.info( "Run queries as rebalance is in progress : Rebalance state:%s" % self.rest._rebalance_progress_status()) handles = self.cbas_util._run_concurrent_queries(dataset_count_query, "immediate", 2000, batch_size=self.concurrent_batch_size) self.log.info("Log concurrent query status") self.cbas_util.log_concurrent_query_outcome(self.cluster.master, handles) if not self.cbas_util.validate_cbas_dataset_items_count(self.cbas_dataset_name, self.num_items * 2, 0): self.fail("No. 
of items in CBAS dataset do not match that in the CB bucket") ''' -i b/resources/4-nodes-template.ini -t cbas.cbas_cluster_operations.CBASClusterOperations.test_swap_rebalance_cb_cbas_together,cb_bucket_name=default,cbas_bucket_name=default_bucket,cbas_dataset_name=default_ds,items=10,rebalance_cbas_and_kv=True,wait_for_rebalance=True ''' def test_swap_rebalance_cb_cbas_together(self): self.log.info("Creates cbas buckets and dataset") wait_for_rebalance = self.input.param("wait_for_rebalance", True) dataset_count_query = "select count(*) from {0};".format(self.cbas_dataset_name) self.setup_for_test() self.log.info("Add KV node and don't rebalance") self.cluster_util.add_node(node=self.rebalanceServers[1], rebalance=False) self.log.info("Add cbas node and don't rebalance") self.cluster_util.add_node(node=self.rebalanceServers[3], rebalance=False) otpnodes = [] nodes = self.rest.node_statuses() for node in nodes: if node.ip == self.rebalanceServers[0].ip or node.ip == self.rebalanceServers[2].ip: otpnodes.append(node) self.log.info("Remove master node") self.remove_node(otpnode=otpnodes, wait_for_rebalance=wait_for_rebalance) self.cluster.master = self.rebalanceServers[1] self.log.info("Create instances pointing to new master nodes") c_utils = CbasUtil(self.rebalanceServers[1], self.rebalanceServers[3], self.task) c_utils.createConn(self.cb_bucket_name) self.log.info("Create reference to SDK client") client = SDKClient(scheme="couchbase", hosts=[self.rebalanceServers[1].ip], bucket=self.cb_bucket_name, password=self.rebalanceServers[1].rest_password) self.log.info("Add more document to default bucket") documents = ['{"name":"value"}'] * (self.num_items//10) document_id_prefix = "custom-id-" client.insert_custom_json_documents(document_id_prefix, documents) self.log.info( "Run queries as rebalance is in progress : Rebalance state:%s" % self.rest._rebalance_progress_status()) handles = c_utils._run_concurrent_queries(dataset_count_query, "immediate", 2000, batch_size=self.concurrent_batch_size) self.log.info("Log concurrent query status") self.cbas_util.log_concurrent_query_outcome(self.cluster.master, handles) if not c_utils.validate_cbas_dataset_items_count(self.cbas_dataset_name, self.num_items + (self.num_items//10) , 0): self.fail("No. 
of items in CBAS dataset do not match that in the CB bucket") def test_rebalance_in_multiple_cbas_on_a_busy_system(self): node_services = [] node_services.append(self.input.param('service',"cbas")) self.log.info("Setup CBAS") self.setup_for_test(skip_data_loading=True) self.log.info("Run KV ops in async while rebalance is in progress") json_generator = JsonGenerator() generators = json_generator.generate_docs_simple(docs_per_day=self.num_items, start=0) tasks = self.bucket_util._async_load_all_buckets(self.cluster, generators, "create", 0) self.log.info("Run concurrent queries to simulate busy system") statement = "select sleep(count(*),50000) from {0} where mutated=0;".format(self.cbas_dataset_name) handles = self.cbas_util._run_concurrent_queries(statement, self.mode, self.num_concurrent_queries) self.log.info("Rebalance in CBAS nodes") self.cluster_util.add_node(node=self.rebalanceServers[1], services=node_services, rebalance=False, wait_for_rebalance_completion=False) self.cluster_util.add_node(node=self.rebalanceServers[3], services=node_services, rebalance=True, wait_for_rebalance_completion=True) self.log.info("Get KV ops result") for task in tasks: self.task_manager.get_task_result(task) self.log.info("Log concurrent query status") self.cbas_util.log_concurrent_query_outcome(self.cluster.master, handles) if not self.cbas_util.validate_cbas_dataset_items_count(self.cbas_dataset_name, self.num_items, 0): self.fail("No. of items in CBAS dataset do not match that in the CB bucket") def test_rebalance_out_multiple_cbas_on_a_busy_system(self): node_services = [] node_services.append(self.input.param('service',"cbas")) self.log.info("Rebalance in CBAS nodes") self.cluster_util.add_node(node=self.rebalanceServers[1], services=node_services) self.cluster_util.add_node(node=self.rebalanceServers[3], services=node_services) self.log.info("Setup CBAS") self.setup_for_test(skip_data_loading=True) self.log.info("Run KV ops in async while rebalance is in progress") json_generator = JsonGenerator() generators = json_generator.generate_docs_simple(docs_per_day=self.num_items, start=0) tasks = self.bucket_util._async_load_all_buckets(self.cluster, generators, "create", 0) self.log.info("Run concurrent queries to simulate busy system") statement = "select sleep(count(*),50000) from {0} where mutated=0;".format(self.cbas_dataset_name) handles = self.cbas_util._run_concurrent_queries(statement, self.mode, self.num_concurrent_queries) self.log.info("Fetch and remove nodes to rebalance out") self.rebalance_cc = self.input.param("rebalance_cc", False) out_nodes = [] nodes = self.rest.node_statuses() if self.rebalance_cc: for node in nodes: if node.ip == self.cbas_node.ip or node.ip == self.servers[1].ip: out_nodes.append(node) self.cbas_util.closeConn() self.log.info("Reinitialize CBAS utils with ip %s, since CC node is rebalanced out" %self.servers[3].ip) self.cbas_util = CbasUtil(self.cluster.master, self.servers[3], self.task) self.cbas_util.createConn("default") else: for node in nodes: if node.ip == self.servers[3].ip or node.ip == self.servers[1].ip: out_nodes.append(node) self.log.info("Rebalance out CBAS nodes %s %s" % (out_nodes[0].ip, out_nodes[1].ip)) self.remove_all_nodes_then_rebalance([out_nodes[0],out_nodes[1]]) self.log.info("Get KV ops result") for task in tasks: self.task_manager.get_task_result(task) self.log.info("Log concurrent query status") self.cbas_util.log_concurrent_query_outcome(self.cluster.master, handles) if not 
self.cbas_util.validate_cbas_dataset_items_count(self.cbas_dataset_name, self.num_items, 0): self.fail("No. of items in CBAS dataset do not match that in the CB bucket") ''' cbas.cbas_cluster_operations.CBASClusterOperations.test_rebalance_swap_multiple_cbas_on_a_busy_system,cb_bucket_name=default,cbas_bucket_name=default_bucket,cbas_dataset_name=default_ds,items=10,rebalance_cbas_and_kv=True,service=cbas,rebalance_cc=False cbas.cbas_cluster_operations.CBASClusterOperations.test_rebalance_swap_multiple_cbas_on_a_busy_system,cb_bucket_name=default,cbas_bucket_name=default_bucket,cbas_dataset_name=default_ds,items=10,rebalance_cbas_and_kv=True,service=cbas,rebalance_cc=True ''' def test_rebalance_swap_multiple_cbas_on_a_busy_system(self): ''' 1. We have 4 node cluster with 1 KV and 3 CBAS. Assume the IPS end with 101(KV), 102(CBAS), 103(CBAS), 104(CBAS) 2, Post initial setup - 101 running KV and 102 running CBAS as CC node 3. As part of test test add an extra NC node that we will swap rebalance later - Adding 103 and rebalance 4. If swap rebalance NC - then select the node added in #3 for remove and 104 to add during swap 5. If swap rebalance CC - then select the CC node added for remove and 104 to add during swap ''' self.log.info('Read service input param') node_services = [] node_services.append(self.input.param('service', "cbas")) self.log.info("Rebalance in CBAS nodes, this node will be removed during swap") self.cluster_util.add_node(node=self.rebalanceServers[1], services=node_services) self.log.info("Setup CBAS") self.setup_for_test(skip_data_loading=True) self.log.info("Run KV ops in async while rebalance is in progress") json_generator = JsonGenerator() generators = json_generator.generate_docs_simple(docs_per_day=self.num_items, start=0) tasks = self.bucket_util._async_load_all_buckets(self.cluster, generators, "create", 0) self.log.info("Run concurrent queries to simulate busy system") statement = "select sleep(count(*),50000) from {0} where mutated=0;".format(self.cbas_dataset_name) handles = self.cbas_util._run_concurrent_queries(statement, self.mode, self.num_concurrent_queries) self.log.info("Fetch node to remove during rebalance") self.rebalance_cc = self.input.param("rebalance_cc", False) out_nodes = [] nodes = self.rest.node_statuses() reinitialize_cbas_util = False for node in nodes: if self.rebalance_cc and (node.ip == self.cbas_node.ip): out_nodes.append(node) reinitialize_cbas_util = True elif not self.rebalance_cc and node.ip == self.rebalanceServers[1].ip: out_nodes.append(node) self.log.info("Swap rebalance CBAS nodes") self.cluster_util.add_node(node=self.rebalanceServers[3], services=node_services, rebalance=False) self.remove_node([out_nodes[0]], wait_for_rebalance=True) self.log.info("Get KV ops result") for task in tasks: self.task_manager.get_task_result(task) if reinitialize_cbas_util is True: self.cbas_util = CbasUtil(self.cluster.master, self.rebalanceServers[3], self.task) self.cbas_util.createConn("default") self.log.info("Log concurrent query status") self.cbas_util.log_concurrent_query_outcome(self.cluster.master, handles) count_n1ql = self.rest.query_tool('select count(*) from %s' % (self.cb_bucket_name))['results'][0]['$1'] if not self.cbas_util.validate_cbas_dataset_items_count(self.cbas_dataset_name, count_n1ql, 0): self.fail("No. 
of items in CBAS dataset do not match that in the CB bucket") ''' test_fail_over_node_followed_by_rebalance_out_or_add_back,cb_bucket_name=default,graceful_failover=True,cbas_bucket_name=default_cbas,cbas_dataset_name=default_ds,items=10000,nodeType=KV,rebalance_out=True,concurrent_batch_size=500 test_fail_over_node_followed_by_rebalance_out_or_add_back,cb_bucket_name=default,graceful_failover=True,cbas_bucket_name=default_cbas,cbas_dataset_name=default_ds,items=10000,nodeType=KV,rebalance_out=False,recovery_strategy=full,concurrent_batch_size=500 test_fail_over_node_followed_by_rebalance_out_or_add_back,cb_bucket_name=default,graceful_failover=True,cbas_bucket_name=default_cbas,cbas_dataset_name=default_ds,items=10000,nodeType=KV,rebalance_out=False,recovery_strategy=delta,concurrent_batch_size=500 test_fail_over_node_followed_by_rebalance_out_or_add_back,cb_bucket_name=default,graceful_failover=False,cbas_bucket_name=default_cbas,cbas_dataset_name=default_ds,items=10000,nodeType=KV,rebalance_out=True,concurrent_batch_size=500 test_fail_over_node_followed_by_rebalance_out_or_add_back,cb_bucket_name=default,graceful_failover=False,cbas_bucket_name=default_cbas,cbas_dataset_name=default_ds,items=10000,nodeType=KV,rebalance_out=False,recovery_strategy=full,concurrent_batch_size=500 test_fail_over_node_followed_by_rebalance_out_or_add_back,cb_bucket_name=default,graceful_failover=False,cbas_bucket_name=default_cbas,cbas_dataset_name=default_ds,items=10000,nodeType=KV,rebalance_out=False,recovery_strategy=delta,concurrent_batch_size=500 test_fail_over_node_followed_by_rebalance_out_or_add_back,cb_bucket_name=default,graceful_failover=False,cbas_bucket_name=default_cbas,cbas_dataset_name=default_ds,items=10000,nodeType=CBAS,rebalance_out=True,concurrent_batch_size=500 test_fail_over_node_followed_by_rebalance_out_or_add_back,cb_bucket_name=default,graceful_failover=False,cbas_bucket_name=default_cbas,cbas_dataset_name=default_ds,items=10000,nodeType=CBAS,rebalance_out=False,recovery_strategy=full,concurrent_batch_size=500 ''' def test_fail_over_node_followed_by_rebalance_out_or_add_back(self): """ 1. Start with an initial setup, having 1 KV and 1 CBAS 2. Add a node that will be failed over - KV/CBAS 3. Create CBAS buckets and dataset 4. Fail over the KV node based in graceful_failover parameter specified 5. Rebalance out/add back based on input param specified in conf file 6. Perform doc operations 7. run concurrent queries 8. 
Verify document count on dataset post failover """ self.log.info("Add an extra node to fail-over") self.cluster_util.add_node(node=self.rebalanceServers[1]) self.log.info("Read the failure out type to be performed") graceful_failover = self.input.param("graceful_failover", True) self.log.info("Set up test - Create cbas buckets and data-sets") self.setup_for_test() self.log.info("Perform Async doc operations on KV") json_generator = JsonGenerator() generators = json_generator.generate_docs_simple(docs_per_day=self.num_items * 3 / 2, start=self.num_items) kv_task = self.bucket_util._async_load_all_buckets(self.cluster, generators, "create", 0) self.log.info("Run concurrent queries on CBAS") query = "select count(*) from {0};".format(self.cbas_dataset_name) handles = self.cbas_util._run_concurrent_queries(query, "async", self.num_concurrent_queries, batch_size=self.concurrent_batch_size) self.log.info("fail-over the node") fail_task = self._cb_cluster.async_failover(self.input.servers, [self.rebalanceServers[1]], graceful_failover) self.task_manager.get_task_result(fail_task) self.log.info("Read input param to decide on add back or rebalance out") self.rebalance_out = self.input.param("rebalance_out", False) if self.rebalance_out: self.log.info("Rebalance out the fail-over node") result = self.cluster_util.rebalance() self.assertTrue(result, "Rebalance operation failed") else: self.recovery_strategy = self.input.param("recovery_strategy", "full") self.log.info("Performing %s recovery" % self.recovery_strategy) success = False end_time = datetime.datetime.now() + datetime.timedelta(minutes=int(1)) while datetime.datetime.now() < end_time or not success: try: self.sleep(10, message="Wait for fail over complete") self.rest.set_recovery_type('ns_1@' + self.rebalanceServers[1].ip, self.recovery_strategy) success = True except Exception: self.log.info("Fail over in progress. Re-try after 10 seconds.") pass if not success: self.fail("Recovery %s failed." % self.recovery_strategy) self.rest.add_back_node('ns_1@' + self.rebalanceServers[1].ip) result = self.cluster_util.rebalance() self.assertTrue(result, "Rebalance operation failed") self.log.info("Get KV ops result") for task in kv_task: self.task_manager.get_task_result(task) self.log.info("Log concurrent query status") self.cbas_util.log_concurrent_query_outcome(self.cluster.master, handles) self.log.info("Validate dataset count on CBAS") count_n1ql = self.rest.query_tool('select count(*) from `%s`' % self.cb_bucket_name)['results'][0]['$1'] if not self.cbas_util.validate_cbas_dataset_items_count(self.cbas_dataset_name, count_n1ql, 0, timeout=400, analytics_timeout=400): self.fail("No. 
of items in CBAS dataset do not match that in the CB bucket") ''' test_to_fail_initial_rebalance_and_verify_subsequent_rebalance_succeeds,cb_bucket_name=default,cbas_bucket_name=default_bucket,cbas_dataset_name=default_ds,items=10,nodeType=CBAS,num_queries=10,restart_couchbase_on_incoming_or_outgoing_node=True,rebalance_type=in test_to_fail_initial_rebalance_and_verify_subsequent_rebalance_succeeds,cb_bucket_name=default,cbas_bucket_name=default_bucket,cbas_dataset_name=default_ds,items=10,nodeType=CBAS,num_queries=10,restart_couchbase_on_incoming_or_outgoing_node=True,rebalance_type=out test_to_fail_initial_rebalance_and_verify_subsequent_rebalance_succeeds,cb_bucket_name=default,cbas_bucket_name=default_bucket,cbas_dataset_name=default_ds,items=10,nodeType=CBAS,num_queries=10,restart_couchbase_on_incoming_or_outgoing_node=True,rebalance_type=swap ''' def test_to_fail_initial_rebalance_and_verify_subsequent_rebalance_succeeds(self): self.log.info("Pick the incoming and outgoing nodes during rebalance") self.rebalance_type = self.input.param("rebalance_type", "in") nodes_to_add = [self.rebalanceServers[1]] nodes_to_remove = [] reinitialize_cbas_util = False if self.rebalance_type == 'out': nodes_to_remove.append(self.rebalanceServers[1]) self.cluster_util.add_node(self.rebalanceServers[1]) nodes_to_add = [] elif self.rebalance_type == 'swap': self.cluster_util.add_node(nodes_to_add[0], rebalance=False) nodes_to_remove.append(self.cbas_node) reinitialize_cbas_util = True self.log.info("Incoming nodes - %s, outgoing nodes - %s. For rebalance type %s " %(nodes_to_add, nodes_to_remove, self.rebalance_type)) self.log.info("Creates cbas buckets and dataset") dataset_count_query = "select count(*) from {0};".format(self.cbas_dataset_name) self.setup_for_test() self.log.info("Perform async doc operations on KV") json_generator = JsonGenerator() generators = json_generator.generate_docs_simple(docs_per_day=self.num_items * 3 / 2, start=self.num_items) kv_task = self.bucket_util._async_load_all_buckets(self.cluster, generators, "create", 0, batch_size=5000) self.log.info("Run concurrent queries on CBAS") handles = self.cbas_util._run_concurrent_queries(dataset_count_query, "async", self.num_concurrent_queries) self.log.info("Fetch the server to restart couchbase on") restart_couchbase_on_incoming_or_outgoing_node = self.input.param("restart_couchbase_on_incoming_or_outgoing_node", True) if not restart_couchbase_on_incoming_or_outgoing_node: node = self.cbas_node else: node = self.rebalanceServers[1] shell = RemoteMachineShellConnection(node) self.log.info("Rebalance nodes") self.task.async_rebalance(self.servers, nodes_to_add, nodes_to_remove) self.log.info("Restart Couchbase on node %s" % node.ip) shell.restart_couchbase() self.sleep(30, message="Waiting for service to be back again...") self.log.info("Verify subsequent rebalance is successful") nodes_to_add = [] # Node is already added to cluster in previous rebalance, adding it again will throw exception self.assertTrue(self.task.rebalance(self.servers, nodes_to_add, nodes_to_remove)) if reinitialize_cbas_util is True: self.cbas_util = CbasUtil(self.cluster.master, self.rebalanceServers[1], self.task) self.cbas_util.createConn("default") self.cbas_util.wait_for_cbas_to_recover() self.log.info("Get KV ops result") for task in kv_task: self.task_manager.get_task_result(task) self.log.info("Log concurrent query status") self.cbas_util.log_concurrent_query_outcome(self.cluster.master, handles) self.log.info("Validate dataset count on CBAS") if not 
self.cbas_util.validate_cbas_dataset_items_count(self.cbas_dataset_name, self.num_items * 3 / 2, 0): self.fail("No. of items in CBAS dataset do not match that in the CB bucket") def test_auto_retry_failed_rebalance(self): # Auto-retry rebalance settings body = {"enabled": "true", "afterTimePeriod": self.retry_time, "maxAttempts": self.num_retries} rest = RestConnection(self.cluster.master) rest.set_retry_rebalance_settings(body) result = rest.get_retry_rebalance_settings() self.log.info("Pick the incoming and outgoing nodes during rebalance") self.rebalance_type = self.input.param("rebalance_type", "in") nodes_to_add = [self.rebalanceServers[1]] nodes_to_remove = [] reinitialize_cbas_util = False if self.rebalance_type == 'out': nodes_to_remove.append(self.rebalanceServers[1]) self.cluster_util.add_node(self.rebalanceServers[1]) nodes_to_add = [] elif self.rebalance_type == 'swap': self.cluster_util.add_node(nodes_to_add[0], rebalance=False) nodes_to_remove.append(self.cbas_node) reinitialize_cbas_util = True self.log.info("Incoming nodes - %s, outgoing nodes - %s. For rebalance type %s " % ( nodes_to_add, nodes_to_remove, self.rebalance_type)) self.log.info("Creates cbas buckets and dataset") dataset_count_query = "select count(*) from {0};".format(self.cbas_dataset_name) self.setup_for_test() self.log.info("Perform async doc operations on KV") json_generator = JsonGenerator() generators = json_generator.generate_docs_simple(docs_per_day=self.num_items * 3 / 2, start=self.num_items) kv_task = self.bucket_util._async_load_all_buckets(self.cluster, generators, "create", 0, batch_size=5000) self.log.info("Run concurrent queries on CBAS") handles = self.cbas_util._run_concurrent_queries(dataset_count_query, "async", self.num_concurrent_queries) self.log.info("Fetch the server to restart couchbase on") restart_couchbase_on_incoming_or_outgoing_node = self.input.param( "restart_couchbase_on_incoming_or_outgoing_node", True) if not restart_couchbase_on_incoming_or_outgoing_node: node = self.cbas_node else: node = self.rebalanceServers[1] shell = RemoteMachineShellConnection(node) try: self.log.info("Rebalance nodes") self.task.async_rebalance(self.servers, nodes_to_add, nodes_to_remove) self.sleep(10, message="Restarting couchbase after 10s on node %s" % node.ip) shell.restart_couchbase() self.sleep(30, message="Waiting for service to be back again...") self.sleep(self.retry_time, "Wait for retry time to complete and then check the rebalance results") reached = RestHelper(self.rest).rebalance_reached(wait_step=120) self.log.info("Rebalance status : {0}".format(reached)) self.sleep(20) self._check_retry_rebalance_succeeded() if reinitialize_cbas_util is True: self.cbas_util = CbasUtil(self.cluster.master, self.rebalanceServers[1], self.task) self.cbas_util.createConn("default") self.cbas_util.wait_for_cbas_to_recover() self.log.info("Get KV ops result") for task in kv_task: self.task_manager.get_task_result(task) self.log.info("Log concurrent query status") self.cbas_util.log_concurrent_query_outcome(self.cluster.master, handles) self.log.info("Validate dataset count on CBAS") if not self.cbas_util.validate_cbas_dataset_items_count(self.cbas_dataset_name, self.num_items * 3 / 2, 0): self.fail("No. 
of items in CBAS dataset do not match that in the CB bucket") except Exception as e: self.fail("Some exception occurred : {0}".format(e.message)) finally: body = {"enabled": "false"} rest.set_retry_rebalance_settings(body) ''' test_rebalance_on_nodes_running_multiple_services,cb_bucket_name=default,cbas_bucket_name=default_bucket,cbas_dataset_name=default_ds,items=10,nodeType=KV,num_queries=10,rebalance_type=in test_rebalance_on_nodes_running_multiple_services,cb_bucket_name=default,cbas_bucket_name=default_bucket,cbas_dataset_name=default_ds,items=10,nodeType=KV,num_queries=10,rebalance_type=out test_rebalance_on_nodes_running_multiple_services,cb_bucket_name=default,cbas_bucket_name=default_bucket,cbas_dataset_name=default_ds,items=10,num_queries=10,rebalance_type=swap,rebalance_cbas_and_kv=True ''' def test_rebalance_on_nodes_running_multiple_services(self): self.log.info("Pick the incoming and outgoing nodes during rebalance") active_services = ['cbas,fts,kv'] self.rebalance_type = self.input.param("rebalance_type", "in") nodes_to_add = [self.rebalanceServers[1]] nodes_to_remove = [] if self.rebalance_type == 'out': # This node will be rebalanced out nodes_to_remove.append(self.rebalanceServers[1]) # Will be running services as specified in the list - active_services self.cluster_util.add_node(nodes_to_add[0], services=active_services) # No nodes to remove so making the add notes empty nodes_to_add = [] elif self.rebalance_type == 'swap': # Below node will be swapped with the incoming node specified in nodes_to_add self.cluster_util.add_node(nodes_to_add[0], services=active_services) nodes_to_add = [] nodes_to_add.append(self.rebalanceServers[3]) # Below node will be removed and swapped with node that was added earlier nodes_to_remove.append(self.rebalanceServers[1]) self.log.info("Incoming nodes - %s, outgoing nodes - %s. For rebalance type %s " % ( nodes_to_add, nodes_to_remove, self.rebalance_type)) self.log.info("Creates cbas buckets and dataset") dataset_count_query = "select count(*) from {0};".format(self.cbas_dataset_name) self.setup_for_test() self.log.info("Perform async doc operations on KV") json_generator = JsonGenerator() generators = json_generator.generate_docs_simple(docs_per_day=self.num_items * 3 / 2, start=self.num_items) kv_task = self.bucket_util._async_load_all_buckets(self.cluster, generators, "create", 0, batch_size=5000) self.log.info("Run concurrent queries on CBAS") handles = self.cbas_util._run_concurrent_queries(dataset_count_query, "async", self.num_concurrent_queries) self.log.info("Rebalance nodes") # Do not add node to nodes_to_add if already added as add_node earlier self.task.rebalance(self.servers, nodes_to_add, nodes_to_remove, services=active_services) self.log.info("Get KV ops result") for task in kv_task: self.task_manager.get_task_result(task) self.log.info("Log concurrent query status") self.cbas_util.log_concurrent_query_outcome(self.cluster.master, handles) self.log.info("Validate dataset count on CBAS") if not self.cbas_util.validate_cbas_dataset_items_count(self.cbas_dataset_name, self.num_items * 3 / 2, 0): self.fail("No. 
of items in CBAS dataset do not match that in the CB bucket") def tearDown(self): super(CBASClusterOperations, self).tearDown() def _check_retry_rebalance_succeeded(self): rest = RestConnection(self.cluster.master) result = json.loads(rest.get_pending_rebalance_info()) self.log.info(result) if "retry_rebalance" in result and result["retry_rebalance"] != "not_pending": retry_after_secs = result["retry_after_secs"] attempts_remaining = result["attempts_remaining"] retry_rebalance = result["retry_rebalance"] self.log.info("Attempts remaining : {0}, Retry rebalance : {1}".format(attempts_remaining, retry_rebalance)) while attempts_remaining: # wait for the afterTimePeriod for the failed rebalance to restart self.sleep(retry_after_secs, message="Waiting for the afterTimePeriod to complete") try: result = self.rest.monitorRebalance() msg = "monitoring rebalance {0}" self.log.info(msg.format(result)) except Exception: result = json.loads(self.rest.get_pending_rebalance_info()) self.log.info(result) try: attempts_remaining = result["attempts_remaining"] retry_rebalance = result["retry_rebalance"] retry_after_secs = result["retry_after_secs"] except KeyError: self.fail("Retrying of rebalance still did not help. All the retries exhausted...") self.log.info("Attempts remaining : {0}, Retry rebalance : {1}".format(attempts_remaining, retry_rebalance)) else: self.log.info("Retry rebalanced fixed the rebalance failure") break
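# --- Illustrative sketch (not part of the original suite) ---
# The retry-rebalance polling inside _check_retry_rebalance_succeeded can be
# read as a standalone helper. This is a minimal sketch under the assumption
# that `rest` is a RestConnection-like object exposing
# get_pending_rebalance_info() and monitorRebalance() as used above; the
# helper name and default poll interval are illustrative only.
import json
import time


def wait_for_retry_rebalance(rest, logger, default_wait=10):
    """Poll ns_server until a pending retry-rebalance either succeeds or
    runs out of attempts. Returns True on success, False otherwise."""
    result = json.loads(rest.get_pending_rebalance_info())
    if result.get("retry_rebalance", "not_pending") == "not_pending":
        # Nothing is pending, so the earlier rebalance already went through.
        return True
    while result.get("attempts_remaining", 0):
        # Honour the configured afterTimePeriod before checking again.
        time.sleep(result.get("retry_after_secs", default_wait))
        try:
            logger.info("monitoring rebalance %s" % rest.monitorRebalance())
            return True
        except Exception:
            result = json.loads(rest.get_pending_rebalance_info())
            logger.info("retry-rebalance state: %s" % result)
    return False
# --- end sketch ---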
class IngestionInterrupt_CBAS(CBASBaseTest): def setUp(self): self.input = TestInputSingleton.input self.input.test_params.update({"default_bucket": False}) super(IngestionInterrupt_CBAS, self).setUp() if "add_all_cbas_nodes" in self.input.test_params \ and self.input.test_params["add_all_cbas_nodes"] \ and len(self.cluster.cbas_nodes) > 0: self.otpNodes.extend( self.cluster_util.add_all_nodes_then_rebalance( self.cluster, self.cluster.cbas_nodes)) self.bucket_util.create_default_bucket(self.cluster, storage=self.bucket_storage) self.cb_bucket_name = self.input.param('cb_bucket_name', 'default') self.cbas_util.createConn("default") def setup_for_test(self, skip_data_loading=False): if not skip_data_loading: # Load Couchbase bucket first. self.perform_doc_ops_in_all_cb_buckets("create", 0, self.num_items, batch_size=1000) self.bucket_util.verify_stats_all_buckets(self.cluster, self.num_items) # Create dataset on the CBAS bucket self.cbas_util.create_dataset_on_bucket( cbas_bucket_name=self.cb_bucket_name, cbas_dataset_name=self.cbas_dataset_name) # Create indexes on the CBAS bucket self.create_secondary_indexes = self.input.param( "create_secondary_indexes", False) if self.create_secondary_indexes: self.index_fields = "profession:string,number:bigint" create_idx_statement = "create index {0} on {1}({2});".format( self.index_name, self.cbas_dataset_name, self.index_fields) status, metrics, errors, results, _ = self.cbas_util.execute_statement_on_cbas_util( create_idx_statement) self.assertTrue(status == "success", "Create Index query failed") self.assertTrue( self.cbas_util.verify_index_created( self.index_name, self.index_fields.split(","), self.cbas_dataset_name)[0]) # Connect to Bucket self.cbas_util.connect_to_bucket( cbas_bucket_name=self.cbas_bucket_name, cb_bucket_password=self.cb_bucket_password) if not skip_data_loading: # Validate no. of items in CBAS dataset if not self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, self.num_items): self.fail( "No. 
of items in CBAS dataset do not match that in the CB bucket" ) def ingestion_in_progress(self): self.cbas_util.disconnect_from_bucket(self.cbas_bucket_name) self.perform_doc_ops_in_all_cb_buckets("create", self.num_items, self.num_items * 3, batch_size=1000) self.cbas_util.connect_to_bucket( cbas_bucket_name=self.cbas_bucket_name, cb_bucket_password=self.cb_bucket_password) def test_service_restart(self): self.setup_for_test() self.restart_method = self.input.param('restart_method', None) self.cbas_node_type = self.input.param('cbas_node_type', None) query = "select sleep(count(*),50000) from {0};".format( self.cbas_dataset_name) handles = self.cbas_util._run_concurrent_queries(query, "async", 10) self.ingestion_in_progress() if self.cbas_node_type == "CC": node_in_test = self.cbas_node else: node_in_test = self.cluster.cbas_nodes[0] items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) self.log.info("Items before service restart: %s" % items_in_cbas_bucket) if self.restart_method == "graceful": self.log.info("Gracefully re-starting service on node %s" % node_in_test) NodeHelper.do_a_warm_up(node_in_test) NodeHelper.wait_service_started(node_in_test) else: self.log.info("Kill Memcached process on node %s" % node_in_test) shell = RemoteMachineShellConnection(node_in_test) shell.kill_memcached() items_in_cbas_bucket = 0 start_time = time.time() while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) and time.time() < start_time + 60: try: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) except: pass self.log.info( "After graceful service restart docs in CBAS bucket : %s" % items_in_cbas_bucket) if items_in_cbas_bucket < self.num_items * 3 and items_in_cbas_bucket > self.num_items: self.log.info("Data Ingestion Interrupted successfully") elif items_in_cbas_bucket < self.num_items: self.log.info( "Data Ingestion did interrupted and restarting from 0.") else: self.log.info( "Data Ingestion did not interrupted but complete before service restart." ) run_count = 0 fail_count = 0 success_count = 0 aborted_count = 0 shell = RemoteMachineShellConnection(node_in_test) for handle in handles: status, hand = self.cbas_util.retrieve_request_status_using_handle( node_in_test, handle, shell) if status == "running": run_count += 1 self.log.info("query with handle %s is running." % handle) elif status == "failed": fail_count += 1 self.log.info("query with handle %s is failed." % handle) elif status == "success": success_count += 1 self.log.info("query with handle %s is successful." % handle) else: aborted_count += 1 self.log.info("Queued job is deleted: %s" % status) self.log.info("After service restart %s queued jobs are Running." % run_count) self.log.info("After service restart %s queued jobs are Failed." % fail_count) self.log.info("After service restart %s queued jobs are Successful." % success_count) self.log.info("After service restart %s queued jobs are Aborted." % aborted_count) if self.cbas_node_type == "NC": self.assertTrue(fail_count + aborted_count == 0, "Some queries failed/aborted") query = "select count(*) from {0};".format(self.cbas_dataset_name) self.cbas_util._run_concurrent_queries(query, "immediate", 100) if not self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, self.num_items * 3): self.fail( "No. 
of items in CBAS dataset do not match that in the CB bucket" ) def test_kill_analytics_service(self): self.setup_for_test() process_name = self.input.param('process_name', None) service_name = self.input.param('service_name', None) cbas_node_type = self.input.param('cbas_node_type', None) query = "select sleep(count(*),50000) from {0};".format( self.cbas_dataset_name) handles = self.cbas_util._run_concurrent_queries(query, "async", 10) self.ingestion_in_progress() if cbas_node_type == "CC": node_in_test = self.cbas_node else: node_in_test = self.cluster.cbas_nodes[0] items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) self.log.info("Items before service kill: %s" % items_in_cbas_bucket) self.log.info("Kill %s process on node %s" % (process_name, node_in_test)) shell = RemoteMachineShellConnection(node_in_test) shell.kill_process(process_name, service_name) items_in_cbas_bucket = 0 start_time = time.time() while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) and time.time() < start_time + 60: try: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) except: pass # start_time = time.time() # while items_in_cbas_bucket <=0 and time.time()<start_time+120: # items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name) # self.sleep(1) # items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name) self.log.info("After %s kill, docs in CBAS bucket : %s" % (process_name, items_in_cbas_bucket)) if items_in_cbas_bucket < self.num_items * 3 and items_in_cbas_bucket > self.num_items: self.log.info("Data Ingestion Interrupted successfully") elif items_in_cbas_bucket < self.num_items: self.log.info( "Data Ingestion did not interrupted but restarting from 0.") else: self.log.info( "Data Ingestion did not interrupted but complete before service restart." ) run_count = 0 fail_count = 0 success_count = 0 aborted_count = 0 shell = RemoteMachineShellConnection(node_in_test) for handle in handles: status, hand = self.cbas_util.retrieve_request_status_using_handle( node_in_test, handle, shell) if status == "running": run_count += 1 self.log.info("query with handle %s is running." % handle) elif status == "failed": fail_count += 1 self.log.info("query with handle %s is failed." % handle) elif status == "success": success_count += 1 self.log.info("query with handle %s is successful." % handle) else: aborted_count += 1 self.log.info("Queued job is deleted: %s" % status) self.log.info("After service restart %s queued jobs are Running." % run_count) self.log.info("After service restart %s queued jobs are Failed." % fail_count) self.log.info("After service restart %s queued jobs are Successful." % success_count) self.log.info("After service restart %s queued jobs are Aborted." % aborted_count) if cbas_node_type == "NC": self.assertTrue((fail_count + aborted_count) == 0, "Some queries failed/aborted") query = "select count(*) from {0};".format(self.cbas_dataset_name) self.cbas_util._run_concurrent_queries(query, "immediate", 100) if not self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, self.num_items * 3): self.fail( "No. 
of items in CBAS dataset do not match that in the CB bucket" ) def test_stop_start_service_ingest_data(self): self.setup_for_test() self.cbas_node_type = self.input.param('cbas_node_type', None) query = "select sleep(count(*),50000) from {0};".format( self.cbas_dataset_name) handles = self.cbas_util._run_concurrent_queries(query, "async", 10) self.ingestion_in_progress() if self.cbas_node_type == "CC": node_in_test = self.cbas_node self.cbas_util.closeConn() self.cbas_util = CbasUtil(self.cluster.master, self.cluster.cbas_nodes[0]) self.cbas_util.createConn("default") else: node_in_test = self.cluster.cbas_nodes[0] items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) self.log.info("Items before service restart: %s" % items_in_cbas_bucket) self.log.info("Gracefully stopping service on node %s" % node_in_test) NodeHelper.stop_couchbase(node_in_test) NodeHelper.start_couchbase(node_in_test) NodeHelper.wait_service_started(node_in_test) # self.sleep(10, "wait for service to come up.") # # items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name) # self.log.info("After graceful STOPPING/STARTING service docs in CBAS bucket : %s"%items_in_cbas_bucket) # # start_time = time.time() # while items_in_cbas_bucket <=0 and time.time()<start_time+60: # items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name) # self.sleep(1) items_in_cbas_bucket = 0 start_time = time.time() while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) and time.time() < start_time + 60: try: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) except: pass if items_in_cbas_bucket < self.num_items * 3 and items_in_cbas_bucket > self.num_items: self.log.info("Data Ingestion Interrupted successfully") elif items_in_cbas_bucket < self.num_items: self.log.info( "Data Ingestion did not interrupted but restarting from 0.") else: self.log.info( "Data Ingestion did not interrupted but complete before service restart." ) run_count = 0 fail_count = 0 success_count = 0 aborted_count = 0 shell = RemoteMachineShellConnection(node_in_test) for handle in handles: status, hand = self.cbas_util.retrieve_request_status_using_handle( node_in_test, handle, shell) if status == "running": run_count += 1 self.log.info("query with handle %s is running." % handle) elif status == "failed": fail_count += 1 self.log.info("query with handle %s is failed." % handle) elif status == "success": success_count += 1 self.log.info("query with handle %s is successful." % handle) else: aborted_count += 1 self.log.info("Queued job is deleted: %s" % status) self.log.info("After service restart %s queued jobs are Running." % run_count) self.log.info("After service restart %s queued jobs are Failed." % fail_count) self.log.info("After service restart %s queued jobs are Successful." % success_count) self.log.info("After service restart %s queued jobs are Aborted." % aborted_count) if self.cbas_node_type == "NC": self.assertTrue(fail_count + aborted_count == 0, "Some queries failed/aborted") query = "select count(*) from {0};".format(self.cbas_dataset_name) self.cbas_util._run_concurrent_queries(query, "immediate", 100) if not self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, self.num_items * 3): self.fail( "No. 
of items in CBAS dataset do not match that in the CB bucket" ) def test_disk_full_ingest_data(self): self.cbas_node_type = self.input.param('cbas_node_type', None) if self.cbas_node_type == "CC": node_in_test = self.cbas_node self.cbas_util = CbasUtil(self.cluster.master, self.cluster.cbas_nodes[0]) else: node_in_test = self.cluster.cbas_nodes[0] remote_client = RemoteMachineShellConnection(node_in_test) output, error = remote_client.execute_command("rm -rf full_disk*", use_channel=True) remote_client.log_command_output(output, error) self.setup_for_test() query = "select sleep(count(*),50000) from {0};".format( self.cbas_dataset_name) handles = self.cbas_util._run_concurrent_queries(query, "async", 10) def _get_disk_usage_in_MB(remote_client): disk_info = remote_client.get_disk_info(in_MB=True) disk_space = disk_info[1].split()[-3][:-1] return disk_space du = int(_get_disk_usage_in_MB(remote_client)) - 50 chunk_size = 1024 while int(du) > 0: output, error = remote_client.execute_command( "dd if=/dev/zero of=full_disk{0} bs={1}M count=1".format( str(du) + "_MB" + str(time.time()), chunk_size), use_channel=True) remote_client.log_command_output(output, error) du -= 1024 if du < 1024: chunk_size = du self.ingestion_in_progress() items_in_cbas_bucket_before, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) items_in_cbas_bucket_after, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) try: while items_in_cbas_bucket_before != items_in_cbas_bucket_after: items_in_cbas_bucket_before, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) self.sleep(2) items_in_cbas_bucket_after, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) except: self.log.info("Ingestion interrupted and server seems to be down") if items_in_cbas_bucket_before == self.num_items * 3: self.log.info("Data Ingestion did not interrupted but completed.") elif items_in_cbas_bucket_before < self.num_items * 3: self.log.info("Data Ingestion Interrupted successfully") output, error = remote_client.execute_command("rm -rf full_disk*", use_channel=True) remote_client.log_command_output(output, error) remote_client.disconnect() self.sleep( 10, "wait for service to come up after disk space is made available.") run_count = 0 fail_count = 0 success_count = 0 aborted_count = 0 shell = RemoteMachineShellConnection(node_in_test) for handle in handles: status, hand = self.cbas_util.retrieve_request_status_using_handle( node_in_test, handle, shell) if status == "running": run_count += 1 self.log.info("query with handle %s is running." % handle) elif status == "failed": fail_count += 1 self.log.info("query with handle %s is failed." % handle) elif status == "success": success_count += 1 self.log.info("query with handle %s is successful." % handle) else: aborted_count += 1 self.log.info("Queued job is deleted: %s" % status) self.log.info("After service restart %s queued jobs are Running." % run_count) self.log.info("After service restart %s queued jobs are Failed." % fail_count) self.log.info("After service restart %s queued jobs are Successful." % success_count) self.log.info("After service restart %s queued jobs are Aborted." 
% aborted_count) if self.cbas_node_type == "NC": self.assertTrue(fail_count + aborted_count == 0, "Some queries failed/aborted") self.sleep(60) query = "select count(*) from {0};".format(self.cbas_dataset_name) self.cbas_util._run_concurrent_queries(query, "immediate", 100) if not self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, self.num_items * 3): self.fail( "No. of items in CBAS dataset do not match that in the CB bucket" ) def test_stop_network_ingest_data(self): self.setup_for_test() self.cbas_node_type = self.input.param('cbas_node_type', None) query = "select sleep(count(*),50000) from {0};".format( self.cbas_dataset_name) handles = self.cbas_util._run_concurrent_queries(query, "async", 10) self.ingestion_in_progress() # Add the code for stop network here: if self.cbas_node_type: if self.cbas_node_type == "CC": node_in_test = self.cbas_node self.cbas_util = CbasUtil(self.cluster.master, self.cluster.cbas_nodes[0]) self.cbas_util.createConn("default") else: node_in_test = self.cluster.cbas_nodes[0] # Stop network on KV node to mimic n/w partition on KV else: node_in_test = self.cluster.master items_in_cbas_bucket_before, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) self.log.info("Intems before network down: %s" % items_in_cbas_bucket_before) RemoteMachineShellConnection(node_in_test).stop_network("30") # self.sleep(40, "Wait for network to come up.") items_in_cbas_bucket = 0 start_time = time.time() while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) and time.time() < start_time + 60: try: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) except: pass # items_in_cbas_bucket_after, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name) self.log.info("Items after network is up: %s" % items_in_cbas_bucket) # start_time = time.time() # while items_in_cbas_bucket_after <=0 and time.time()<start_time+60: # items_in_cbas_bucket_after, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name) # self.sleep(1) # items_in_cbas_bucket = items_in_cbas_bucket_after if items_in_cbas_bucket < self.num_items * 3 and items_in_cbas_bucket > self.num_items: self.log.info("Data Ingestion Interrupted successfully") elif items_in_cbas_bucket < self.num_items: self.log.info( "Data Ingestion did not interrupted but restarting from 0.") else: self.log.info( "Data Ingestion did not interrupted but complete before service restart." ) run_count = 0 fail_count = 0 success_count = 0 aborted_count = 0 shell = RemoteMachineShellConnection(node_in_test) for handle in handles: status, hand = self.cbas_util.retrieve_request_status_using_handle( node_in_test, handle, shell) if status == "running": run_count += 1 self.log.info("query with handle %s is running." % handle) elif status == "failed": fail_count += 1 self.log.info("query with handle %s is failed." % handle) elif status == "success": success_count += 1 self.log.info("query with handle %s is successful." % handle) else: aborted_count += 1 self.log.info("Queued job is deleted: %s" % status) self.log.info("After service restart %s queued jobs are Running." % run_count) self.log.info("After service restart %s queued jobs are Failed." % fail_count) self.log.info("After service restart %s queued jobs are Successful." % success_count) self.log.info("After service restart %s queued jobs are Aborted." 
% aborted_count) if self.cbas_node_type == "NC": self.assertTrue(fail_count + aborted_count == 0, "Some queries failed/aborted") query = "select count(*) from {0};".format(self.cbas_dataset_name) self.cbas_util._run_concurrent_queries(query, "immediate", 100) if not self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, self.num_items * 3): self.fail( "No. of items in CBAS dataset do not match that in the CB bucket" ) def test_network_hardening(self): self.setup_for_test() end = self.num_items CC = self.cbas_node NC = self.cluster.cbas_nodes KV = self.cluster.master nodes = [CC] + NC for node in nodes: for i in xrange(2): NodeHelper.enable_firewall(node) start = end end = start + self.num_items tasks = self.perform_doc_ops_in_all_cb_buckets("create", start, end, batch_size=1000, _async=True) self.sleep( 30, "Sleep after enabling firewall on node %s then disbale it." % node.ip) NodeHelper.disable_firewall(node) items_in_cbas_bucket = 0 start_time = time.time() while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) and time.time() < start_time + 60: try: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) except: pass self.log.info("Items after network is up: %s" % items_in_cbas_bucket) if items_in_cbas_bucket < end and items_in_cbas_bucket > start: self.log.info("Data Ingestion Interrupted successfully") elif items_in_cbas_bucket < start: self.log.info( "Data Ingestion did not interrupted but restarting from 0." ) else: self.log.info( "Data Ingestion did not interrupted but complete before service restart." ) query = "select count(*) from {0};".format( self.cbas_dataset_name) self.cbas_util._run_concurrent_queries(query, "immediate", 100) for task in tasks: self.task_manager.get_task_result(task) if not self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, end): self.fail( "No. of items in CBAS dataset do not match that in the CB bucket" ) NodeHelper.enable_firewall(node, bidirectional=True) start = end end = start + self.num_items tasks = self.perform_doc_ops_in_all_cb_buckets("create", start, end, batch_size=1000, _async=True) self.sleep( 30, "Sleep after enabling firewall on CC node then disbale it." ) NodeHelper.disable_firewall(node) items_in_cbas_bucket = 0 start_time = time.time() while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) and time.time() < start_time + 60: try: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) except: pass self.log.info("Items after network is up: %s" % items_in_cbas_bucket) if items_in_cbas_bucket < end and items_in_cbas_bucket > start: self.log.info("Data Ingestion Interrupted successfully") elif items_in_cbas_bucket < start: self.log.info( "Data Ingestion did not interrupted but restarting from 0." ) else: self.log.info( "Data Ingestion did not interrupted but complete before service restart." ) query = "select count(*) from {0};".format( self.cbas_dataset_name) self.cbas_util._run_concurrent_queries(query, "immediate", 100) for task in tasks: self.task_manager.get_task_result(task) if not self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, end): self.fail( "No. of items in CBAS dataset do not match that in the CB bucket" )
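# --- Illustrative sketch (not part of the original suite) ---
# Every test in this class repeats the same "poll the dataset count until the
# service reports something usable" loop. A minimal helper capturing that
# pattern, assuming a CbasUtil-like object whose
# get_num_items_in_cbas_dataset() returns (count, _) and may raise while
# analytics is still recovering; the name and timeout are illustrative only.
import time


def poll_dataset_count(cbas_util, dataset_name, timeout=60, interval=1):
    """Return the first item count other than 0/-1 reported for the dataset,
    or the last value seen once the timeout expires."""
    items = 0
    deadline = time.time() + timeout
    while items in (0, -1) and time.time() < deadline:
        try:
            items, _ = cbas_util.get_num_items_in_cbas_dataset(dataset_name)
        except Exception:
            # Analytics may still be restarting; keep retrying until deadline.
            pass
        time.sleep(interval)
    return items
# --- end sketch ---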
def setUp(self): """ Since BaseTestCase will initialize at least one cluster, we pass service for the master node of that cluster """ if not hasattr(self, "input"): self.input = TestInputSingleton.input """ In case of multi cluster setup, if cluster address family needs to be set, then this parameter is required """ if self.input.param("cluster_ip_family", ""): cluster_ip_family = self.input.param("cluster_ip_family", "").split("|") if cluster_ip_family[0] == "ipv4_only": self.input.test_params.update({ "ipv4_only": True, "ipv6_only": False }) elif cluster_ip_family[0] == "ipv6_only": self.input.test_params.update({ "ipv4_only": False, "ipv6_only": True }) elif cluster_ip_family[0] == "ipv4_ipv6": self.input.test_params.update({ "ipv4_only": True, "ipv6_only": True }) else: self.input.test_params.update({ "ipv4_only": False, "ipv6_only": False }) super(CBASBaseTest, self).setUp() """ Cluster node services. Parameter value format serv1:serv2-serv1:ser2|serv1:serv2-ser1:serv2 | -> separates services per cluster. - -> separates services on each node of the cluster. : -> separates services on a node. """ self.services_init = [ x.split("-") for x in self.input.param("services_init", "kv:n1ql:index").split("|") ] """ Number of nodes per cluster. Parameter value format num_nodes_cluster1|num_nodes_cluster2|.... | -> separates number of nodes per cluster. """ if not isinstance(self.input.param("nodes_init", 1), int): self.nodes_init = [ int(x) for x in self.input.param("nodes_init", 1).split("|") ] else: self.nodes_init = [self.input.param("nodes_init", 1)] if self._testMethodDoc: self.log.info("Starting Test: %s - %s" % (self._testMethodName, self._testMethodDoc)) else: self.log.info("Starting Test: %s" % self._testMethodName) """ Parameterized Support for multiple cluster instead of creating multiple clusters from ini file. """ self.num_of_clusters = self.input.param('num_of_clusters', 1) """ Since BaseTestCase will initialize at least one cluster, we need to modify the initialized cluster server property to correctly reflect the servers in that cluster. """ start = 0 end = self.nodes_init[0] cluster = self.cb_clusters[self.cb_clusters.keys()[0]] cluster.servers = self.servers[start:end] if "cbas" in cluster.master.services: cluster.cbas_nodes.append(cluster.master) """ Since BaseTestCase will initialize at least one cluster, we need to initialize only total clusters required - 1. 
""" cluster_name_format = "C%s" for i in range(1, self.num_of_clusters): start = end end += self.nodes_init[i] cluster_name = cluster_name_format % str(i + 1) cluster = CBCluster(name=cluster_name, servers=self.servers[start:end]) self.cb_clusters[cluster_name] = cluster cluster.nodes_in_cluster.append(cluster.master) cluster.kv_nodes.append(cluster.master) self.initialize_cluster(cluster_name, cluster, services=self.services_init[i][0]) cluster.master.services = self.services_init[i][0].replace( ":", ",") if "cbas" in cluster.master.services: cluster.cbas_nodes.append(cluster.master) if self.input.param("cluster_ip_family", ""): # Enforce IPv4 or IPv6 or both if cluster_ip_family[i] == "ipv4_only": status, msg = self.cluster_util.enable_disable_ip_address_family_type( cluster, True, True, False) if cluster_ip_family[i] == "ipv6_only": status, msg = self.cluster_util.enable_disable_ip_address_family_type( cluster, True, False, True) if cluster_ip_family[i] == "ipv4_ipv6": status, msg = self.cluster_util.enable_disable_ip_address_family_type( cluster, True, True, True) if not status: self.fail(msg) self.modify_cluster_settings(cluster) self.available_servers = self.servers[end:] """ KV infra to be created per cluster. Accepted values are - bkt_spec : will create KV infra based on bucket spec. bucket_spec param needs to be passed. default : will create a bucket named default on the cluster. None : no buckets will be created on cluster | -> separates number of nodes per cluster. """ if self.input.param("cluster_kv_infra", None): self.cluster_kv_infra = self.input.param("cluster_kv_infra", None).split("|") if len(self.cluster_kv_infra) < self.num_of_clusters: self.cluster_kv_infra.extend( [None] * (self.num_of_clusters - len(self.cluster_kv_infra))) else: self.cluster_kv_infra = [None] * self.num_of_clusters # Common properties self.num_concurrent_queries = self.input.param('num_queries', 5000) self.concurrent_batch_size = self.input.param('concurrent_batch_size', 100) self.index_fields = self.input.param('index_fields', None) if self.index_fields: self.index_fields = self.index_fields.split("-") self.retry_time = self.input.param("retry_time", 300) self.num_retries = self.input.param("num_retries", 1) self.cbas_spec_name = self.input.param("cbas_spec", None) self.expected_error = self.input.param("error", None) self.bucket_spec = self.input.param("bucket_spec", "analytics.default") self.doc_spec_name = self.input.param("doc_spec_name", "initial_load") self.set_default_cbas_memory = self.input.param( 'set_default_cbas_memory', False) self.cbas_memory_quota_percent = int( self.input.param("cbas_memory_quota_percent", 100)) self.bucket_size = self.input.param("bucket_size", 250) self.cbas_util = CbasUtil(self.task) self.service_mem_dict = { "kv": [ CbServer.Settings.KV_MEM_QUOTA, CbServer.Settings.MinRAMQuota.KV, 0 ], "fts": [ CbServer.Settings.FTS_MEM_QUOTA, CbServer.Settings.MinRAMQuota.FTS, 0 ], "index": [ CbServer.Settings.INDEX_MEM_QUOTA, CbServer.Settings.MinRAMQuota.INDEX, 0 ], "cbas": [ CbServer.Settings.CBAS_MEM_QUOTA, CbServer.Settings.MinRAMQuota.CBAS, 0 ], } # Add nodes to the cluster as per node_init param. 
for i, (cluster_name, cluster) in enumerate(self.cb_clusters.items()): cluster.rest = RestConnection(cluster.master) cluster_services = self.cluster_util.get_services_map(cluster) cluster_info = cluster.rest.get_nodes_self() for service in cluster_services: if service != "n1ql": property_name = self.service_mem_dict[service][0] service_mem_in_cluster = cluster_info.__getattribute__( property_name) self.service_mem_dict[service][2] = service_mem_in_cluster j = 1 for server in cluster.servers: if server.ip != cluster.master.ip: server.services = self.services_init[i][j].replace( ":", ",") j += 1 if "cbas" in server.services: cluster.cbas_nodes.append(server) if "kv" in server.services: cluster.kv_nodes.append(server) rest = RestConnection(server) rest.set_data_path(data_path=server.data_path, index_path=server.index_path, cbas_path=server.cbas_path) if self.set_default_cbas_memory: self.log.info( "Setting the min possible memory quota so that adding " "more nodes to the cluster wouldn't be a problem.") cluster.rest.set_service_mem_quota({ CbServer.Settings.KV_MEM_QUOTA: CbServer.Settings.MinRAMQuota.KV, CbServer.Settings.FTS_MEM_QUOTA: CbServer.Settings.MinRAMQuota.FTS, CbServer.Settings.INDEX_MEM_QUOTA: CbServer.Settings.MinRAMQuota.INDEX }) self.log.info("Setting %d memory quota for CBAS" % CbServer.Settings.MinRAMQuota.CBAS) cluster.cbas_memory_quota = CbServer.Settings.MinRAMQuota.CBAS cluster.rest.set_service_mem_quota({ CbServer.Settings.CBAS_MEM_QUOTA: CbServer.Settings.MinRAMQuota.CBAS }) else: self.set_memory_for_services(cluster, server, server.services) if cluster.servers[1:]: self.task.rebalance(cluster, cluster.servers[1:], [], services=[ server.services for server in cluster.servers[1:] ]) if cluster.cbas_nodes: cbas_cc_node_ip = None retry = 0 while True and retry < 60: cbas_cc_node_ip = self.cbas_util.retrieve_cc_ip_from_master( cluster) if cbas_cc_node_ip: break else: self.sleep(10, "Waiting for CBAS service to come up") retry += 1 if not cbas_cc_node_ip: self.fail("CBAS service did not come up even after 10 " "mins.") for server in cluster.cbas_nodes: if server.ip == cbas_cc_node_ip: cluster.cbas_cc_node = server break if "cbas" in cluster.master.services: self.cbas_util.cleanup_cbas(cluster) cluster.otpNodes = cluster.rest.node_statuses() # Wait for analytics service to be up. if hasattr(cluster, "cbas_cc_node"): if not self.cbas_util.is_analytics_running(cluster): self.fail("Analytics service did not come up even after 10\ mins of wait after initialisation") if self.input.param("n2n_encryption", False): self.security_util = SecurityUtils(self.log) rest = RestConnection(cluster.master) self.log.info("Disabling Auto-Failover") if not rest.update_autofailover_settings(False, 120): self.fail("Disabling Auto-Failover failed") self.log.info("Setting node to node encryption level to all") self.security_util.set_n2n_encryption_level_on_nodes( cluster.nodes_in_cluster, level=self.input.param("n2n_encryption_level", "control")) CbServer.use_https = True self.log.info("Enabling Auto-Failover") if not rest.update_autofailover_settings(True, 300): self.fail("Enabling Auto-Failover failed") if self.input.param("analytics_loggers", None): """ This flag is used for setting analytics internal log levels. These logs are helpful while dubugging issues as they provide a deeper insight into working on CBAS service. This flag can be used to set one or more logger for analytics. logger_name_1:level-logger_name_2:level-...... 
""" cbas_loggers = self.input.param("analytics_loggers", None).split("-") log_level_dict = dict() for logger in cbas_loggers: tmp = logger.split(":") log_level_dict[tmp[0]] = tmp[1] self.log.info("Setting following log levels for analytics - " "{0}".format(log_level_dict)) status, content, response = self.cbas_util.set_log_level_on_cbas( self.cluster, log_level_dict, timeout=120) if not status: self.fail("Error while setting log level for CBAS - " "{0}".format(content)) self.log.info("Verifying whether log levels set successfully") status, content, response = self.cbas_util.get_log_level_on_cbas( self.cluster) match_counter = 0 if status: actual_log_levels = content["loggers"] for logger in actual_log_levels: if (logger["name"] in log_level_dict) and \ logger["level"] == log_level_dict[logger["name"]]: match_counter += 1 if match_counter == len(log_level_dict): self.log.info("All log levels were set successfully") else: self.fail("Some log levels were not set") else: self.fail("Error while fetching log levels") self.disk_optimized_thread_settings = self.input.param( "disk_optimized_thread_settings", False) if self.disk_optimized_thread_settings: self.set_num_writer_and_reader_threads( cluster, num_writer_threads="disk_io_optimized", num_reader_threads="disk_io_optimized") if self.cluster_kv_infra[i] == "bkt_spec": if self.bucket_spec is not None: try: self.collectionSetUp(cluster) except Java_base_exception as exception: self.handle_setup_exception(exception) except Exception as exception: self.handle_setup_exception(exception) else: self.fail("Error : bucket_spec param needed") elif self.cluster_kv_infra[i] == "default": self.bucket_util.create_default_bucket( cluster, bucket_type=self.bucket_type, ram_quota=self.bucket_size, replica=self.num_replicas, conflict_resolution=self.bucket_conflict_resolution_type, replica_index=self.bucket_replica_index, storage=self.bucket_storage, eviction_policy=self.bucket_eviction_policy, flush_enabled=self.flush_enabled) self.bucket_util.add_rbac_user(cluster.master) self.log.info( "=== CBAS_BASE setup was finished for test #{0} {1} ===".format( self.case_number, self._testMethodName))
def test_cc_swap_rebalance(self): self.restart_rebalance = self.input.param('restart_rebalance', False) self.setup_for_test(skip_data_loading=True) query = "select sleep(count(*),50000) from {0};".format( self.cbas_dataset_name) handles = self.cbas_util._run_concurrent_queries(query, "async", 10) self.ingestion_in_progress() replicas_before_rebalance = len( self.cbas_util.get_replicas_info(self.shell)) self.cluster_util.add_node(node=self.cluster.cbas_nodes[-1], rebalance=False) swap_nc = self.input.param('swap_nc', False) if not swap_nc: out_nodes = [self.otpNodes[0]] self.cbas_util.closeConn() self.cbas_util = CbasUtil(self.cluster.master, self.cluster.cbas_nodes[0]) self.cbas_util.createConn("default") self.cbas_node = self.cluster.cbas_nodes[0] else: out_nodes = [self.otpNodes[1]] self.cluster_util.remove_node(self.cluster, out_nodes, wait_for_rebalance=False) self.sleep(5, "Wait for sometime after rebalance started.") if self.restart_rebalance: if self.rest._rebalance_progress_status() == "running": self.assertTrue(self.rest.stop_rebalance(wait_timeout=120), "Failed while stopping rebalance.") self.sleep(10) else: self.fail( "Rebalance completed before the test could have stopped rebalance." ) self.rebalance(ejected_nodes=[node.id for node in out_nodes], wait_for_completion=False) self.sleep(5) str_time = time.time() while self.rest._rebalance_progress_status( ) == "running" and time.time() < str_time + 300: replicas = self.cbas_util.get_replicas_info(self.shell) if replicas: for replica in replicas: self.log.info("replica state during rebalance: %s" % replica['status']) self.sleep(30) replicas = self.cbas_util.get_replicas_info(self.shell) replicas_after_rebalance = len(replicas) self.assertEqual( replicas_after_rebalance, replicas_before_rebalance, "%s,%s" % (replicas_after_rebalance, replicas_before_rebalance)) for replica in replicas: self.log.info("replica state during rebalance: %s" % replica['status']) self.assertEqual( replica['status'], "IN_SYNC", "Replica state is incorrect: %s" % replica['status']) # items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name) # self.log.info("Items before service restart: %s"%items_in_cbas_bucket) items_in_cbas_bucket = 0 start_time = time.time() while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) and time.time() < start_time + 60: try: items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset( self.cbas_dataset_name) except: pass self.sleep(1) self.log.info("After rebalance operation docs in CBAS bucket : %s" % items_in_cbas_bucket) if items_in_cbas_bucket < self.num_items * 2 and items_in_cbas_bucket > self.num_items: self.log.info("Data Ingestion Interrupted successfully") elif items_in_cbas_bucket < self.num_items: self.log.info( "Data Ingestion did interrupted and restarting from 0.") else: self.log.info( "Data Ingestion did not interrupted but complete before rebalance operation." ) run_count = 0 fail_count = 0 success_count = 0 aborted_count = 0 shell = RemoteMachineShellConnection(self.cluster.master) for handle in handles: status, hand = self.cbas_util.retrieve_request_status_using_handle( self.cluster.master, handle, shell) if status == "running": run_count += 1 self.log.info("query with handle %s is running." % handle) elif status == "failed": fail_count += 1 self.log.info("query with handle %s is failed." % handle) elif status == "success": success_count += 1 self.log.info("query with handle %s is successful." 
% handle) else: aborted_count += 1 self.log.info("Queued job is deleted: %s" % status) self.log.info("After rebalance %s queued jobs are Running." % run_count) self.log.info("After rebalance %s queued jobs are Failed." % fail_count) self.log.info("After rebalance %s queued jobs are Successful." % success_count) self.log.info("After rebalance %s queued jobs are Aborted." % aborted_count) query = "select count(*) from {0};".format(self.cbas_dataset_name) self.cbas_util._run_concurrent_queries(query, "immediate", 100) if not self.cbas_util.validate_cbas_dataset_items_count( self.cbas_dataset_name, self.num_items * 2): self.fail( "No. of items in CBAS dataset do not match that in the CB bucket" ) self.ingest_more_data()
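# --- Illustrative sketch (not part of the original suite) ---
# The replica-state watch loop inside test_cc_swap_rebalance, factored out
# for readability. It assumes a CbasUtil-like object whose
# get_replicas_info(shell) returns dicts carrying a 'status' key and a
# RestConnection whose _rebalance_progress_status() reports "running", both
# as used above; the helper name, timeout and interval are illustrative.
import time


def watch_replicas_during_rebalance(rest, cbas_util, shell, logger,
                                    timeout=300, interval=30):
    """Log analytics replica states while a rebalance runs and return the
    replica list observed after it finishes (or after the timeout)."""
    deadline = time.time() + timeout
    while rest._rebalance_progress_status() == "running" \
            and time.time() < deadline:
        for replica in cbas_util.get_replicas_info(shell) or []:
            logger.info("replica state during rebalance: %s"
                        % replica['status'])
        time.sleep(interval)
    return cbas_util.get_replicas_info(shell)
# --- end sketch ---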
def setUp(self): super(AppBase, self).setUp() self.log_setup_status("AppBase", "started") self.step_num = 1 self.initial_load = self.input.param("initial_load", False) self.cluster_conf = self.input.param("cluster_conf", None) self.bucket_conf = self.input.param("bucket_conf", None) self.service_conf = self.input.param("service_conf", None) self.rbac_conf = self.input.param("rbac_conf", None) self.rbac_util = RbacUtil() self.sdk_clients = global_vars.sdk_clients self.app_path = "pytests/bucket_collections/app/" self.config_path = self.app_path + "config/" if self.cluster_conf is not None: with open(self.config_path + self.cluster_conf + ".yaml", "r") as fp: self.cluster_conf = YAML().load(fp.read()) self.__init_rebalance_with_rbac_setup() # Update cluster node-service map and create cbas_util self.cluster_util.update_cluster_nodes_service_list(self.cluster) self.cbas_util = CbasUtil(self.cluster.master, self.cluster.cbas_nodes[0]) # Load bucket conf if self.bucket_conf is not None: with open(self.config_path + self.bucket_conf + ".yaml", "r") as fp: self.bucket_conf = YAML().load(fp.read()) # Load RBAC conf if self.rbac_conf is not None: with open(self.config_path + self.rbac_conf + ".yaml", "r") as fp: self.rbac_conf = YAML().load(fp.read()) if self.bucket_conf is not None: self.__setup_buckets() self.bucket = self.cluster.buckets[0] if self.rbac_conf is not None: for rbac_roles in self.rbac_conf["rbac_roles"]: self.create_sdk_clients(rbac_roles["roles"]) if self.service_conf is not None: with open(self.config_path + self.service_conf + ".yaml", "r") as fp: self.service_conf = YAML().load(fp.read())["services"] # Configure backup settings self.configure_bucket_backups() # Create required GSIs self.create_indexes() # Create required CBAS data-sets self.create_cbas_indexes() self.log_setup_status("AppBase", "complete")
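# --- Illustrative sketch (not part of the original suite) ---
# AppBase.setUp loads each optional *.yaml config with the same
# open/read/YAML().load pattern. A minimal helper capturing that pattern;
# the ruamel.yaml import is an assumption about where the YAML class used
# above comes from, and the helper name is illustrative only.
from ruamel.yaml import YAML


def load_optional_config(config_path, conf_name):
    """Return the parsed YAML config, or None when no config name is given."""
    if conf_name is None:
        return None
    with open(config_path + conf_name + ".yaml", "r") as fp:
        return YAML().load(fp.read())
# --- end sketch ---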
def test_to_fail_initial_rebalance_and_verify_subsequent_rebalance_succeeds(self): self.log.info("Pick the incoming and outgoing nodes during rebalance") self.rebalance_type = self.input.param("rebalance_type", "in") nodes_to_add = [self.rebalanceServers[1]] nodes_to_remove = [] reinitialize_cbas_util = False if self.rebalance_type == 'out': nodes_to_remove.append(self.rebalanceServers[1]) self.cluster_util.add_node(self.rebalanceServers[1]) nodes_to_add = [] elif self.rebalance_type == 'swap': self.cluster_util.add_node(nodes_to_add[0], rebalance=False) nodes_to_remove.append(self.cbas_node) reinitialize_cbas_util = True self.log.info("Incoming nodes - %s, outgoing nodes - %s. For rebalance type %s " %(nodes_to_add, nodes_to_remove, self.rebalance_type)) self.log.info("Creates cbas buckets and dataset") dataset_count_query = "select count(*) from {0};".format(self.cbas_dataset_name) self.setup_for_test() self.log.info("Perform async doc operations on KV") json_generator = JsonGenerator() generators = json_generator.generate_docs_simple(docs_per_day=self.num_items * 3 / 2, start=self.num_items) kv_task = self.bucket_util._async_load_all_buckets(self.cluster, generators, "create", 0, batch_size=5000) self.log.info("Run concurrent queries on CBAS") handles = self.cbas_util._run_concurrent_queries(dataset_count_query, "async", self.num_concurrent_queries) self.log.info("Fetch the server to restart couchbase on") restart_couchbase_on_incoming_or_outgoing_node = self.input.param("restart_couchbase_on_incoming_or_outgoing_node", True) if not restart_couchbase_on_incoming_or_outgoing_node: node = self.cbas_node else: node = self.rebalanceServers[1] shell = RemoteMachineShellConnection(node) self.log.info("Rebalance nodes") self.task.async_rebalance(self.servers, nodes_to_add, nodes_to_remove) self.log.info("Restart Couchbase on node %s" % node.ip) shell.restart_couchbase() self.sleep(30, message="Waiting for service to be back again...") self.log.info("Verify subsequent rebalance is successful") nodes_to_add = [] # Node is already added to cluster in previous rebalance, adding it again will throw exception self.assertTrue(self.task.rebalance(self.servers, nodes_to_add, nodes_to_remove)) if reinitialize_cbas_util is True: self.cbas_util = CbasUtil(self.cluster.master, self.rebalanceServers[1], self.task) self.cbas_util.createConn("default") self.cbas_util.wait_for_cbas_to_recover() self.log.info("Get KV ops result") for task in kv_task: self.task_manager.get_task_result(task) self.log.info("Log concurrent query status") self.cbas_util.log_concurrent_query_outcome(self.cluster.master, handles) self.log.info("Validate dataset count on CBAS") if not self.cbas_util.validate_cbas_dataset_items_count(self.cbas_dataset_name, self.num_items * 3 / 2, 0): self.fail("No. of items in CBAS dataset do not match that in the CB bucket")
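# Hedged sketch of the restart step used in the test above: instead of a fixed
# 30-second sleep after restart_couchbase(), the wait could reuse
# wait_for_cbas_to_recover(), which the test already calls once the rebalance
# finishes. The helper name _restart_couchbase_and_wait is hypothetical.
def _restart_couchbase_and_wait(self, node):
    shell = RemoteMachineShellConnection(node)
    shell.restart_couchbase()
    # Poll until the analytics service answers queries again
    self.cbas_util.wait_for_cbas_to_recover()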
def setUp(self, add_default_cbas_node=True): super(CBASBaseTest, self).setUp() if self._testMethodDoc: self.log.info("Starting Test: %s - %s" % (self._testMethodName, self._testMethodDoc)) else: self.log.info("Starting Test: %s" % self._testMethodName) invalid_ip = '10.111.151.109' self.cb_bucket_name = self.input.param('cb_bucket_name', 'travel-sample') self.cbas_bucket_name = self.input.param('cbas_bucket_name', 'travel') self.cb_bucket_password = self.input.param('cb_bucket_password', None) self.cb_server_ip = self.input.param("cb_server_ip", None) self.cb_server_ip = \ self.cb_server_ip.replace('INVALID_IP', invalid_ip) \ if self.cb_server_ip is not None else None self.cbas_dataset_name = self.input.param("cbas_dataset_name", 'travel_ds') self.cbas_bucket_name_invalid = \ self.input.param('cbas_bucket_name_invalid', self.cbas_bucket_name) self.cbas_dataset2_name = self.input.param('cbas_dataset2_name', None) self.skip_create_dataset = self.input.param('skip_create_dataset', False) self.disconnect_if_connected = \ self.input.param('disconnect_if_connected', False) self.cbas_dataset_name_invalid = \ self.input.param('cbas_dataset_name_invalid', self.cbas_dataset_name) self.skip_drop_connection = self.input.param('skip_drop_connection', False) self.skip_drop_dataset = self.input.param('skip_drop_dataset', False) self.query_id = self.input.param('query_id', None) self.mode = self.input.param('mode', None) self.num_concurrent_queries = self.input.param('num_queries', 5000) self.concurrent_batch_size = self.input.param('concurrent_batch_size', 100) self.compiler_param = self.input.param('compiler_param', None) self.compiler_param_val = self.input.param('compiler_param_val', None) self.expect_reject = self.input.param('expect_reject', False) self.expect_failure = self.input.param('expect_failure', False) self.compress_dataset = self.input.param('compress_dataset', False) self.index_name = self.input.param('index_name', "NoName") self.index_fields = self.input.param('index_fields', None) if self.index_fields: self.index_fields = self.index_fields.split("-") self.retry_time = self.input.param("retry_time", 300) self.num_retries = self.input.param("num_retries", 1) self.sample_bucket_dict = { TravelSample().name: TravelSample(), BeerSample().name: BeerSample() } self.sample_bucket = None self.flush_enabled = Bucket.FlushBucket.ENABLED self.test_abort_snapshot = self.input.param("test_abort_snapshot", False) self.cbas_spec_name = self.input.param("cbas_spec", None) self._cb_cluster = self.get_clusters() self.expected_error = self.input.param("error", None) self.bucket_spec = self.input.param("bucket_spec", None) self.doc_spec_name = self.input.param("doc_spec_name", "initial_load") self.set_cbas_memory_from_available_free_memory = self.input.param( 'set_cbas_memory_from_available_free_memory', False) self.parallel_load_percent = int( self.input.param("parallel_load_percent", 0)) self.cbas_kill_count = self.input.param("cbas_kill_count", 0) self.memcached_kill_count = self.input.param("memcached_kill_count", 0) self.tamper_links_count = self.input.param("tamper_links_count", 0) self.cbas_node = None self.cbas_memory_quota_percent = int( self.input.param("cbas_memory_quota_percent", 100)) self.bucket_size = self.input.param("bucket_size", 100) services = None nodes_init = None # Single cluster support if len(self._cb_cluster) == 1: self._cb_cluster = self._cb_cluster[0] self.cluster.nodes_in_cluster.extend([self.cluster.master]) if self.services_init and self.nodes_init >= 3: if 
len(self.cluster.servers) < self.nodes_init or \ len(self.services_init.split("-")) != self.nodes_init: self.fail("Configuration error. Re-check nodes_init, " "services_init in .conf file and servers " "available in .ini " "file") services = list() for service in self.services_init.split( "-")[1:self.nodes_init]: services.append(service.replace(":", ",")) # Initialize cluster using given nodes nodes_init = list( filter(lambda node: node.ip != self.cluster.master.ip, self.cluster.servers[1:self.nodes_init])) for node, services_init in map(None, nodes_init, services): if services_init is None: services.append("kv") if not self.cbas_node and "cbas" in services_init: self.cbas_node = node self.cbas_node.services = services_init idx = self.cluster.servers.index(node) self.cluster.servers[idx].services = services_init for server in self.cluster.servers: if "cbas" in server.services: self.cluster.cbas_nodes.append(server) if "kv" in server.services: self.cluster.kv_nodes.append(server) rest = RestConnection(server) rest.set_data_path(data_path=server.data_path, index_path=server.index_path, cbas_path=server.cbas_path) if self.expected_error: self.expected_error = \ self.expected_error.replace("INVALID_IP", invalid_ip) self.expected_error = \ self.expected_error.replace("PORT", self.cluster.master.port) self.otpNodes = [] self.cbas_path = server.cbas_path self.rest = RestConnection(self.cluster.master) if not self.set_cbas_memory_from_available_free_memory: self.log.info( "Setting the min possible memory quota so that adding " "more nodes to the cluster wouldn't be a problem.") self.rest.set_service_mem_quota({ CbServer.Settings.KV_MEM_QUOTA: MIN_KV_QUOTA, CbServer.Settings.FTS_MEM_QUOTA: FTS_QUOTA, CbServer.Settings.INDEX_MEM_QUOTA: INDEX_QUOTA }) self.set_cbas_memory_from_available_free_memory = \ self.input.param( 'set_cbas_memory_from_available_free_memory', False) self.log.info("Setting %d memory quota for CBAS" % CBAS_QUOTA) self.cbas_memory_quota = CBAS_QUOTA self.rest.set_service_mem_quota( {CbServer.Settings.CBAS_MEM_QUOTA: CBAS_QUOTA}) if self.expected_error: self.expected_error = \ self.expected_error.replace("INVALID_IP", invalid_ip) self.expected_error = \ self.expected_error.replace("PORT", self.cluster.master.port) self.cbas_util = None if self.cluster.cbas_nodes: if not self.cbas_node: available_cbas_nodes = list( filter(lambda node: node.ip != self.cluster.master.ip, self.cluster.cbas_nodes)) self.cbas_node = available_cbas_nodes[0] if self.set_cbas_memory_from_available_free_memory: self.set_memory_for_services(self.rest, self.cluster_util, self.cbas_node, self.cbas_node.services) self.cbas_util = CbasUtil(self.cluster.master, self.cbas_node) self.cbas_util_v2 = CbasUtilV2(self.cluster.master, self.cbas_node, self.task) if "cbas" in self.cluster.master.services: self.cleanup_cbas() if add_default_cbas_node: if self.cluster.master.ip != self.cbas_node.ip: self.otpNodes.append( self.cluster_util.add_node(self.cbas_node)) self.cluster.nodes_in_cluster.append(self.cbas_node) if nodes_init: idx = nodes_init.index(self.cbas_node) services.pop(idx) nodes_init.remove(self.cbas_node) else: self.otpNodes = self.rest.node_statuses() ''' This cbas cleanup is actually not needed. 
When a node is added to the cluster, it is automatically cleaned-up.''' self.cleanup_cbas() self.cluster.cbas_nodes.remove(self.cbas_node) if nodes_init: self.task.rebalance([self.cluster.master], nodes_init, [], services=services) self.cluster.nodes_in_cluster.extend(nodes_init) if self.bucket_spec is not None: try: self.collectionSetUp(self.cluster, self.bucket_util, self.cluster_util) except Java_base_exception as exception: self.handle_collection_setup_exception(exception) except Exception as exception: self.handle_collection_setup_exception(exception) else: if self.default_bucket: self.bucket_util.create_default_bucket( self.cluster, bucket_type=self.bucket_type, ram_quota=self.bucket_size, replica=self.num_replicas, conflict_resolution=self. bucket_conflict_resolution_type, replica_index=self.bucket_replica_index, storage=self.bucket_storage, eviction_policy=self.bucket_eviction_policy, flush_enabled=self.flush_enabled) elif self.cb_bucket_name in self.sample_bucket_dict.keys(): self.sample_bucket = \ self.sample_bucket_dict[self.cb_bucket_name] elif len(self._cb_cluster) > 1: # Multi Cluster Support for cluster in self._cb_cluster: for server in cluster.servers: if CbServer.Services.CBAS in server.services: cluster.cbas_nodes.append(server) if CbServer.Services.KV in server.services: cluster.kv_nodes.append(server) rest = RestConnection(server) rest.set_data_path(data_path=server.data_path, index_path=server.index_path, cbas_path=server.cbas_path) if self.expected_error: cluster.expected_error = \ self.expected_error.replace("INVALID_IP", invalid_ip) cluster.expected_error = \ self.expected_error.replace("PORT", cluster.master.port) cluster.otpNodes = list() cluster.cbas_path = server.cbas_path cluster.rest = RestConnection(cluster.master) if not self.set_cbas_memory_from_available_free_memory: self.log.info( "Setting the min possible memory quota so that adding " "more nodes to the cluster wouldn't be a problem.") cluster.rest.set_service_mem_quota({ CbServer.Settings.KV_MEM_QUOTA: MIN_KV_QUOTA, CbServer.Settings.FTS_MEM_QUOTA: FTS_QUOTA, CbServer.Settings.INDEX_MEM_QUOTA: INDEX_QUOTA }) cluster.set_cbas_memory_from_available_free_memory = \ self.input.param( 'set_cbas_memory_from_available_free_memory', False) self.log.info("Setting %d memory quota for CBAS" % CBAS_QUOTA) cluster.cbas_memory_quota = CBAS_QUOTA cluster.rest.set_service_mem_quota( {CbServer.Settings.CBAS_MEM_QUOTA: CBAS_QUOTA}) cluster.cbas_util = None # Drop any existing buckets and datasets if cluster.cbas_nodes: cluster.cbas_node = cluster.cbas_nodes[0] if self.set_cbas_memory_from_available_free_memory: self.set_memory_for_services( cluster.rest, cluster.cluster_util, cluster.cbas_node, cluster.cbas_node.services) cluster.cbas_util = CbasUtil(cluster.master, cluster.cbas_node, self.task) cluster.cbas_util_v2 = CbasUtilV2(cluster.master, cluster.cbas_node) if "cbas" in cluster.master.services: self.cleanup_cbas(cluster.cbas_util) if add_default_cbas_node: if cluster.master.ip != cluster.cbas_node.ip: cluster.otpNodes.append( cluster.cluster_util.add_node( cluster, cluster.cbas_node)) else: cluster.otpNodes = cluster.rest.node_statuses() """ This cbas cleanup is actually not needed. When a node is added to the cluster, it is automatically cleaned-up. 
""" self.cleanup_cbas(cluster.cbas_util) cluster.cbas_nodes.remove(cluster.cbas_node) if self.bucket_spec is not None: try: self.collectionSetUp(cluster, cluster.bucket_util, cluster.cluster_util) except Java_base_exception as exception: self.handle_collection_setup_exception(exception) except Exception as exception: self.handle_collection_setup_exception(exception) else: if self.default_bucket: cluster.bucket_util.create_default_bucket( self.cluster, bucket_type=self.bucket_type, ram_quota=self.bucket_size, replica=self.num_replicas, conflict_resolution=self. bucket_conflict_resolution_type, replica_index=self.bucket_replica_index, storage=self.bucket_storage, eviction_policy=self.bucket_eviction_policy, flush_enabled=self.flush_enabled) elif self.cb_bucket_name in self.sample_bucket_dict.keys(): self.sample_bucket = self.sample_bucket_dict[ self.cb_bucket_name] cluster.bucket_util.add_rbac_user(self.cluster.master) else: self.fail("No cluster is available") self.log.info( "=== CBAS_BASE setup was finished for test #{0} {1} ===".format( self.case_number, self._testMethodName))
def test_failover(self):
    self.setup_for_test(skip_data_loading=True)
    self.rebalance_node = self.input.param('rebalance_node', 'CC')
    self.how_many = self.input.param('how_many', 1)
    self.restart_rebalance = self.input.param('restart_rebalance', False)
    self.replica_change = self.input.param('replica_change', 0)
    self.add_back = self.input.param('add_back', False)
    query = "select sleep(count(*),50000) from {0};".format(
        self.cbas_dataset_name)
    handles = self.cbas_util._run_concurrent_queries(query, "async", 10)
    self.ingestion_in_progress()
    if self.rebalance_node == "CC":
        node_in_test = [self.cbas_node]
        otpNodes = [self.otpNodes[0]]
        self.cbas_util.closeConn()
        self.cbas_util = CbasUtil(self.cluster.master,
                                  self.cluster.cbas_nodes[0])
        self.cbas_util.createConn("default")
        self.cbas_node = self.cluster.cbas_nodes[0]
    elif self.rebalance_node == "NC":
        node_in_test = self.cluster.cbas_nodes[:self.how_many]
        otpNodes = self.nc_otpNodes[:self.how_many]
    else:
        # CC + NC failover: fail the CC and the first how_many NC nodes
        node_in_test = [self.cbas_node] + \
                       self.cluster.cbas_nodes[:self.how_many]
        otpNodes = self.otpNodes[:self.how_many + 1]
        self.cbas_util.closeConn()
        self.cbas_util = CbasUtil(self.cluster.master,
                                  self.cluster.cbas_nodes[self.how_many])
        self.cbas_util.createConn("default")
    replicas_before_rebalance = len(
        self.cbas_util.get_replicas_info(self.shell))
    # Wait (up to 60s) for the dataset to report a usable item count
    items_in_cbas_bucket = 0
    start_time = time.time()
    while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) \
            and time.time() < start_time + 60:
        try:
            items_in_cbas_bucket, _ = \
                self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
        except Exception:
            pass
        self.sleep(1)
    self.log.info("Items before failover: %s" % items_in_cbas_bucket)
    if self.restart_rebalance:
        graceful_failover = self.input.param("graceful_failover", False)
        failover_task = self._cb_cluster.async_failover(
            self.input.servers, node_in_test, graceful_failover)
        self.task_manager.get_task_result(failover_task)
        if self.add_back:
            for otpnode in otpNodes:
                self.rest.set_recovery_type('ns_1@' + otpnode.ip, "full")
                self.rest.add_back_node('ns_1@' + otpnode.ip)
            self.rebalance(wait_for_completion=False)
        else:
            self.rebalance(ejected_nodes=[node.id for node in otpNodes],
                           wait_for_completion=False)
        self.sleep(2)
        if self.rest._rebalance_progress_status() == "running":
            self.assertTrue(self.rest.stop_rebalance(wait_timeout=120),
                            "Failed while stopping rebalance.")
            if self.add_back:
                self.rebalance(wait_for_completion=False)
            else:
                self.rebalance(
                    ejected_nodes=[node.id for node in otpNodes],
                    wait_for_completion=False)
        else:
            self.fail("Rebalance completed before the test could have "
                      "stopped rebalance.")
    else:
        graceful_failover = self.input.param("graceful_failover", False)
        failover_task = self._cb_cluster.async_failover(
            self.input.servers, node_in_test, graceful_failover)
        self.task_manager.get_task_result(failover_task)
        if self.add_back:
            for otpnode in otpNodes:
                self.rest.set_recovery_type('ns_1@' + otpnode.ip, "full")
                self.rest.add_back_node('ns_1@' + otpnode.ip)
            self.rebalance(wait_for_completion=False)
    replicas_before_rebalance -= self.replica_change
    self.sleep(5)
    # Monitor replica states while the rebalance is still running
    str_time = time.time()
    while self.rest._rebalance_progress_status() == "running" \
            and time.time() < str_time + 300:
        replicas = self.cbas_util.get_replicas_info(self.shell)
        if replicas:
            for replica in replicas:
                self.log.info("replica state during rebalance: %s"
                              % replica['status'])
        self.sleep(15)
    replicas = self.cbas_util.get_replicas_info(self.shell)
    replicas_after_rebalance = len(replicas)
    self.assertEqual(replicas_after_rebalance, replicas_before_rebalance,
                     "%s,%s" % (replicas_after_rebalance,
                                replicas_before_rebalance))
    for replica in replicas:
        self.log.info("replica state after rebalance: %s"
                      % replica['status'])
        self.assertEqual(replica['status'], "IN_SYNC",
                         "Replica state is incorrect: %s"
                         % replica['status'])
    # Wait (up to 60s) for the dataset to report a usable item count
    items_in_cbas_bucket = 0
    start_time = time.time()
    while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) \
            and time.time() < start_time + 60:
        try:
            items_in_cbas_bucket, _ = \
                self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
        except Exception:
            pass
        self.sleep(1)
    self.log.info("After rebalance operation, docs in CBAS bucket: %s"
                  % items_in_cbas_bucket)
    if self.num_items < items_in_cbas_bucket < self.num_items * 2:
        self.log.info("Data ingestion was interrupted successfully")
    elif items_in_cbas_bucket < self.num_items:
        self.log.info("Data ingestion was interrupted and restarted from 0.")
    else:
        self.log.info("Data ingestion was not interrupted and completed "
                      "before the rebalance operation.")
    # Tally the status of the long-running queries started earlier
    run_count = 0
    fail_count = 0
    success_count = 0
    aborted_count = 0
    shell = RemoteMachineShellConnection(node_in_test[0])
    for handle in handles:
        status, hand = self.cbas_util.retrieve_request_status_using_handle(
            node_in_test, handle, shell)
        if status == "running":
            run_count += 1
            self.log.info("query with handle %s is running." % handle)
        elif status == "failed":
            fail_count += 1
            self.log.info("query with handle %s failed." % handle)
        elif status == "success":
            success_count += 1
            self.log.info("query with handle %s is successful." % handle)
        else:
            aborted_count += 1
            self.log.info("Queued job is deleted: %s" % status)
    self.log.info("After service restart %s queued jobs are Running."
                  % run_count)
    self.log.info("After service restart %s queued jobs are Failed."
                  % fail_count)
    self.log.info("After service restart %s queued jobs are Successful."
                  % success_count)
    self.log.info("After service restart %s queued jobs are Aborted."
                  % aborted_count)
    if self.rebalance_node == "NC":
        self.assertTrue(aborted_count == 0, "Some queries aborted")
    query = "select count(*) from {0};".format(self.cbas_dataset_name)
    self.cbas_util._run_concurrent_queries(query, "immediate", 100)
    if not self.cbas_util.validate_cbas_dataset_items_count(
            self.cbas_dataset_name, self.num_items * 2):
        self.fail("No. of items in CBAS dataset do not match that "
                  "in the CB bucket")
    self.ingest_more_data()
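# Both test_cc_swap_rebalance and test_failover tally the async query handles
# with the same if/elif chain. A minimal helper sketch follows; the name
# _summarise_handles is hypothetical and it assumes only
# retrieve_request_status_using_handle as called above.
def _summarise_handles(self, server, handles, shell):
    counts = {"running": 0, "failed": 0, "success": 0, "aborted": 0}
    for handle in handles:
        status, _ = self.cbas_util.retrieve_request_status_using_handle(
            server, handle, shell)
        counts[status if status in counts else "aborted"] += 1
        self.log.info("query with handle %s status: %s" % (handle, status))
    return counts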