class PartialRollback_CBAS(CBASBaseTest):
    def setUp(self):
        self.input = TestInputSingleton.input
        self.input.test_params.update({"default_bucket": False})
        super(PartialRollback_CBAS, self).setUp()
        '''
        Scenarios considered:
        1. 1 KV node and multiple CBAS nodes (the test adds all CBAS nodes
           to the cluster).
        2. 1 KV node and multiple CBAS nodes (the test wants only 1 CBAS
           node).
        3. A single node running both the KV and CBAS services.
        NOTE: Cases where some nodes run only CBAS are still pending;
        those need a service check on the nodes.
        '''
        if "add_all_cbas_nodes" in self.input.test_params \
                and self.input.test_params["add_all_cbas_nodes"] \
                and len(self.cluster.cbas_nodes) > 0:
            self.otpNodes.extend(
                self.add_all_nodes_then_rebalance(self.cluster.cbas_nodes))

        # Create the default bucket
        self.bucket_util.create_default_bucket(storage=self.bucket_storage)
        self.cbas_util.createConn("default")

        self.merge_policy = self.input.param('merge_policy', None)
        self.max_mergable_component_size = self.input.param(
            'max_mergable_component_size', 16384)
        self.max_tolerance_component_count = self.input.param(
            'max_tolerance_component_count', 2)
        self.create_index = self.input.param('create_index', False)
        self.where_field = self.input.param('where_field', None)
        self.where_value = self.input.param('where_value', None)
        self.CC = self.input.param('CC', False)

    def setup_for_test(self, skip_data_loading=False):
        if not skip_data_loading:
            # Load the Couchbase bucket first
            self.perform_doc_ops_in_all_cb_buckets(
                "create", 0, self.num_items, batch_size=1000)

        # Create a dataset on the CB bucket
        if self.merge_policy is None:
            self.cbas_util.create_dataset_on_bucket(
                cbas_bucket_name=self.cb_bucket_name,
                where_field=self.where_field,
                where_value=self.where_value,
                cbas_dataset_name=self.cbas_dataset_name)
        else:
            self.cbas_util.create_dataset_on_bucket_merge_policy(
                cbas_bucket_name=self.cb_bucket_name,
                where_field=self.where_field,
                where_value=self.where_value,
                cbas_dataset_name=self.cbas_dataset_name,
                merge_policy=self.merge_policy,
                max_mergable_component_size=self.max_mergable_component_size,
                max_tolerance_component_count=self.max_tolerance_component_count)

        if self.create_index:
            create_idx_statement = "create index {0} on {1}({2});".format(
                self.index_name, self.cbas_dataset_name, "profession:string")
            status, metrics, errors, results, _ = \
                self.cbas_util.execute_statement_on_cbas_util(
                    create_idx_statement)

        # Connect to the bucket
        self.cbas_util.connect_to_bucket(
            cbas_bucket_name=self.cbas_bucket_name,
            cb_bucket_password=self.cb_bucket_password)

        if not skip_data_loading:
            result = RestConnection(self.cluster.master).query_tool(
                "CREATE INDEX {0} ON {1}({2})".format(
                    self.index_name, self.cb_bucket_name, "profession"))
            self.sleep(20, "Wait for index creation")
            self.assertTrue(result['status'] == "success")

            if self.where_field and self.where_value:
                items = RestConnection(self.cluster.master).query_tool(
                    'select count(*) from %s where %s = "%s"'
                    % (self.cb_bucket_name, self.where_field,
                       self.where_value))['results'][0]['$1']
            else:
                items = self.num_items

            # Validate the no. of items in the CBAS dataset
            if not self.cbas_util.validate_cbas_dataset_items_count(
                    self.cbas_dataset_name, items):
                self.fail("No. of items in CBAS dataset do not match "
                          "that in the CB bucket")
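    # --- Illustrative sketch (not part of the original suite) ---
    # When merge_policy is set, create_dataset_on_bucket_merge_policy is
    # expected to emit dataset DDL that carries the LSM merge-policy hints.
    # This hypothetical helper shows the likely statement shape, assuming
    # the Analytics/AsterixDB WITH-clause convention; the exact parameter
    # keys are an assumption, not something this suite confirms.
    @staticmethod
    def _build_merge_policy_ddl(dataset, bucket, policy,
                                max_size=16384, max_count=2):
        # e.g. _build_merge_policy_ddl("ds", "default", "correlated-prefix")
        return ('CREATE DATASET %s ON `%s` WITH {"merge-policy": '
                '{"name": "%s", "parameters": '
                '{"max-mergable-component-size": %d, '
                '"max-tolerance-component-count": %d}}};'
                % (dataset, bucket, policy, max_size, max_count))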
    def tearDown(self):
        super(PartialRollback_CBAS, self).tearDown()

    def test_ingestion_after_kv_rollback_create_ops(self):
        self.setup_for_test()
        items_before_persistence_stop = \
            self.cbas_util.get_num_items_in_cbas_dataset(
                self.cbas_dataset_name)[0]
        self.log.info("Items in CBAS before persistence stop: %s"
                      % items_before_persistence_stop)

        # Stop persistence on Node A
        self.log.info("Stopping persistence on NodeA")
        mem_client = MemcachedClientHelper.direct_client(
            self.cluster.master, self.cb_bucket_name)
        mem_client.stop_persistence()

        # Perform create ops in the CB bucket
        self.log.info("Performing mutations")
        self.perform_doc_ops_in_all_cb_buckets(
            "create", self.num_items, self.num_items * 3 / 2)

        kv_nodes = self.get_kv_nodes(self.servers, self.cluster.master)
        items_in_cb_bucket = 0
        if self.where_field and self.where_value:
            items_in_cb_bucket = RestConnection(
                self.cluster.master).query_tool(
                    'select count(*) from %s where %s = "%s"'
                    % (self.cb_bucket_name, self.where_field,
                       self.where_value))['results'][0]['$1']
        else:
            for node in kv_nodes:
                items_in_cb_bucket += self.get_item_count_mc(
                    node, self.cb_bucket_name)

        # Validate the no. of items in the CBAS dataset
        self.assertTrue(
            self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, items_in_cb_bucket, 0),
            "No. of items in CBAS dataset do not match that in the CB bucket")

        # Count the no. of items in the CB & CBAS buckets
        items_in_cbas_bucket, _ = \
            self.cbas_util.get_num_items_in_cbas_dataset(
                self.cbas_dataset_name)
        self.log.info(
            "Before rollback --- # docs in CB bucket: %s, "
            "# docs in CBAS bucket: %s",
            items_in_cb_bucket, items_in_cbas_bucket)
        self.assertTrue(
            items_in_cb_bucket == items_in_cbas_bucket,
            "Before rollback: # items in CBAS bucket does not match "
            "that in the CB bucket")

        # Kill memcached on Node A so that Node B becomes master
        self.log.info("Kill memcached process on NodeA")
        shell = RemoteMachineShellConnection(self.cluster.master)
        shell.kill_memcached()
        self.sleep(2, "Wait for 2 secs for the DCP rollback to be sent to CBAS")

        curr = time.time()
        while items_in_cbas_bucket != 0 \
                and items_in_cbas_bucket > items_before_persistence_stop:
            items_in_cbas_bucket, _ = \
                self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
            if curr + 120 < time.time():
                break
        self.assertTrue(
            items_in_cbas_bucket <= items_before_persistence_stop,
            "Rollback did not happen")
        self.log.info("Rollback happened as expected")

        items_in_cb_bucket = 0
        curr = time.time()
        while items_in_cb_bucket != items_in_cbas_bucket:
            items_in_cb_bucket = 0
            items_in_cbas_bucket = 0
            if self.where_field and self.where_value:
                try:
                    items_in_cb_bucket = RestConnection(
                        self.cluster.master).query_tool(
                            'select count(*) from %s where %s = "%s"'
                            % (self.cb_bucket_name, self.where_field,
                               self.where_value))['results'][0]['$1']
                except:
                    self.log.info("Indexer in rollback state. "
                                  "Query failed. Pass and move ahead.")
                    pass
            else:
                for node in kv_nodes:
                    items_in_cb_bucket += self.get_item_count_mc(
                        node, self.cb_bucket_name)
            self.log.info("Items in CB bucket after rollback: %s"
                          % items_in_cb_bucket)
            items_in_cbas_bucket, _ = \
                self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
            if curr + 120 < time.time():
                break

        self.log.info(
            "After rollback --- # docs in CB bucket: %s, "
            "# docs in CBAS bucket: %s",
            items_in_cb_bucket, items_in_cbas_bucket)
        self.assertTrue(
            items_in_cb_bucket == items_in_cbas_bucket,
            "After rollback: # items in CBAS bucket does not match "
            "that in the CB bucket")

    def test_ingestion_after_kv_rollback_create_ops_MB29860(self):
        self.setup_for_test()
        items_before_persistence_stop = \
            self.cbas_util.get_num_items_in_cbas_dataset(
                self.cbas_dataset_name)[0]
        self.log.info("Items in CBAS before persistence stop: %s"
                      % items_before_persistence_stop)

        # Stop persistence on Node A
        self.log.info("Stopping persistence on NodeA")
        mem_client = MemcachedClientHelper.direct_client(
            self.cluster.master, self.cb_bucket_name)
        mem_client.stop_persistence()

        # Perform create ops in the CB bucket
        self.log.info("Performing mutations")
        self.perform_doc_ops_in_all_cb_buckets(
            "create", self.num_items, self.num_items * 3 / 2)

        kv_nodes = self.get_kv_nodes(self.servers, self.cluster.master)
        items_in_cb_bucket = 0
        if self.where_field and self.where_value:
            items_in_cb_bucket = RestConnection(
                self.cluster.master).query_tool(
                    'select count(*) from %s where %s = "%s"'
                    % (self.cb_bucket_name, self.where_field,
                       self.where_value))['results'][0]['$1']
        else:
            for node in kv_nodes:
                items_in_cb_bucket += self.get_item_count_mc(
                    node, self.cb_bucket_name)

        # Validate the no. of items in the CBAS dataset
        self.assertTrue(
            self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, items_in_cb_bucket, 0),
            "No. of items in CBAS dataset do not match that in the CB bucket")

        # Count the no. of items in the CB & CBAS buckets
        items_in_cbas_bucket, _ = \
            self.cbas_util.get_num_items_in_cbas_dataset(
                self.cbas_dataset_name)
        self.log.info(
            "Before rollback --- # docs in CB bucket: %s, "
            "# docs in CBAS bucket: %s",
            items_in_cb_bucket, items_in_cbas_bucket)
        self.assertTrue(
            items_in_cb_bucket == items_in_cbas_bucket,
            "Before rollback: # items in CBAS bucket does not match "
            "that in the CB bucket")

        self.cbas_util.disconnect_from_bucket(self.cbas_bucket_name)

        # Kill memcached on Node A so that Node B becomes master
        self.log.info("Kill memcached process on NodeA")
        shell = RemoteMachineShellConnection(self.cluster.master)
        shell.kill_memcached()

        if self.input.param('kill_cbas', False):
            shell = RemoteMachineShellConnection(self.cbas_node)
            shell.kill_process("/opt/couchbase/lib/cbas/runtime/bin/java",
                               "java")
            shell.kill_process("/opt/couchbase/bin/cbas", "cbas")

        tries = 60
        result = False
        while tries > 0 and not result:
            try:
                result = self.cbas_util.connect_to_bucket(
                    self.cbas_bucket_name)
                tries -= 1
            except:
                pass
            self.sleep(2)
        self.assertTrue(
            result,
            "CBAS connect bucket failed after memcached was killed "
            "on the KV node")

        self.sleep(2, "Wait for 2 secs for the DCP rollback to be sent to CBAS")
        curr = time.time()
        while items_in_cbas_bucket != 0 \
                and items_in_cbas_bucket > items_before_persistence_stop:
            items_in_cbas_bucket, _ = \
                self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
            if curr + 120 < time.time():
                break
        self.assertTrue(
            items_in_cbas_bucket <= items_before_persistence_stop,
            "Rollback did not happen")
        self.log.info("Rollback happened as expected")

        items_in_cb_bucket = 0
        curr = time.time()
        while items_in_cb_bucket != items_in_cbas_bucket:
            items_in_cb_bucket = 0
            items_in_cbas_bucket = 0
            if self.where_field and self.where_value:
                try:
                    items_in_cb_bucket = RestConnection(
                        self.cluster.master).query_tool(
                            'select count(*) from %s where %s = "%s"'
                            % (self.cb_bucket_name, self.where_field,
                               self.where_value))['results'][0]['$1']
                except:
                    self.log.info("Indexer in rollback state. "
                                  "Query failed. Pass and move ahead.")
                    pass
            else:
                for node in kv_nodes:
                    items_in_cb_bucket += self.get_item_count_mc(
                        node, self.cb_bucket_name)
            self.log.info("Items in CB bucket after rollback: %s"
                          % items_in_cb_bucket)
            items_in_cbas_bucket, _ = \
                self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
            if curr + 120 < time.time():
                break

        self.log.info(
            "After rollback --- # docs in CB bucket: %s, "
            "# docs in CBAS bucket: %s",
            items_in_cb_bucket, items_in_cbas_bucket)
        self.assertTrue(
            items_in_cb_bucket == items_in_cbas_bucket,
            "After rollback: # items in CBAS bucket does not match "
            "that in the CB bucket")

    def test_ingestion_after_kv_rollback_delete_ops(self):
        self.setup_for_test()

        # Stop persistence on Node A
        self.log.info("Stopping persistence on NodeA")
        mem_client = MemcachedClientHelper.direct_client(
            self.cluster.master, self.cb_bucket_name)
        mem_client.stop_persistence()

        # Perform delete ops in the CB bucket
        self.log.info("Performing mutations")
        self.perform_doc_ops_in_all_cb_buckets(
            "delete", 0, self.num_items / 2)

        kv_nodes = self.get_kv_nodes(self.servers, self.cluster.master)
        items_in_cb_bucket = 0
        if self.where_field and self.where_value:
            items_in_cb_bucket = RestConnection(
                self.cluster.master).query_tool(
                    'select count(*) from %s where %s = "%s"'
                    % (self.cb_bucket_name, self.where_field,
                       self.where_value))['results'][0]['$1']
        else:
            for node in kv_nodes:
                items_in_cb_bucket += self.get_item_count_mc(
                    node, self.cb_bucket_name)

        # Validate the no. of items in the CBAS dataset
        self.assertTrue(
            self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, items_in_cb_bucket, 0),
            "No. of items in CBAS dataset do not match that in the CB bucket")

        # Count the no. of items in the CB & CBAS buckets
        items_in_cbas_bucket, _ = \
            self.cbas_util.get_num_items_in_cbas_dataset(
                self.cbas_dataset_name)
        items_before_rollback = items_in_cbas_bucket
        self.log.info(
            "Before rollback --- # docs in CB bucket: %s, "
            "# docs in CBAS bucket: %s",
            items_in_cb_bucket, items_in_cbas_bucket)
        self.assertTrue(
            items_in_cb_bucket == items_in_cbas_bucket,
            "Before rollback: # items in CBAS bucket does not match "
            "that in the CB bucket")

        # Kill memcached on Node A so that Node B becomes master
        self.log.info("Kill memcached process on NodeA")
        shell = RemoteMachineShellConnection(self.cluster.master)
        shell.kill_memcached()
        self.sleep(2, "Wait for 2 secs for the DCP rollback to be sent to CBAS")

        # For deletes, a rollback restores the deleted docs, so the CBAS
        # count should rise above the pre-rollback count.
        curr = time.time()
        while items_in_cbas_bucket != 0 \
                and items_in_cbas_bucket <= items_before_rollback:
            items_in_cbas_bucket, _ = \
                self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
            if curr + 120 < time.time():
                break
        self.assertTrue(items_in_cbas_bucket > items_before_rollback,
                        "Rollback did not happen")
        self.log.info("Rollback happened as expected")

        items_in_cb_bucket = 0
        curr = time.time()
        while items_in_cb_bucket != items_in_cbas_bucket:
            items_in_cb_bucket = 0
            items_in_cbas_bucket = 0
            if self.where_field and self.where_value:
                try:
                    items_in_cb_bucket = RestConnection(
                        self.cluster.master).query_tool(
                            'select count(*) from %s where %s = "%s"'
                            % (self.cb_bucket_name, self.where_field,
                               self.where_value))['results'][0]['$1']
                except:
                    self.log.info("Indexer in rollback state. "
                                  "Query failed. Pass and move ahead.")
                    pass
            else:
                for node in kv_nodes:
                    items_in_cb_bucket += self.get_item_count_mc(
                        node, self.cb_bucket_name)
            self.log.info("Items in CB bucket after rollback: %s"
                          % items_in_cb_bucket)
            items_in_cbas_bucket, _ = \
                self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
            if curr + 120 < time.time():
                break

        self.log.info(
            "After rollback --- # docs in CB bucket: %s, "
            "# docs in CBAS bucket: %s",
            items_in_cb_bucket, items_in_cbas_bucket)
        self.assertTrue(
            items_in_cb_bucket == items_in_cbas_bucket,
            "After rollback: # items in CBAS bucket does not match "
            "that in the CB bucket")

    def test_ingestion_after_kv_rollback_cbas_disconnected(self):
        self.setup_for_test()

        # Stop persistence on Node A
        self.log.info("Stopping persistence on NodeA")
        mem_client = MemcachedClientHelper.direct_client(
            self.cluster.master, self.cb_bucket_name)
        mem_client.stop_persistence()

        # Perform delete ops in the CB bucket
        self.log.info("Performing mutations")
        self.perform_doc_ops_in_all_cb_buckets(
            "delete", 0, self.num_items / 2)

        # Count the no. of items in the CB & CBAS buckets
        kv_nodes = self.get_kv_nodes(self.servers, self.cluster.master)
        items_in_cb_bucket = 0
        for node in kv_nodes:
            items_in_cb_bucket += self.get_item_count_mc(
                node, self.cb_bucket_name)
        items_in_cbas_bucket, _ = \
            self.cbas_util.get_num_items_in_cbas_dataset(
                self.cbas_dataset_name)
        items_before_rollback = items_in_cbas_bucket
        self.log.info(
            "Before rollback --- # docs in CB bucket: %s, "
            "# docs in CBAS bucket: %s",
            items_in_cb_bucket, items_in_cbas_bucket)
        self.assertTrue(
            items_in_cb_bucket == items_in_cbas_bucket,
            "Before rollback: # items in CBAS bucket does not match "
            "that in the CB bucket")

        self.cbas_util.disconnect_from_bucket(self.cbas_bucket_name)

        # Kill memcached on Node A so that Node B becomes master
        self.log.info("Kill memcached process on NodeA")
        shell = RemoteMachineShellConnection(self.cluster.master)
        shell.kill_memcached()
        # self.sleep(10, "Wait for 10 secs for memcached restarts.")

        if self.input.param('kill_cbas', False):
            shell = RemoteMachineShellConnection(self.cbas_node)
            shell.kill_process("/opt/couchbase/lib/cbas/runtime/bin/java",
                               "java")
            shell.kill_process("/opt/couchbase/bin/cbas", "cbas")

        tries = 60
        result = False
        while tries > 0 and not result:
            try:
                result = self.cbas_util.connect_to_bucket(
                    self.cbas_bucket_name)
                tries -= 1
            except:
                pass
            self.sleep(2)
        self.assertTrue(
            result,
            "CBAS connect bucket failed after memcached was killed "
            "on the KV node")

        curr = time.time()
        while items_in_cbas_bucket != 0 \
                and items_in_cbas_bucket <= items_before_rollback:
            items_in_cbas_bucket, _ = \
                self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
            if curr + 120 < time.time():
                break
        self.assertTrue(items_in_cbas_bucket > items_before_rollback,
                        "Rollback did not happen")
        self.log.info("Rollback happened as expected")

        curr = time.time()
        while items_in_cb_bucket != items_in_cbas_bucket:
            items_in_cb_bucket = 0
            items_in_cbas_bucket = 0
            if self.where_field and self.where_value:
                try:
                    items_in_cb_bucket = RestConnection(
                        self.cluster.master).query_tool(
                            'select count(*) from %s where %s = "%s"'
                            % (self.cb_bucket_name, self.where_field,
                               self.where_value))['results'][0]['$1']
                except:
                    self.log.info("Indexer in rollback state. "
                                  "Query failed. Pass and move ahead.")
                    pass
            else:
                for node in kv_nodes:
                    items_in_cb_bucket += self.get_item_count_mc(
                        node, self.cb_bucket_name)
            self.log.info("Items in CB bucket after rollback: %s"
                          % items_in_cb_bucket)
            items_in_cbas_bucket, _ = \
                self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
            if curr + 120 < time.time():
                break

        # Count the no. of items in the CB & CBAS buckets
        items_in_cbas_bucket, _ = \
            self.cbas_util.get_num_items_in_cbas_dataset(
                self.cbas_dataset_name)
        self.log.info(
            "After rollback --- # docs in CB bucket: %s, "
            "# docs in CBAS bucket: %s",
            items_in_cb_bucket, items_in_cbas_bucket)
        self.assertTrue(
            items_in_cb_bucket == items_in_cbas_bucket,
            "After rollback: # items in CBAS bucket does not match "
            "that in the CB bucket")

    def test_rebalance_kv_rollback_create_ops(self):
        self.setup_for_test()
        items_before_persistence_stop = \
            self.cbas_util.get_num_items_in_cbas_dataset(
                self.cbas_dataset_name)[0]
        self.log.info("Items in CBAS before persistence stop: %s"
                      % items_before_persistence_stop)

        # Stop persistence on Node A
        self.log.info("Stopping persistence on NodeA")
        mem_client = MemcachedClientHelper.direct_client(
            self.cluster.master, self.cb_bucket_name)
        mem_client.stop_persistence()

        # Perform create ops in the CB bucket
        self.log.info("Performing mutations")
        self.perform_doc_ops_in_all_cb_buckets(
            "create", self.num_items, self.num_items * 3 / 2)

        kv_nodes = self.get_kv_nodes(self.servers, self.cluster.master)
        items_in_cb_bucket = 0
        if self.where_field and self.where_value:
            items_in_cb_bucket = RestConnection(
                self.cluster.master).query_tool(
                    'select count(*) from %s where %s = "%s"'
                    % (self.cb_bucket_name, self.where_field,
                       self.where_value))['results'][0]['$1']
        else:
            for node in kv_nodes:
                items_in_cb_bucket += self.get_item_count_mc(
                    node, self.cb_bucket_name)

        # Validate the no. of items in the CBAS dataset
        self.assertTrue(
            self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, items_in_cb_bucket, 0),
            "No. of items in CBAS dataset do not match that in the CB bucket")

        # Count the no. of items in the CB & CBAS buckets
        items_in_cbas_bucket, _ = \
            self.cbas_util.get_num_items_in_cbas_dataset(
                self.cbas_dataset_name)
        self.log.info(
            "Before rollback --- # docs in CB bucket: %s, "
            "# docs in CBAS bucket: %s",
            items_in_cb_bucket, items_in_cbas_bucket)
        self.assertTrue(
            items_in_cb_bucket == items_in_cbas_bucket,
            "Before rollback: # items in CBAS bucket does not match "
            "that in the CB bucket")

        if self.CC:
            self.cluster_util.remove_node([self.otpNodes[0]],
                                          wait_for_rebalance=False)
            self.cbas_util.closeConn()
            self.cbas_util = CbasUtil(self.cluster.master,
                                      self.cluster.cbas_nodes[0])
            self.cbas_util.createConn("default")
        else:
            self.cluster_util.remove_node([self.otpNodes[1]],
                                          wait_for_rebalance=False)

        # Kill memcached on Node A so that Node B becomes master
        self.log.info("Kill memcached process on NodeA")
        shell = RemoteMachineShellConnection(self.cluster.master)
        shell.kill_memcached()
        self.sleep(2, "Wait for 2 secs for the DCP rollback to be sent to CBAS")

        curr = time.time()
        while items_in_cbas_bucket == -1 or (
                items_in_cbas_bucket != 0
                and items_in_cbas_bucket > items_before_persistence_stop):
            try:
                if curr + 120 < time.time():
                    break
                items_in_cbas_bucket, _ = \
                    self.cbas_util.get_num_items_in_cbas_dataset(
                        self.cbas_dataset_name)
                self.log.info("Items in CBAS: %s" % items_in_cbas_bucket)
            except:
                self.log.info("Rebalance is probably in progress, "
                              "which is why queries are failing.")
                pass
        self.assertTrue(
            items_in_cbas_bucket <= items_before_persistence_stop,
            "Rollback did not happen")
        self.log.info("Rollback happened as expected")

        items_in_cb_bucket = 0
        curr = time.time()
        while items_in_cb_bucket != items_in_cbas_bucket \
                or items_in_cb_bucket == 0:
            items_in_cb_bucket = 0
            items_in_cbas_bucket = 0
            if self.where_field and self.where_value:
                try:
                    items_in_cb_bucket = RestConnection(
                        self.cluster.master).query_tool(
                            'select count(*) from %s where %s = "%s"'
                            % (self.cb_bucket_name, self.where_field,
                               self.where_value))['results'][0]['$1']
                except:
                    self.log.info("Indexer in rollback state. "
                                  "Query failed. Pass and move ahead.")
                    pass
            else:
                for node in kv_nodes:
                    items_in_cb_bucket += self.get_item_count_mc(
                        node, self.cb_bucket_name)
            self.log.info("Items in CB bucket after rollback: %s"
                          % items_in_cb_bucket)
            try:
                items_in_cbas_bucket, _ = \
                    self.cbas_util.get_num_items_in_cbas_dataset(
                        self.cbas_dataset_name)
            except:
                pass
            if curr + 120 < time.time():
                break

        str_time = time.time()
        while self.rest._rebalance_progress_status() == "running" \
                and time.time() < str_time + 300:
            self.sleep(1)
            self.log.info("Waiting for rebalance to complete")

        self.log.info(
            "After rollback --- # docs in CB bucket: %s, "
            "# docs in CBAS bucket: %s",
            items_in_cb_bucket, items_in_cbas_bucket)
        self.assertTrue(
            items_in_cb_bucket == items_in_cbas_bucket,
            "After rollback: # items in CBAS bucket does not match "
            "that in the CB bucket")
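
# --- Illustrative sketch (not part of the original suite) ---
# Every rollback test above repeats the same polling idiom: after killing
# memcached, poll the dataset count until it crosses the pre-rollback
# baseline or a timeout expires. A minimal module-level sketch of that
# idiom, reusing the cbas_util API seen above; the function name is
# hypothetical.
def wait_for_kv_rollback_on_cbas(cbas_util, dataset_name, baseline,
                                 expect_drop=True, timeout=120):
    start = time.time()
    while time.time() < start + timeout:
        count, _ = cbas_util.get_num_items_in_cbas_dataset(dataset_name)
        # Create-op rollbacks shrink the CBAS count; delete-op rollbacks
        # grow it back past the baseline.
        if expect_drop and count <= baseline:
            return True
        if not expect_drop and count > baseline:
            return True
    return False
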
class IngestionInterrupt_CBAS(CBASBaseTest):
    def setUp(self):
        self.input = TestInputSingleton.input
        self.input.test_params.update({"default_bucket": False})
        super(IngestionInterrupt_CBAS, self).setUp()

        if "add_all_cbas_nodes" in self.input.test_params \
                and self.input.test_params["add_all_cbas_nodes"] \
                and len(self.cluster.cbas_nodes) > 0:
            self.otpNodes.extend(
                self.cluster_util.add_all_nodes_then_rebalance(
                    self.cluster, self.cluster.cbas_nodes))

        self.bucket_util.create_default_bucket(
            self.cluster, storage=self.bucket_storage)
        self.cb_bucket_name = self.input.param('cb_bucket_name', 'default')
        self.cbas_util.createConn("default")

    def setup_for_test(self, skip_data_loading=False):
        if not skip_data_loading:
            # Load the Couchbase bucket first
            self.perform_doc_ops_in_all_cb_buckets(
                "create", 0, self.num_items, batch_size=1000)
            self.bucket_util.verify_stats_all_buckets(self.cluster,
                                                      self.num_items)

        # Create a dataset on the CB bucket
        self.cbas_util.create_dataset_on_bucket(
            cbas_bucket_name=self.cb_bucket_name,
            cbas_dataset_name=self.cbas_dataset_name)

        # Create indexes on the CBAS dataset
        self.create_secondary_indexes = self.input.param(
            "create_secondary_indexes", False)
        if self.create_secondary_indexes:
            self.index_fields = "profession:string,number:bigint"
            create_idx_statement = "create index {0} on {1}({2});".format(
                self.index_name, self.cbas_dataset_name, self.index_fields)
            status, metrics, errors, results, _ = \
                self.cbas_util.execute_statement_on_cbas_util(
                    create_idx_statement)
            self.assertTrue(status == "success", "Create Index query failed")
            self.assertTrue(
                self.cbas_util.verify_index_created(
                    self.index_name, self.index_fields.split(","),
                    self.cbas_dataset_name)[0])

        # Connect to the bucket
        self.cbas_util.connect_to_bucket(
            cbas_bucket_name=self.cbas_bucket_name,
            cb_bucket_password=self.cb_bucket_password)

        if not skip_data_loading:
            # Validate the no. of items in the CBAS dataset
            if not self.cbas_util.validate_cbas_dataset_items_count(
                    self.cbas_dataset_name, self.num_items):
                self.fail("No. of items in CBAS dataset do not match "
                          "that in the CB bucket")
    def ingestion_in_progress(self):
        self.cbas_util.disconnect_from_bucket(self.cbas_bucket_name)
        self.perform_doc_ops_in_all_cb_buckets(
            "create", self.num_items, self.num_items * 3, batch_size=1000)
        self.cbas_util.connect_to_bucket(
            cbas_bucket_name=self.cbas_bucket_name,
            cb_bucket_password=self.cb_bucket_password)

    def test_service_restart(self):
        self.setup_for_test()
        self.restart_method = self.input.param('restart_method', None)
        self.cbas_node_type = self.input.param('cbas_node_type', None)

        query = "select sleep(count(*),50000) from {0};".format(
            self.cbas_dataset_name)
        handles = self.cbas_util._run_concurrent_queries(query, "async", 10)
        self.ingestion_in_progress()

        if self.cbas_node_type == "CC":
            node_in_test = self.cbas_node
        else:
            node_in_test = self.cluster.cbas_nodes[0]

        items_in_cbas_bucket, _ = \
            self.cbas_util.get_num_items_in_cbas_dataset(
                self.cbas_dataset_name)
        self.log.info("Items before service restart: %s"
                      % items_in_cbas_bucket)

        if self.restart_method == "graceful":
            self.log.info("Gracefully re-starting service on node %s"
                          % node_in_test)
            NodeHelper.do_a_warm_up(node_in_test)
            NodeHelper.wait_service_started(node_in_test)
        else:
            self.log.info("Kill memcached process on node %s" % node_in_test)
            shell = RemoteMachineShellConnection(node_in_test)
            shell.kill_memcached()

        items_in_cbas_bucket = 0
        start_time = time.time()
        while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) \
                and time.time() < start_time + 60:
            try:
                items_in_cbas_bucket, _ = \
                    self.cbas_util.get_num_items_in_cbas_dataset(
                        self.cbas_dataset_name)
            except:
                pass
        self.log.info("After graceful service restart, docs in CBAS "
                      "bucket: %s" % items_in_cbas_bucket)

        if self.num_items < items_in_cbas_bucket < self.num_items * 3:
            self.log.info("Data ingestion was interrupted successfully")
        elif items_in_cbas_bucket < self.num_items:
            self.log.info("Data ingestion was interrupted and "
                          "restarted from 0")
        else:
            self.log.info("Data ingestion was not interrupted; it "
                          "completed before the service restart")

        run_count = 0
        fail_count = 0
        success_count = 0
        aborted_count = 0
        shell = RemoteMachineShellConnection(node_in_test)
        for handle in handles:
            status, hand = \
                self.cbas_util.retrieve_request_status_using_handle(
                    node_in_test, handle, shell)
            if status == "running":
                run_count += 1
                self.log.info("Query with handle %s is running" % handle)
            elif status == "failed":
                fail_count += 1
                self.log.info("Query with handle %s failed" % handle)
            elif status == "success":
                success_count += 1
                self.log.info("Query with handle %s succeeded" % handle)
            else:
                aborted_count += 1
                self.log.info("Queued job was deleted: %s" % status)
        self.log.info("After service restart, %s queued jobs are running"
                      % run_count)
        self.log.info("After service restart, %s queued jobs failed"
                      % fail_count)
        self.log.info("After service restart, %s queued jobs succeeded"
                      % success_count)
        self.log.info("After service restart, %s queued jobs were aborted"
                      % aborted_count)

        if self.cbas_node_type == "NC":
            self.assertTrue(fail_count + aborted_count == 0,
                            "Some queries failed/aborted")

        query = "select count(*) from {0};".format(self.cbas_dataset_name)
        self.cbas_util._run_concurrent_queries(query, "immediate", 100)

        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, self.num_items * 3):
            self.fail("No. of items in CBAS dataset do not match "
                      "that in the CB bucket")
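    # --- Illustrative sketch (not part of the original suite) ---
    # The run/failed/success/aborted tallying above is repeated verbatim in
    # every test of this class. A hypothetical helper that consolidates it,
    # reusing the retrieve_request_status_using_handle API seen above.
    def _summarize_handle_statuses(self, node, handles, shell):
        counts = {"running": 0, "failed": 0, "success": 0, "aborted": 0}
        for handle in handles:
            status, _ = self.cbas_util.retrieve_request_status_using_handle(
                node, handle, shell)
            # Anything other than the three known states counts as aborted,
            # matching the inline else-branches above.
            counts[status if status in counts else "aborted"] += 1
        for state in ("running", "failed", "success", "aborted"):
            self.log.info("After the interruption, %s queued jobs are %s"
                          % (counts[state], state))
        return counts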
    def test_kill_analytics_service(self):
        self.setup_for_test()
        process_name = self.input.param('process_name', None)
        service_name = self.input.param('service_name', None)
        cbas_node_type = self.input.param('cbas_node_type', None)

        query = "select sleep(count(*),50000) from {0};".format(
            self.cbas_dataset_name)
        handles = self.cbas_util._run_concurrent_queries(query, "async", 10)
        self.ingestion_in_progress()

        if cbas_node_type == "CC":
            node_in_test = self.cbas_node
        else:
            node_in_test = self.cluster.cbas_nodes[0]

        items_in_cbas_bucket, _ = \
            self.cbas_util.get_num_items_in_cbas_dataset(
                self.cbas_dataset_name)
        self.log.info("Items before service kill: %s" % items_in_cbas_bucket)

        self.log.info("Kill %s process on node %s"
                      % (process_name, node_in_test))
        shell = RemoteMachineShellConnection(node_in_test)
        shell.kill_process(process_name, service_name)

        items_in_cbas_bucket = 0
        start_time = time.time()
        while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) \
                and time.time() < start_time + 60:
            try:
                items_in_cbas_bucket, _ = \
                    self.cbas_util.get_num_items_in_cbas_dataset(
                        self.cbas_dataset_name)
            except:
                pass
        # start_time = time.time()
        # while items_in_cbas_bucket <= 0 and time.time() < start_time + 120:
        #     items_in_cbas_bucket, _ = \
        #         self.cbas_util.get_num_items_in_cbas_dataset(
        #             self.cbas_dataset_name)
        #     self.sleep(1)
        # items_in_cbas_bucket, _ = \
        #     self.cbas_util.get_num_items_in_cbas_dataset(
        #         self.cbas_dataset_name)
        self.log.info("After %s kill, docs in CBAS bucket: %s"
                      % (process_name, items_in_cbas_bucket))

        if self.num_items < items_in_cbas_bucket < self.num_items * 3:
            self.log.info("Data ingestion was interrupted successfully")
        elif items_in_cbas_bucket < self.num_items:
            self.log.info("Data ingestion was interrupted and "
                          "restarted from 0")
        else:
            self.log.info("Data ingestion was not interrupted; it "
                          "completed before the service restart")

        run_count = 0
        fail_count = 0
        success_count = 0
        aborted_count = 0
        shell = RemoteMachineShellConnection(node_in_test)
        for handle in handles:
            status, hand = \
                self.cbas_util.retrieve_request_status_using_handle(
                    node_in_test, handle, shell)
            if status == "running":
                run_count += 1
                self.log.info("Query with handle %s is running" % handle)
            elif status == "failed":
                fail_count += 1
                self.log.info("Query with handle %s failed" % handle)
            elif status == "success":
                success_count += 1
                self.log.info("Query with handle %s succeeded" % handle)
            else:
                aborted_count += 1
                self.log.info("Queued job was deleted: %s" % status)
        self.log.info("After service restart, %s queued jobs are running"
                      % run_count)
        self.log.info("After service restart, %s queued jobs failed"
                      % fail_count)
        self.log.info("After service restart, %s queued jobs succeeded"
                      % success_count)
        self.log.info("After service restart, %s queued jobs were aborted"
                      % aborted_count)

        if cbas_node_type == "NC":
            self.assertTrue((fail_count + aborted_count) == 0,
                            "Some queries failed/aborted")

        query = "select count(*) from {0};".format(self.cbas_dataset_name)
        self.cbas_util._run_concurrent_queries(query, "immediate", 100)

        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, self.num_items * 3):
            self.fail("No. of items in CBAS dataset do not match "
                      "that in the CB bucket")
    def test_stop_start_service_ingest_data(self):
        self.setup_for_test()
        self.cbas_node_type = self.input.param('cbas_node_type', None)

        query = "select sleep(count(*),50000) from {0};".format(
            self.cbas_dataset_name)
        handles = self.cbas_util._run_concurrent_queries(query, "async", 10)
        self.ingestion_in_progress()

        if self.cbas_node_type == "CC":
            node_in_test = self.cbas_node
            self.cbas_util.closeConn()
            self.cbas_util = CbasUtil(self.cluster.master,
                                      self.cluster.cbas_nodes[0])
            self.cbas_util.createConn("default")
        else:
            node_in_test = self.cluster.cbas_nodes[0]

        items_in_cbas_bucket, _ = \
            self.cbas_util.get_num_items_in_cbas_dataset(
                self.cbas_dataset_name)
        self.log.info("Items before service restart: %s"
                      % items_in_cbas_bucket)

        self.log.info("Gracefully stopping service on node %s" % node_in_test)
        NodeHelper.stop_couchbase(node_in_test)
        NodeHelper.start_couchbase(node_in_test)
        NodeHelper.wait_service_started(node_in_test)
        # self.sleep(10, "wait for service to come up.")
        # items_in_cbas_bucket, _ = \
        #     self.cbas_util.get_num_items_in_cbas_dataset(
        #         self.cbas_dataset_name)
        # self.log.info("After graceful STOPPING/STARTING service, docs in "
        #               "CBAS bucket: %s" % items_in_cbas_bucket)
        # start_time = time.time()
        # while items_in_cbas_bucket <= 0 and time.time() < start_time + 60:
        #     items_in_cbas_bucket, _ = \
        #         self.cbas_util.get_num_items_in_cbas_dataset(
        #             self.cbas_dataset_name)
        #     self.sleep(1)

        items_in_cbas_bucket = 0
        start_time = time.time()
        while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) \
                and time.time() < start_time + 60:
            try:
                items_in_cbas_bucket, _ = \
                    self.cbas_util.get_num_items_in_cbas_dataset(
                        self.cbas_dataset_name)
            except:
                pass

        if self.num_items < items_in_cbas_bucket < self.num_items * 3:
            self.log.info("Data ingestion was interrupted successfully")
        elif items_in_cbas_bucket < self.num_items:
            self.log.info("Data ingestion was interrupted and "
                          "restarted from 0")
        else:
            self.log.info("Data ingestion was not interrupted; it "
                          "completed before the service restart")

        run_count = 0
        fail_count = 0
        success_count = 0
        aborted_count = 0
        shell = RemoteMachineShellConnection(node_in_test)
        for handle in handles:
            status, hand = \
                self.cbas_util.retrieve_request_status_using_handle(
                    node_in_test, handle, shell)
            if status == "running":
                run_count += 1
                self.log.info("Query with handle %s is running" % handle)
            elif status == "failed":
                fail_count += 1
                self.log.info("Query with handle %s failed" % handle)
            elif status == "success":
                success_count += 1
                self.log.info("Query with handle %s succeeded" % handle)
            else:
                aborted_count += 1
                self.log.info("Queued job was deleted: %s" % status)
        self.log.info("After service restart, %s queued jobs are running"
                      % run_count)
        self.log.info("After service restart, %s queued jobs failed"
                      % fail_count)
        self.log.info("After service restart, %s queued jobs succeeded"
                      % success_count)
        self.log.info("After service restart, %s queued jobs were aborted"
                      % aborted_count)

        if self.cbas_node_type == "NC":
            self.assertTrue(fail_count + aborted_count == 0,
                            "Some queries failed/aborted")

        query = "select count(*) from {0};".format(self.cbas_dataset_name)
        self.cbas_util._run_concurrent_queries(query, "immediate", 100)

        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, self.num_items * 3):
            self.fail("No. of items in CBAS dataset do not match "
                      "that in the CB bucket")
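    # --- Illustrative sketch (not part of the original suite) ---
    # Each test above classifies the post-interruption dataset count into
    # one of three outcomes. A hypothetical helper capturing that decision
    # with the same boundaries used inline.
    def _classify_ingestion_state(self, items_in_cbas_bucket, lower, upper):
        if lower < items_in_cbas_bucket < upper:
            return "interrupted"    # ingestion paused mid-stream
        elif items_in_cbas_bucket < lower:
            return "restarted"      # ingestion restarted from 0
        else:
            return "completed"      # ingestion finished before the event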
    def test_disk_full_ingest_data(self):
        self.cbas_node_type = self.input.param('cbas_node_type', None)
        if self.cbas_node_type == "CC":
            node_in_test = self.cbas_node
            self.cbas_util = CbasUtil(self.cluster.master,
                                      self.cluster.cbas_nodes[0])
        else:
            node_in_test = self.cluster.cbas_nodes[0]

        remote_client = RemoteMachineShellConnection(node_in_test)
        output, error = remote_client.execute_command("rm -rf full_disk*",
                                                      use_channel=True)
        remote_client.log_command_output(output, error)

        self.setup_for_test()

        query = "select sleep(count(*),50000) from {0};".format(
            self.cbas_dataset_name)
        handles = self.cbas_util._run_concurrent_queries(query, "async", 10)

        def _get_disk_usage_in_MB(remote_client):
            disk_info = remote_client.get_disk_info(in_MB=True)
            disk_space = disk_info[1].split()[-3][:-1]
            return disk_space

        # Fill the disk with dd until only ~50 MB remain free
        du = int(_get_disk_usage_in_MB(remote_client)) - 50
        chunk_size = 1024
        while int(du) > 0:
            output, error = remote_client.execute_command(
                "dd if=/dev/zero of=full_disk{0} bs={1}M count=1".format(
                    str(du) + "_MB" + str(time.time()), chunk_size),
                use_channel=True)
            remote_client.log_command_output(output, error)
            du -= 1024
            if du < 1024:
                chunk_size = du

        self.ingestion_in_progress()

        items_in_cbas_bucket_before, _ = \
            self.cbas_util.get_num_items_in_cbas_dataset(
                self.cbas_dataset_name)
        items_in_cbas_bucket_after, _ = \
            self.cbas_util.get_num_items_in_cbas_dataset(
                self.cbas_dataset_name)
        try:
            # Wait until the ingested count stops changing
            while items_in_cbas_bucket_before != items_in_cbas_bucket_after:
                items_in_cbas_bucket_before, _ = \
                    self.cbas_util.get_num_items_in_cbas_dataset(
                        self.cbas_dataset_name)
                self.sleep(2)
                items_in_cbas_bucket_after, _ = \
                    self.cbas_util.get_num_items_in_cbas_dataset(
                        self.cbas_dataset_name)
        except:
            self.log.info("Ingestion interrupted and the server seems "
                          "to be down")

        if items_in_cbas_bucket_before == self.num_items * 3:
            self.log.info("Data ingestion was not interrupted; it completed")
        elif items_in_cbas_bucket_before < self.num_items * 3:
            self.log.info("Data ingestion was interrupted successfully")

        output, error = remote_client.execute_command("rm -rf full_disk*",
                                                      use_channel=True)
        remote_client.log_command_output(output, error)
        remote_client.disconnect()
        self.sleep(10, "Wait for the service to come up after disk space "
                       "is made available")

        run_count = 0
        fail_count = 0
        success_count = 0
        aborted_count = 0
        shell = RemoteMachineShellConnection(node_in_test)
        for handle in handles:
            status, hand = \
                self.cbas_util.retrieve_request_status_using_handle(
                    node_in_test, handle, shell)
            if status == "running":
                run_count += 1
                self.log.info("Query with handle %s is running" % handle)
            elif status == "failed":
                fail_count += 1
                self.log.info("Query with handle %s failed" % handle)
            elif status == "success":
                success_count += 1
                self.log.info("Query with handle %s succeeded" % handle)
            else:
                aborted_count += 1
                self.log.info("Queued job was deleted: %s" % status)
        self.log.info("After service restart, %s queued jobs are running"
                      % run_count)
        self.log.info("After service restart, %s queued jobs failed"
                      % fail_count)
        self.log.info("After service restart, %s queued jobs succeeded"
                      % success_count)
        self.log.info("After service restart, %s queued jobs were aborted"
                      % aborted_count)
        if self.cbas_node_type == "NC":
            self.assertTrue(fail_count + aborted_count == 0,
                            "Some queries failed/aborted")
        self.sleep(60)

        query = "select count(*) from {0};".format(self.cbas_dataset_name)
        self.cbas_util._run_concurrent_queries(query, "immediate", 100)

        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, self.num_items * 3):
            self.fail("No. of items in CBAS dataset do not match "
                      "that in the CB bucket")

    def test_stop_network_ingest_data(self):
        self.setup_for_test()
        self.cbas_node_type = self.input.param('cbas_node_type', None)

        query = "select sleep(count(*),50000) from {0};".format(
            self.cbas_dataset_name)
        handles = self.cbas_util._run_concurrent_queries(query, "async", 10)
        self.ingestion_in_progress()

        # Pick the node whose network will be stopped
        if self.cbas_node_type:
            if self.cbas_node_type == "CC":
                node_in_test = self.cbas_node
                self.cbas_util = CbasUtil(self.cluster.master,
                                          self.cluster.cbas_nodes[0])
                self.cbas_util.createConn("default")
            else:
                node_in_test = self.cluster.cbas_nodes[0]
        else:
            # Stop the network on the KV node to mimic an n/w partition on KV
            node_in_test = self.cluster.master

        items_in_cbas_bucket_before, _ = \
            self.cbas_util.get_num_items_in_cbas_dataset(
                self.cbas_dataset_name)
        self.log.info("Items before network down: %s"
                      % items_in_cbas_bucket_before)

        RemoteMachineShellConnection(node_in_test).stop_network("30")
        # self.sleep(40, "Wait for network to come up.")

        items_in_cbas_bucket = 0
        start_time = time.time()
        while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) \
                and time.time() < start_time + 60:
            try:
                items_in_cbas_bucket, _ = \
                    self.cbas_util.get_num_items_in_cbas_dataset(
                        self.cbas_dataset_name)
            except:
                pass
        # items_in_cbas_bucket_after, _ = \
        #     self.cbas_util.get_num_items_in_cbas_dataset(
        #         self.cbas_dataset_name)
        # start_time = time.time()
        # while items_in_cbas_bucket_after <= 0 \
        #         and time.time() < start_time + 60:
        #     items_in_cbas_bucket_after, _ = \
        #         self.cbas_util.get_num_items_in_cbas_dataset(
        #             self.cbas_dataset_name)
        #     self.sleep(1)
        # items_in_cbas_bucket = items_in_cbas_bucket_after
        self.log.info("Items after the network is up: %s"
                      % items_in_cbas_bucket)

        if self.num_items < items_in_cbas_bucket < self.num_items * 3:
            self.log.info("Data ingestion was interrupted successfully")
        elif items_in_cbas_bucket < self.num_items:
            self.log.info("Data ingestion was interrupted and "
                          "restarted from 0")
        else:
            self.log.info("Data ingestion was not interrupted; it "
                          "completed before the service restart")

        run_count = 0
        fail_count = 0
        success_count = 0
        aborted_count = 0
        shell = RemoteMachineShellConnection(node_in_test)
        for handle in handles:
            status, hand = \
                self.cbas_util.retrieve_request_status_using_handle(
                    node_in_test, handle, shell)
            if status == "running":
                run_count += 1
                self.log.info("Query with handle %s is running" % handle)
            elif status == "failed":
                fail_count += 1
                self.log.info("Query with handle %s failed" % handle)
            elif status == "success":
                success_count += 1
                self.log.info("Query with handle %s succeeded" % handle)
            else:
                aborted_count += 1
                self.log.info("Queued job was deleted: %s" % status)
        self.log.info("After service restart, %s queued jobs are running"
                      % run_count)
        self.log.info("After service restart, %s queued jobs failed"
                      % fail_count)
        self.log.info("After service restart, %s queued jobs succeeded"
                      % success_count)
        self.log.info("After service restart, %s queued jobs were aborted"
                      % aborted_count)
        if self.cbas_node_type == "NC":
            self.assertTrue(fail_count + aborted_count == 0,
                            "Some queries failed/aborted")

        query = "select count(*) from {0};".format(self.cbas_dataset_name)
        self.cbas_util._run_concurrent_queries(query, "immediate", 100)

        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, self.num_items * 3):
            self.fail("No. of items in CBAS dataset do not match "
                      "that in the CB bucket")

    def test_network_hardening(self):
        self.setup_for_test()
        end = self.num_items
        CC = self.cbas_node
        NC = self.cluster.cbas_nodes
        KV = self.cluster.master
        nodes = [CC] + NC

        for node in nodes:
            for i in xrange(2):
                NodeHelper.enable_firewall(node)
                start = end
                end = start + self.num_items
                tasks = self.perform_doc_ops_in_all_cb_buckets(
                    "create", start, end, batch_size=1000, _async=True)
                self.sleep(30,
                           "Sleep after enabling the firewall on node %s, "
                           "then disable it." % node.ip)
                NodeHelper.disable_firewall(node)

                items_in_cbas_bucket = 0
                start_time = time.time()
                while (items_in_cbas_bucket == 0
                       or items_in_cbas_bucket == -1) \
                        and time.time() < start_time + 60:
                    try:
                        items_in_cbas_bucket, _ = \
                            self.cbas_util.get_num_items_in_cbas_dataset(
                                self.cbas_dataset_name)
                    except:
                        pass
                self.log.info("Items after the network is up: %s"
                              % items_in_cbas_bucket)

                if start < items_in_cbas_bucket < end:
                    self.log.info("Data ingestion was interrupted "
                                  "successfully")
                elif items_in_cbas_bucket < start:
                    self.log.info("Data ingestion was interrupted and "
                                  "restarted from 0")
                else:
                    self.log.info("Data ingestion was not interrupted; it "
                                  "completed before the network came back")

                query = "select count(*) from {0};".format(
                    self.cbas_dataset_name)
                self.cbas_util._run_concurrent_queries(query, "immediate",
                                                       100)
                for task in tasks:
                    self.task_manager.get_task_result(task)
                if not self.cbas_util.validate_cbas_dataset_items_count(
                        self.cbas_dataset_name, end):
                    self.fail("No. of items in CBAS dataset do not match "
                              "that in the CB bucket")

                NodeHelper.enable_firewall(node, bidirectional=True)
                start = end
                end = start + self.num_items
                tasks = self.perform_doc_ops_in_all_cb_buckets(
                    "create", start, end, batch_size=1000, _async=True)
                self.sleep(30,
                           "Sleep after enabling the bidirectional firewall "
                           "on the node, then disable it.")
                NodeHelper.disable_firewall(node)

                items_in_cbas_bucket = 0
                start_time = time.time()
                while (items_in_cbas_bucket == 0
                       or items_in_cbas_bucket == -1) \
                        and time.time() < start_time + 60:
                    try:
                        items_in_cbas_bucket, _ = \
                            self.cbas_util.get_num_items_in_cbas_dataset(
                                self.cbas_dataset_name)
                    except:
                        pass
                self.log.info("Items after the network is up: %s"
                              % items_in_cbas_bucket)

                if start < items_in_cbas_bucket < end:
                    self.log.info("Data ingestion was interrupted "
                                  "successfully")
                elif items_in_cbas_bucket < start:
                    self.log.info("Data ingestion was interrupted and "
                                  "restarted from 0")
                else:
                    self.log.info("Data ingestion was not interrupted; it "
                                  "completed before the network came back")

                query = "select count(*) from {0};".format(
                    self.cbas_dataset_name)
                self.cbas_util._run_concurrent_queries(query, "immediate",
                                                       100)
                for task in tasks:
                    self.task_manager.get_task_result(task)
                if not self.cbas_util.validate_cbas_dataset_items_count(
                        self.cbas_dataset_name, end):
                    self.fail("No. of items in CBAS dataset do not match "
                              "that in the CB bucket")
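
# --- Illustrative sketch (not part of the original suite) ---
# MetadataReplication below repeatedly waits on ns_server's rebalance
# progress with a 300 s cap while logging replica state. A minimal sketch
# of that wait, reusing the RestConnection API seen in these tests; the
# function name is hypothetical.
def wait_for_rebalance_to_finish(rest, timeout=300, poll_interval=2):
    start = time.time()
    while rest._rebalance_progress_status() == "running" \
            and time.time() < start + timeout:
        time.sleep(poll_interval)
    # True means the rebalance stopped (completed or failed) within timeout
    return rest._rebalance_progress_status() != "running"
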
class MetadataReplication(CBASBaseTest):
    def tearDown(self):
        CBASBaseTest.tearDown(self)

    def setUp(self):
        self.input = TestInputSingleton.input
        self.input.test_params.update({"default_bucket": False})
        super(MetadataReplication, self).setUp()
        self.nc_otpNodes = []
        if "add_all_cbas_nodes" in self.input.test_params \
                and self.input.test_params["add_all_cbas_nodes"] \
                and len(self.cluster.cbas_nodes) > 0:
            self.nc_otpNodes = self.add_all_nodes_then_rebalance(
                self.cluster.cbas_nodes)
        elif self.input.param("nc_nodes_to_add", 0):
            self.nc_otpNodes = self.add_all_nodes_then_rebalance(
                self.cluster.cbas_nodes[:self.input.param("nc_nodes_to_add")])
        self.otpNodes += self.nc_otpNodes

        self.bucket_util.create_default_bucket(
            self.cluster, storage=self.bucket_storage)
        self.cbas_util.createConn("default")
        self.shell = RemoteMachineShellConnection(self.cluster.master)

        # Test for the number of partitions
        self.partitions_dict = self.cbas_util.get_num_partitions(self.shell)
        # if self.cluster.master.cbas_path:
        #     for key in self.partitions_dict.keys():
        #         self.assertTrue(
        #             self.partitions_dict[key] == len(
        #                 ast.literal_eval(self.cluster.master.cbas_path)),
        #             "Number of partitions created are incorrect "
        #             "on cbas nodes.")

    def setup_for_test(self, skip_data_loading=False):
        if not skip_data_loading:
            # Load the Couchbase bucket first
            self.perform_doc_ops_in_all_cb_buckets(
                "create", 0, self.num_items, batch_size=1000)

        # Create a dataset on the CB bucket
        self.cbas_util.create_dataset_on_bucket(
            cbas_bucket_name=self.cb_bucket_name,
            cbas_dataset_name=self.cbas_dataset_name)

        # Create indexes on the CBAS dataset
        self.create_secondary_indexes = self.input.param(
            "create_secondary_indexes", False)
        if self.create_secondary_indexes:
            self.index_fields = "profession:string,number:bigint"
            create_idx_statement = "create index {0} on {1}({2});".format(
                self.index_name, self.cbas_dataset_name, self.index_fields)
            status, metrics, errors, results, _ = \
                self.cbas_util.execute_statement_on_cbas_util(
                    create_idx_statement)
            self.assertTrue(status == "success", "Create Index query failed")
            self.assertTrue(
                self.cbas_util.verify_index_created(
                    self.index_name, self.index_fields.split(","),
                    self.cbas_dataset_name)[0])

        # Connect to the bucket
        self.cbas_util.connect_to_bucket(
            cbas_bucket_name=self.cbas_bucket_name,
            cb_bucket_password=self.cb_bucket_password)

        if not skip_data_loading:
            # Validate the no. of items in the CBAS dataset
            if not self.cbas_util.validate_cbas_dataset_items_count(
                    self.cbas_dataset_name, self.num_items):
                self.fail("No. of items in CBAS dataset do not match "
                          "that in the CB bucket")

    def ingestion_in_progress(self):
        self.cbas_util.disconnect_from_bucket(self.cbas_bucket_name)
        self.perform_doc_ops_in_all_cb_buckets(
            "create", 0, self.num_items * 2, batch_size=1000)
        self.cbas_util.connect_to_bucket(
            cbas_bucket_name=self.cbas_bucket_name,
            cb_bucket_password=self.cb_bucket_password)

    def ingest_more_data(self):
        self.cbas_util.disconnect_from_bucket(self.cbas_bucket_name)
        self.perform_doc_ops_in_all_cb_buckets(
            "create", self.num_items * 2, self.num_items * 4,
            batch_size=1000)
        self.cbas_util.connect_to_bucket(
            cbas_bucket_name=self.cbas_bucket_name,
            cb_bucket_password=self.cb_bucket_password)
        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, self.num_items * 4):
            self.fail("No. of items in CBAS dataset do not match "
                      "that in the CB bucket")
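    # --- Illustrative sketch (not part of the original suite) ---
    # The replica-count and IN_SYNC verification below is repeated after
    # every rebalance and reboot. A hypothetical helper consolidating it,
    # reusing the get_replicas_info API seen above.
    def _assert_replicas_in_sync(self, expected_replica_count):
        replicas = self.cbas_util.get_replicas_info(self.shell)
        self.assertEqual(
            len(replicas), expected_replica_count,
            "%s,%s" % (len(replicas), expected_replica_count))
        for replica in replicas:
            self.log.info("Replica state: %s" % replica['status'])
            self.assertEqual(replica['status'], "IN_SYNC",
                             "Replica state is incorrect: %s"
                             % replica['status'])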
    def test_rebalance(self):
        self.setup_for_test(skip_data_loading=True)
        self.rebalance_type = self.input.param('rebalance_type', 'out')
        self.rebalance_node = self.input.param('rebalance_node', 'CC')
        self.how_many = self.input.param('how_many', 1)
        self.restart_rebalance = self.input.param('restart_rebalance', False)
        self.replica_change = self.input.param('replica_change', 0)

        query = "select sleep(count(*),50000) from {0};".format(
            self.cbas_dataset_name)
        handles = self.cbas_util._run_concurrent_queries(query, "async", 10)
        self.ingestion_in_progress()

        otpNodes = []
        if self.rebalance_node == "CC":
            node_in_test = [self.cbas_node]
            otpNodes = [self.otpNodes[0]]
            self.cbas_util.closeConn()
            self.cbas_util = CbasUtil(self.cluster.master,
                                      self.cluster.cbas_nodes[0])
            self.cbas_util.createConn("default")
            self.cbas_node = self.cluster.cbas_nodes[0]
        elif self.rebalance_node == "NC":
            node_in_test = self.cluster.cbas_nodes[:self.how_many]
            otpNodes = self.nc_otpNodes[:self.how_many]
        else:
            node_in_test = [self.cbas_node] \
                + self.cluster.cbas_nodes[:self.how_many]
            otpNodes = self.otpNodes[:self.how_many + 1]
            self.cbas_util.closeConn()
            self.cbas_util = CbasUtil(
                self.cluster.master,
                self.cluster.cbas_nodes[self.how_many])
            self.cbas_util.createConn("default")

        replicas_before_rebalance = len(
            self.cbas_util.get_replicas_info(self.shell))

        if self.rebalance_type == 'in':
            if self.restart_rebalance:
                self.cluster_util.add_all_nodes_then_rebalance(
                    self.cluster,
                    self.cluster.cbas_nodes[
                        self.input.param("nc_nodes_to_add"):
                        self.how_many + self.input.param("nc_nodes_to_add")],
                    wait_for_completion=False)
                self.sleep(2)
                if self.rest._rebalance_progress_status() == "running":
                    self.assertTrue(self.rest.stop_rebalance(wait_timeout=120),
                                    "Failed while stopping rebalance.")
                    self.sleep(30, "Wait for some time after rebalance "
                                   "is stopped.")
                else:
                    self.fail("Rebalance completed before the test could "
                              "have stopped rebalance.")
                self.rebalance(wait_for_completion=False)
            else:
                self.cluster_util.add_all_nodes_then_rebalance(
                    self.cluster,
                    self.cluster.cbas_nodes[
                        self.input.param("nc_nodes_to_add"):
                        self.how_many + self.input.param("nc_nodes_to_add")],
                    wait_for_completion=False)
            replicas_before_rebalance += self.replica_change
        else:
            if self.restart_rebalance:
                self.cluster_util.remove_node(self.cluster, otpNodes,
                                              wait_for_rebalance=False)
                self.sleep(2)
                if self.rest._rebalance_progress_status() == "running":
                    self.assertTrue(self.rest.stop_rebalance(wait_timeout=120),
                                    "Failed while stopping rebalance.")
                    self.sleep(30, "Wait for some time after rebalance "
                                   "is stopped.")
                else:
                    self.fail("Rebalance completed before the test could "
                              "have stopped rebalance.")
                self.rebalance(
                    wait_for_completion=False,
                    ejected_nodes=[node.id for node in otpNodes])
            else:
                self.cluster_util.remove_node(self.cluster, otpNodes,
                                              wait_for_rebalance=False)
            replicas_before_rebalance -= self.replica_change

        self.sleep(30)
        str_time = time.time()
        while self.rest._rebalance_progress_status() == "running" \
                and time.time() < str_time + 300:
            replicas = self.cbas_util.get_replicas_info(self.shell)
            if replicas:
                for replica in replicas:
                    self.log.info("Replica state during rebalance: %s"
                                  % replica['status'])
            self.sleep(2)

        replicas = self.cbas_util.get_replicas_info(self.shell)
        replicas_after_rebalance = len(replicas)
        self.assertEqual(
            replicas_after_rebalance, replicas_before_rebalance,
            "%s,%s" % (replicas_after_rebalance, replicas_before_rebalance))
        for replica in replicas:
            self.log.info("Replica state after rebalance: %s"
                          % replica['status'])
            self.assertEqual(replica['status'], "IN_SYNC",
                             "Replica state is incorrect: %s"
                             % replica['status'])

        # items_in_cbas_bucket, _ = \
        #     self.cbas_util.get_num_items_in_cbas_dataset(
        #         self.cbas_dataset_name)
        # self.log.info("Items before service restart: %s"
        #               % items_in_cbas_bucket)
        count = 0
        while self.cbas_util.fetch_analytics_cluster_response()['state'] \
                != "ACTIVE" and count < 60:
            self.sleep(5)
            count += 1

        items_in_cbas_bucket = 0
        start_time = time.time()
        while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) \
                and time.time() < start_time + 60:
            try:
                items_in_cbas_bucket, _ = \
                    self.cbas_util.get_num_items_in_cbas_dataset(
                        self.cbas_dataset_name)
            except:
                pass
            self.sleep(1)
        self.log.info("After the rebalance operation, docs in CBAS "
                      "bucket: %s" % items_in_cbas_bucket)

        if self.num_items < items_in_cbas_bucket < self.num_items * 2:
            self.log.info("Data ingestion was interrupted successfully")
        elif items_in_cbas_bucket < self.num_items:
            self.log.info("Data ingestion was interrupted and "
                          "restarted from 0")
        else:
            self.log.info("Data ingestion was not interrupted; it "
                          "completed before the rebalance operation")

        run_count = 0
        fail_count = 0
        success_count = 0
        aborted_count = 0
        shell = RemoteMachineShellConnection(node_in_test[0])
        for handle in handles:
            status, hand = \
                self.cbas_util.retrieve_request_status_using_handle(
                    node_in_test, handle, shell)
            if status == "running":
                run_count += 1
                self.log.info("Query with handle %s is running" % handle)
            elif status == "failed":
                fail_count += 1
                self.log.info("Query with handle %s failed" % handle)
            elif status == "success":
                success_count += 1
                self.log.info("Query with handle %s succeeded" % handle)
            else:
                aborted_count += 1
                self.log.info("Queued job was deleted: %s" % status)
        self.log.info("After the rebalance, %s queued jobs are running"
                      % run_count)
        self.log.info("After the rebalance, %s queued jobs failed"
                      % fail_count)
        self.log.info("After the rebalance, %s queued jobs succeeded"
                      % success_count)
        self.log.info("After the rebalance, %s queued jobs were aborted"
                      % aborted_count)

        if self.rebalance_node == "NC":
            self.assertTrue(aborted_count == 0, "Some queries aborted")

        query = "select count(*) from {0};".format(self.cbas_dataset_name)
        self.cbas_util._run_concurrent_queries(query, "immediate", 100)

        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, self.num_items * 2):
            self.fail("No. of items in CBAS dataset do not match "
                      "that in the CB bucket")

        self.ingest_more_data()
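    # --- Illustrative sketch (not part of the original suite) ---
    # Whenever the CC node is rebalanced out or failed over, these tests
    # re-point CbasUtil at a surviving NC node. A hypothetical helper for
    # that hand-off, using the same close/recreate sequence as the tests.
    def _repoint_cbas_util(self, new_cbas_node, bucket="default"):
        self.cbas_util.closeConn()
        self.cbas_util = CbasUtil(self.cluster.master, new_cbas_node)
        self.cbas_util.createConn(bucket)
        self.cbas_node = new_cbas_node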
    def test_cancel_CC_rebalance(self):
        pass

    def test_chain_rebalance_out_cc(self):
        self.setup_for_test(skip_data_loading=True)
        self.ingestion_in_progress()

        total_cbas_nodes = len(self.otpNodes)
        while total_cbas_nodes > 1:
            cc_ip = self.cbas_util.retrieve_cc_ip(shell=self.shell)
            for otpnode in self.otpNodes:
                if otpnode.ip == cc_ip:
                    self.cluster_util.remove_node(self.cluster, [otpnode],
                                                  wait_for_rebalance=True)
                    for server in self.cluster.cbas_nodes:
                        if cc_ip != server.ip:
                            self.cbas_util.closeConn()
                            self.cbas_util = CbasUtil(self.cluster.master,
                                                      server)
                            self.cbas_util.createConn("default")
                            self.cbas_node = server
                            break
                    # items_in_cbas_bucket, _ = \
                    #     self.cbas_util.get_num_items_in_cbas_dataset(
                    #         self.cbas_dataset_name)
                    # self.log.info("Items before service restart: %s"
                    #               % items_in_cbas_bucket)
                    items_in_cbas_bucket = 0
                    start_time = time.time()
                    while (items_in_cbas_bucket == 0
                           or items_in_cbas_bucket == -1) \
                            and time.time() < start_time + 60:
                        try:
                            items_in_cbas_bucket, _ = \
                                self.cbas_util.get_num_items_in_cbas_dataset(
                                    self.cbas_dataset_name)
                        except:
                            pass
                        self.sleep(1)
                    self.log.info("After the rebalance operation, docs in "
                                  "CBAS bucket: %s" % items_in_cbas_bucket)

                    if self.num_items < items_in_cbas_bucket \
                            < self.num_items * 2:
                        self.log.info("Data ingestion was interrupted "
                                      "successfully")
                    elif items_in_cbas_bucket < self.num_items:
                        self.log.info("Data ingestion was interrupted and "
                                      "restarted from 0")
                    else:
                        self.log.info("Data ingestion was not interrupted; "
                                      "it completed before the rebalance "
                                      "operation")

                    query = "select count(*) from {0};".format(
                        self.cbas_dataset_name)
                    self.cbas_util._run_concurrent_queries(
                        query, "immediate", 10)
                    break
            total_cbas_nodes -= 1

        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, self.num_items * 2):
            self.fail("No. of items in CBAS dataset do not match "
                      "that in the CB bucket")
        self.ingest_more_data()

    def test_cc_swap_rebalance(self):
        self.restart_rebalance = self.input.param('restart_rebalance', False)
        self.setup_for_test(skip_data_loading=True)

        query = "select sleep(count(*),50000) from {0};".format(
            self.cbas_dataset_name)
        handles = self.cbas_util._run_concurrent_queries(query, "async", 10)
        self.ingestion_in_progress()

        replicas_before_rebalance = len(
            self.cbas_util.get_replicas_info(self.shell))
        self.cluster_util.add_node(node=self.cluster.cbas_nodes[-1],
                                   rebalance=False)

        swap_nc = self.input.param('swap_nc', False)
        if not swap_nc:
            out_nodes = [self.otpNodes[0]]
            self.cbas_util.closeConn()
            self.cbas_util = CbasUtil(self.cluster.master,
                                      self.cluster.cbas_nodes[0])
            self.cbas_util.createConn("default")
            self.cbas_node = self.cluster.cbas_nodes[0]
        else:
            out_nodes = [self.otpNodes[1]]

        self.cluster_util.remove_node(self.cluster, out_nodes,
                                      wait_for_rebalance=False)
        self.sleep(5, "Wait for some time after the rebalance has started.")
        if self.restart_rebalance:
            if self.rest._rebalance_progress_status() == "running":
                self.assertTrue(self.rest.stop_rebalance(wait_timeout=120),
                                "Failed while stopping rebalance.")
                self.sleep(10)
            else:
                self.fail("Rebalance completed before the test could "
                          "have stopped rebalance.")
            self.rebalance(ejected_nodes=[node.id for node in out_nodes],
                           wait_for_completion=False)
        self.sleep(5)

        str_time = time.time()
        while self.rest._rebalance_progress_status() == "running" \
                and time.time() < str_time + 300:
            replicas = self.cbas_util.get_replicas_info(self.shell)
            if replicas:
                for replica in replicas:
                    self.log.info("Replica state during rebalance: %s"
                                  % replica['status'])
            self.sleep(30)

        replicas = self.cbas_util.get_replicas_info(self.shell)
        replicas_after_rebalance = len(replicas)
        self.assertEqual(
            replicas_after_rebalance, replicas_before_rebalance,
            "%s,%s" % (replicas_after_rebalance, replicas_before_rebalance))
        for replica in replicas:
            self.log.info("Replica state after rebalance: %s"
                          % replica['status'])
            self.assertEqual(replica['status'], "IN_SYNC",
                             "Replica state is incorrect: %s"
                             % replica['status'])

        # items_in_cbas_bucket, _ = \
        #     self.cbas_util.get_num_items_in_cbas_dataset(
        #         self.cbas_dataset_name)
        # self.log.info("Items before service restart: %s"
        #               % items_in_cbas_bucket)
        items_in_cbas_bucket = 0
        start_time = time.time()
        while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) \
                and time.time() < start_time + 60:
            try:
                items_in_cbas_bucket, _ = \
                    self.cbas_util.get_num_items_in_cbas_dataset(
                        self.cbas_dataset_name)
            except:
                pass
            self.sleep(1)
        self.log.info("After the rebalance operation, docs in CBAS "
                      "bucket: %s" % items_in_cbas_bucket)

        if self.num_items < items_in_cbas_bucket < self.num_items * 2:
            self.log.info("Data ingestion was interrupted successfully")
        elif items_in_cbas_bucket < self.num_items:
            self.log.info("Data ingestion was interrupted and "
                          "restarted from 0")
        else:
            self.log.info("Data ingestion was not interrupted; it "
                          "completed before the rebalance operation")

        run_count = 0
        fail_count = 0
        success_count = 0
        aborted_count = 0
        shell = RemoteMachineShellConnection(self.cluster.master)
        for handle in handles:
            status, hand = \
                self.cbas_util.retrieve_request_status_using_handle(
                    self.cluster.master, handle, shell)
            if status == "running":
                run_count += 1
                self.log.info("Query with handle %s is running" % handle)
            elif status == "failed":
                fail_count += 1
                self.log.info("Query with handle %s failed" % handle)
            elif status == "success":
                success_count += 1
                self.log.info("Query with handle %s succeeded" % handle)
            else:
                aborted_count += 1
                self.log.info("Queued job was deleted: %s" % status)
        self.log.info("After the rebalance, %s queued jobs are running"
                      % run_count)
        self.log.info("After the rebalance, %s queued jobs failed"
                      % fail_count)
        self.log.info("After the rebalance, %s queued jobs succeeded"
                      % success_count)
        self.log.info("After the rebalance, %s queued jobs were aborted"
                      % aborted_count)

        query = "select count(*) from {0};".format(self.cbas_dataset_name)
        self.cbas_util._run_concurrent_queries(query, "immediate", 100)

        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, self.num_items * 2):
            self.fail("No. of items in CBAS dataset do not match "
                      "that in the CB bucket")
        self.ingest_more_data()

    def test_reboot_nodes(self):
        # Test rebooting the CC node, the NC nodes, or all nodes
    def test_reboot_nodes(self):
        # Test rebooting the CC node, the NC nodes, or all of them.
        self.setup_for_test(skip_data_loading=True)
        self.ingestion_in_progress()
        self.node_type = self.input.param('node_type', 'CC')

        replica_nodes_before_reboot = \
            self.cbas_util.get_replicas_info(self.shell)
        replicas_before_reboot = len(replica_nodes_before_reboot)
        if self.node_type == "CC":
            shell = RemoteMachineShellConnection(self.cbas_node)
            shell.reboot_server_and_wait_for_cb_run(self.cluster_util,
                                                    self.cbas_node)
            shell.disconnect()
        elif self.node_type == "NC":
            for server in self.cluster.cbas_nodes:
                shell = RemoteMachineShellConnection(server)
                shell.reboot_server_and_wait_for_cb_run(self.cluster_util,
                                                        server)
                shell.disconnect()
        else:
            # Reboot the CC first, then every NC node.
            shell = RemoteMachineShellConnection(self.cbas_node)
            shell.reboot_server_and_wait_for_cb_run(self.cluster_util,
                                                    self.cbas_node)
            shell.disconnect()
            for server in self.cluster.cbas_nodes:
                shell = RemoteMachineShellConnection(server)
                shell.reboot_server_and_wait_for_cb_run(self.cluster_util,
                                                        server)
                shell.disconnect()

        self.sleep(60)
        replica_nodes_after_reboot = \
            self.cbas_util.get_replicas_info(self.shell)
        replicas_after_reboot = len(replica_nodes_after_reboot)
        self.assertTrue(
            replica_nodes_after_reboot == replica_nodes_before_reboot,
            "Replica nodes changed after reboot. Before: %s, After: %s"
            % (replica_nodes_before_reboot, replica_nodes_after_reboot))
        self.assertTrue(
            replicas_after_reboot == replicas_before_reboot,
            "Number of replicas changed after reboot. Before: %s, After: %s"
            % (replicas_before_reboot, replicas_after_reboot))

        items_in_cbas_bucket = 0
        start_time = time.time()
        while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) \
                and time.time() < start_time + 60:
            try:
                items_in_cbas_bucket, _ = \
                    self.cbas_util.get_num_items_in_cbas_dataset(
                        self.cbas_dataset_name)
            except Exception:
                pass
            self.sleep(1)

        query = "select count(*) from {0};".format(self.cbas_dataset_name)
        self.cbas_util._run_concurrent_queries(query, "immediate", 100)
        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, self.num_items * 2):
            self.fail(
                "No. of items in CBAS dataset do not match that in the CB bucket")

        for replica in replica_nodes_after_reboot:
            self.log.info("replica state after reboot: %s"
                          % replica['status'])
            self.assertEqual(
                replica['status'], "IN_SYNC",
                "Replica state is incorrect: %s" % replica['status'])
        self.ingest_more_data()
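    # The replica assertions above and in test_failover follow one pattern:
    # the replica count must be unchanged and every replica must be IN_SYNC.
    # A hypothetical consolidation (not in the original suite), assuming
    # get_replicas_info() returns dicts carrying a 'status' key:
    def _assert_replicas_in_sync(self, expected_count):
        replicas = self.cbas_util.get_replicas_info(self.shell)
        self.assertEqual(
            len(replicas), expected_count,
            "Replica count mismatch. Expected: %s, Actual: %s"
            % (expected_count, len(replicas)))
        for replica in replicas:
            self.log.info("replica state: %s" % replica['status'])
            self.assertEqual(
                replica['status'], "IN_SYNC",
                "Replica state is incorrect: %s" % replica['status'])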
    def test_failover(self):
        self.setup_for_test(skip_data_loading=True)
        self.rebalance_node = self.input.param('rebalance_node', 'CC')
        self.how_many = self.input.param('how_many', 1)
        self.restart_rebalance = self.input.param('restart_rebalance', False)
        self.replica_change = self.input.param('replica_change', 0)
        self.add_back = self.input.param('add_back', False)
        query = "select sleep(count(*),50000) from {0};".format(
            self.cbas_dataset_name)
        handles = self.cbas_util._run_concurrent_queries(query, "async", 10)
        self.ingestion_in_progress()

        if self.rebalance_node == "CC":
            node_in_test = [self.cbas_node]
            otpNodes = [self.otpNodes[0]]
            self.cbas_util.closeConn()
            self.cbas_util = CbasUtil(self.cluster.master,
                                      self.cluster.cbas_nodes[0])
            self.cbas_util.createConn("default")
            self.cbas_node = self.cluster.cbas_nodes[0]
        elif self.rebalance_node == "NC":
            node_in_test = self.cluster.cbas_nodes[:self.how_many]
            otpNodes = self.nc_otpNodes[:self.how_many]
        else:
            # Fail over the CC plus `how_many` NC nodes; reconnect through
            # the first NC node that stays in the cluster.
            node_in_test = [self.cbas_node] + \
                           self.cluster.cbas_nodes[:self.how_many]
            otpNodes = self.otpNodes[:self.how_many + 1]
            self.cbas_util.closeConn()
            self.cbas_util = CbasUtil(self.cluster.master,
                                      self.cluster.cbas_nodes[self.how_many])
            self.cbas_util.createConn("default")

        replicas_before_rebalance = len(
            self.cbas_util.get_replicas_info(self.shell))
        items_in_cbas_bucket = 0
        start_time = time.time()
        while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) \
                and time.time() < start_time + 60:
            try:
                items_in_cbas_bucket, _ = \
                    self.cbas_util.get_num_items_in_cbas_dataset(
                        self.cbas_dataset_name)
            except Exception:
                pass
            self.sleep(1)
        self.log.info("Items before failover node: %s" % items_in_cbas_bucket)
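        # Two recovery paths follow. With restart_rebalance, the target
        # nodes are failed over, a rebalance is started (full-recovery
        # add-back or eject), stopped mid-flight via stop_rebalance(), and
        # then started again. Without it, the failover and optional
        # add-back rebalance run uninterrupted. In both paths ingestion is
        # still in progress, so CBAS must survive the topology change
        # mid-stream.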
        if self.restart_rebalance:
            graceful_failover = self.input.param("graceful_failover", False)
            failover_task = self._cb_cluster.async_failover(
                self.input.servers, node_in_test, graceful_failover)
            self.task_manager.get_task_result(failover_task)
            if self.add_back:
                for otpnode in otpNodes:
                    self.rest.set_recovery_type('ns_1@' + otpnode.ip, "full")
                    self.rest.add_back_node('ns_1@' + otpnode.ip)
                self.rebalance(wait_for_completion=False)
            else:
                self.rebalance(ejected_nodes=[node.id for node in otpNodes],
                               wait_for_completion=False)
            self.sleep(2)
            if self.rest._rebalance_progress_status() == "running":
                self.assertTrue(self.rest.stop_rebalance(wait_timeout=120),
                                "Failed while stopping rebalance.")
                if self.add_back:
                    self.rebalance(wait_for_completion=False)
                else:
                    self.rebalance(
                        ejected_nodes=[node.id for node in otpNodes],
                        wait_for_completion=False)
            else:
                self.fail("Rebalance completed before the test could have "
                          "stopped rebalance.")
        else:
            graceful_failover = self.input.param("graceful_failover", False)
            failover_task = self._cb_cluster.async_failover(
                self.input.servers, node_in_test, graceful_failover)
            self.task_manager.get_task_result(failover_task)
            if self.add_back:
                for otpnode in otpNodes:
                    self.rest.set_recovery_type('ns_1@' + otpnode.ip, "full")
                    self.rest.add_back_node('ns_1@' + otpnode.ip)
                self.rebalance(wait_for_completion=False)

        replicas_before_rebalance -= self.replica_change
        self.sleep(5)
        str_time = time.time()
        while self.rest._rebalance_progress_status() == "running" \
                and time.time() < str_time + 300:
            replicas = self.cbas_util.get_replicas_info(self.shell)
            if replicas:
                for replica in replicas:
                    self.log.info("replica state during rebalance: %s"
                                  % replica['status'])
            self.sleep(15)

        replicas = self.cbas_util.get_replicas_info(self.shell)
        replicas_after_rebalance = len(replicas)
        self.assertEqual(
            replicas_after_rebalance, replicas_before_rebalance,
            "%s,%s" % (replicas_after_rebalance, replicas_before_rebalance))
        for replica in replicas:
            self.log.info("replica state after rebalance: %s"
                          % replica['status'])
            self.assertEqual(
                replica['status'], "IN_SYNC",
                "Replica state is incorrect: %s" % replica['status'])

        items_in_cbas_bucket = 0
        start_time = time.time()
        while (items_in_cbas_bucket == 0 or items_in_cbas_bucket == -1) \
                and time.time() < start_time + 60:
            try:
                items_in_cbas_bucket, _ = \
                    self.cbas_util.get_num_items_in_cbas_dataset(
                        self.cbas_dataset_name)
            except Exception:
                pass
            self.sleep(1)
        self.log.info("After rebalance operation docs in CBAS bucket: %s"
                      % items_in_cbas_bucket)
        if self.num_items < items_in_cbas_bucket < self.num_items * 2:
            self.log.info("Data ingestion interrupted successfully")
        elif items_in_cbas_bucket < self.num_items:
            self.log.info("Data ingestion was interrupted and restarted "
                          "from 0.")
        else:
            self.log.info("Data ingestion was not interrupted; it completed "
                          "before the rebalance operation.")

        run_count = 0
        fail_count = 0
        success_count = 0
        aborted_count = 0
        shell = RemoteMachineShellConnection(node_in_test[0])
        for handle in handles:
            status, _ = \
                self.cbas_util.retrieve_request_status_using_handle(
                    node_in_test[0], handle, shell)
            if status == "running":
                run_count += 1
                self.log.info("query with handle %s is running." % handle)
            elif status == "failed":
                fail_count += 1
                self.log.info("query with handle %s failed." % handle)
            elif status == "success":
                success_count += 1
                self.log.info("query with handle %s is successful." % handle)
            else:
                aborted_count += 1
                self.log.info("Queued job is deleted: %s" % status)
        self.log.info("After failover %s queued jobs are Running."
                      % run_count)
        self.log.info("After failover %s queued jobs are Failed."
                      % fail_count)
        self.log.info("After failover %s queued jobs are Successful."
                      % success_count)
        self.log.info("After failover %s queued jobs are Aborted."
                      % aborted_count)
        if self.rebalance_node == "NC":
            self.assertTrue(aborted_count == 0, "Some queries aborted")

        query = "select count(*) from {0};".format(self.cbas_dataset_name)
        self.cbas_util._run_concurrent_queries(query, "immediate", 100)
        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, self.num_items * 2):
            self.fail(
                "No. of items in CBAS dataset do not match that in the CB bucket")
        self.ingest_more_data()
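    # test_cc_swap_rebalance and test_failover tally async-query handle
    # states with identical bookkeeping. A hypothetical helper (not part of
    # the original suite) that bundles it, assuming
    # retrieve_request_status_using_handle() returns a (status, handle)
    # pair and that any status other than running/failed/success means the
    # queued job was deleted:
    def _summarize_handle_statuses(self, server, handles, shell):
        counts = {"running": 0, "failed": 0, "success": 0, "aborted": 0}
        for handle in handles:
            status, _ = \
                self.cbas_util.retrieve_request_status_using_handle(
                    server, handle, shell)
            if status in ("running", "failed", "success"):
                counts[status] += 1
                self.log.info("query with handle %s is %s."
                              % (handle, status))
            else:
                counts["aborted"] += 1
                self.log.info("Queued job is deleted: %s" % status)
        for state, count in counts.items():
            self.log.info("%s queued jobs are %s." % (count, state))
        return counts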