def test_analytics_recovery_on_idle_system(self):
    """Kill the analytics service on an idle cluster and verify recovery.

    After the kill, the dataset must still hold its documents (no full
    re-ingestion) and must continue ingesting newly created documents.
    """
    self.log.info("Load data, create cbas buckets, and datasets")
    self.set_up_test()

    self.log.info("Wait for ingestion to complete")
    self.cbas_util.wait_for_ingestion_complete([self.dataset_name],
                                               self.num_items)
    self.assertTrue(
        self.cbas_util.validate_cbas_dataset_items_count(
            self.dataset_name, self.num_items))

    # Build the kill list from the configured CC / NC flags.
    self.log.info("Get the nodes on which kill is to be run")
    self.nodes_to_kill_service_on = []
    if self.kill_on_cc:
        self.nodes_to_kill_service_on.append(self.cbas_node)
    if self.kill_on_nc:
        self.nodes_to_kill_service_on.extend(self.cbas_servers)

    self.log.info("Establish a remote connection on node and kill service")
    for target_node in self.nodes_to_kill_service_on:
        remote_shell = RemoteMachineShellConnection(target_node)
        remote_shell.kill_process(self.process, self.service,
                                  signum=self.signum)

    self.sleep(
        5,
        "Sleeping for 5 seconds as after killing the service the service takes some time to exit and the service checks get pass by that time."
    )

    self.log.info("Wait for cluster to be active")
    self.assertTrue(self.cbas_util.wait_for_cbas_to_recover(),
                    msg="Analytics service unavailable")

    # A non-empty dataset right after restart means state was recovered
    # rather than re-ingested from scratch.
    self.log.info("Observe no reingestion on node after restart")
    items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
        self.dataset_name)
    self.assertTrue(
        items_in_cbas_bucket > 0,
        msg="Items in CBAS bucket must greather than 0. If not re-ingestion has happened"
    )
    self.assertTrue(
        self.cbas_util.validate_cbas_dataset_items_count(
            self.dataset_name, self.num_items))

    self.log.info("Add more documents in the default bucket")
    self.perform_doc_ops_in_all_cb_buckets(self.num_items, "create",
                                           self.num_items,
                                           self.num_items * 2,
                                           exp=0,
                                           batch_size=self.batch_size)

    self.log.info("Wait for ingestion to complete")
    self.assertTrue(
        self.cbas_util.validate_cbas_dataset_items_count(
            self.dataset_name, self.num_items * 2))
def test_analytics_recovery_on_busy_system(self):
    """Kill the analytics service while long-running queries execute.

    Verifies the service recovers, the dataset is not fully re-ingested,
    and ingestion of new documents resumes afterwards.

    Fixes over the previous version:
    - ``neglect_failures`` was only assigned inside the ``kill_on_cc``
      branch, so a run with only ``kill_on_nc`` raised ``NameError`` in
      the ``except`` clause; it is now initialised to ``False`` first.
    - ``items_in_cbas_bucket`` could be unbound if every poll attempt
      raised; it is now pre-initialised to ``-1``.
    - The bare ``except:`` in the poll loop is narrowed to ``Exception``.
    """
    self.log.info("Load data, create cbas buckets, and datasets")
    self.set_up_test()

    self.log.info("Wait for ingestion to complete")
    self.cbas_util.wait_for_ingestion_complete([self.dataset_name],
                                               self.num_items)
    self.assertTrue(
        self.cbas_util.validate_cbas_dataset_items_count(
            self.dataset_name, self.num_items))

    self.log.info("Get the nodes on which kill is to be run")
    # Must be defined unconditionally: it is read in the except clause below.
    neglect_failures = False
    self.nodes_to_kill_service_on = []
    if self.kill_on_cc:
        # Killing the CC aborts in-flight queries; those failures are expected.
        neglect_failures = True
        self.nodes_to_kill_service_on.append(self.cbas_node)
    if self.kill_on_nc:
        for cbas_server in self.cbas_servers:
            self.nodes_to_kill_service_on.append(cbas_server)

    self.log.info("Run concurrent queries to simulate busy system")
    statement = "select sleep(count(*),50000) from {0} where mutated=0;".format(
        self.dataset_name)
    try:
        self.cbas_util._run_concurrent_queries(statement, "async", 500,
                                               batch_size=100)
    except Exception as e:
        if neglect_failures:
            self.log.info(
                "Neglecting failed queries, to handle killing Java/Cbas process kill on CC & NC node %s" % e)
        else:
            raise

    self.log.info("Establish a remote connection on node and kill service")
    for node in self.nodes_to_kill_service_on:
        shell = RemoteMachineShellConnection(node)
        shell.kill_process(self.process, self.service, signum=self.signum)

    self.sleep(20, message="wait for service to be back again...")

    self.log.info("Observe no reingestion on node after restart")
    # Poll for up to 120s: the service may still be bootstrapping and the
    # item-count call can raise or return -1 until it is ready.
    items_in_cbas_bucket = -1
    start_time = time.time()
    while time.time() < start_time + 120:
        try:
            items_in_cbas_bucket, _ = \
                self.cbas_util.get_num_items_in_cbas_dataset(self.dataset_name)
            if items_in_cbas_bucket != -1:
                break
        except Exception:
            pass
    self.assertTrue(
        items_in_cbas_bucket > 0,
        msg="Items in CBAS bucket must greather than 0. If not re-ingestion has happened")
    self.assertTrue(
        self.cbas_util.validate_cbas_dataset_items_count(
            self.dataset_name, self.num_items))

    self.log.info("Add more documents in the default bucket")
    self.perform_doc_ops_in_all_cb_buckets(self.num_items, "create",
                                           self.num_items,
                                           self.num_items * 2,
                                           exp=0,
                                           batch_size=self.batch_size)

    self.log.info("Wait for ingestion to complete")
    self.cbas_util.wait_for_ingestion_complete([self.dataset_name],
                                               self.num_items * 2)
    self.assertTrue(
        self.cbas_util.validate_cbas_dataset_items_count(
            self.dataset_name, self.num_items * 2))
def test_analytics_recovery_on_idle_system(self):
    """Kill the analytics service on an idle cluster and verify recovery.

    After the kill, the dataset must still hold its documents (no full
    re-ingestion) and must keep ingesting newly created documents.

    Fixes over the previous version:
    - ``items_in_cbas_bucket`` could be unbound (``NameError``) if every
      attempt inside the 120s poll loop raised; it is now pre-initialised
      to ``-1`` so the subsequent assertion fails cleanly instead.
    - The bare ``except:`` is narrowed to ``Exception``.
    """
    self.log.info("Load data, create cbas buckets, and datasets")
    self.set_up_test()

    self.log.info("Wait for ingestion to complete")
    self.cbas_util.wait_for_ingestion_complete([self.dataset_name],
                                               self.num_items)
    self.assertTrue(
        self.cbas_util.validate_cbas_dataset_items_count(
            self.dataset_name, self.num_items))

    self.log.info("Get the nodes on which kill is to be run")
    self.nodes_to_kill_service_on = []
    if self.kill_on_cc:
        self.nodes_to_kill_service_on.append(self.cbas_node)
    if self.kill_on_nc:
        for cbas_server in self.cbas_servers:
            self.nodes_to_kill_service_on.append(cbas_server)

    self.log.info("Establish a remote connection on node and kill service")
    for node in self.nodes_to_kill_service_on:
        shell = RemoteMachineShellConnection(node)
        shell.kill_process(self.process, self.service, signum=self.signum)

    self.log.info("Observe no reingestion on node after restart")
    # Poll for up to 120s: the service may still be bootstrapping and the
    # item-count call can raise or return -1 until it is ready.
    items_in_cbas_bucket = -1
    start_time = time.time()
    while time.time() < start_time + 120:
        try:
            items_in_cbas_bucket, _ = \
                self.cbas_util.get_num_items_in_cbas_dataset(self.dataset_name)
            if items_in_cbas_bucket != -1:
                break
        except Exception:
            pass
    self.assertTrue(
        items_in_cbas_bucket > 0,
        msg="Items in CBAS bucket must greather than 0. If not re-ingestion has happened")
    self.assertTrue(
        self.cbas_util.validate_cbas_dataset_items_count(
            self.dataset_name, self.num_items))

    self.log.info("Add more documents in the default bucket")
    self.perform_doc_ops_in_all_cb_buckets(self.num_items, "create",
                                           self.num_items,
                                           self.num_items * 2,
                                           exp=0,
                                           batch_size=self.batch_size)

    self.log.info("Wait for ingestion to complete")
    self.cbas_util.wait_for_ingestion_complete([self.dataset_name],
                                               self.num_items * 2)
    self.assertTrue(
        self.cbas_util.validate_cbas_dataset_items_count(
            self.dataset_name, self.num_items * 2))
def test_logging_configurations_are_restored_post_service_restarts(self):
    """Verify CBAS logger configuration survives service kill/restart/reboot.

    Sets, deletes, updates and adds loggers, verifies the configuration on
    both CBAS nodes, then disrupts the service per the input params
    (kill_services / restart_couchbase / reboot) and re-verifies that the
    same configuration is restored on both nodes.

    Fixes over the previous version:
    - ``assertEquals`` (deprecated alias, removed in Python 3.12) replaced
      with ``assertEqual``.
    - ``except Exception as e: pass`` in the bootstrap-wait loop now logs
      the exception instead of silently discarding it.
    """
    self.log.info("Add a cbas node")
    result = self.add_node(self.cbas_servers[0], services=["cbas"],
                           rebalance=True)
    self.assertTrue(result, msg="Failed to add CBAS node")

    self.log.info("Delete all loggers")
    self.cbas_util.delete_all_loggers_on_cbas()

    self.log.info("Set the logging level using the json object")
    status, content, response = self.cbas_util.set_log_level_on_cbas(
        CbasLogging.DEFAULT_LOGGER_CONFIG_DICT)
    self.assertTrue(status, msg="Response status incorrect for SET request")

    self.log.info("Delete specific logger")
    logger_name = self.input.param("logger_name_to_delete",
                                   "com.couchbase.client.core.node")
    status, content, response = self.cbas_util.delete_specific_cbas_log_level(
        logger_name)
    self.assertTrue(status, msg="Status mismatch for DELETE")
    # Keep the expected-config dict in sync with what the server now has.
    del CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[logger_name]

    self.log.info("Update specific logger")
    logger_name = self.input.param("logger_name_to_update",
                                   "org.apache.hyracks")
    logger_level_to_update = self.input.param("logger_level_to_update",
                                              "FATAL")
    status, response, content = self.cbas_util.set_specific_log_level_on_cbas(
        logger_name, logger_level_to_update)
    self.assertTrue(status, msg="Status mismatch for SET")
    CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[
        logger_name] = logger_level_to_update

    self.log.info("Add a new logger")
    logger_name = self.input.param("logger_name_to_add",
                                   "org.apache.hyracks123")
    logger_level_to_add = self.input.param("logger_level_to_add", "ALL")
    status, response, content = self.cbas_util.set_specific_log_level_on_cbas(
        logger_name, logger_level_to_add)
    self.assertTrue(status, msg="Status mismatch for SET")
    CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[logger_name] = logger_level_to_add

    self.log.info("Verify logging configuration that we set on cbas Node")
    for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
        status, content, response = self.cbas_util.get_specific_cbas_log_level(
            name)
        self.assertTrue(status,
                        msg="Response status incorrect for GET request")
        self.assertEqual(content, level,
                         msg="Logger configuration mismatch for logger "
                             + name)

    self.sleep(
        timeout=10,
        message="Waiting for logger configuration to be copied across cbas nodes")

    self.log.info("Verify logging configuration on other cbas node")
    for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
        status, content, response = cbas_utils(
            self.master,
            self.cbas_servers[0]).get_specific_cbas_log_level(name)
        self.assertTrue(status,
                        msg="Response status incorrect for GET request")
        self.assertEqual(content, level,
                         msg="Logger configuration mismatch for logger "
                             + name)

    self.log.info("Read input params")
    process_name = self.input.param('process_name', None)
    service_name = self.input.param('service_name', None)
    restart_couchbase = self.input.param('restart_couchbase', False)
    reboot = self.input.param('reboot', False)
    kill_services = self.input.param('kill_services', False)

    self.log.info("Establish a remote connection")
    shell_cc = RemoteMachineShellConnection(self.cbas_node)
    shell_nc = RemoteMachineShellConnection(self.cbas_servers[0])

    if kill_services:
        self.log.info("Kill the %s service on CC cbas node" % service_name)
        shell_cc.kill_process(process_name, service_name)
        self.log.info("Kill the %s service on other cbas node" % service_name)
        shell_nc.kill_process(process_name, service_name)
    if restart_couchbase:
        self.log.info("Restart couchbase CC node ")
        shell_cc.restart_couchbase()
        self.log.info("Restart couchbase NC node ")
        shell_nc.restart_couchbase()
    if reboot:
        self.log.info("Reboot couchbase CC node")
        NodeHelper.reboot_server(self.cbas_node, self)
        self.log.info("Reboot couchbase NC node")
        NodeHelper.reboot_server(self.cbas_servers[0], self)

    # Poll for up to 1 minute until the full logger set is visible again,
    # ignoring transient unreachable-server errors during bootstrap.
    end_time = datetime.datetime.now() + datetime.timedelta(minutes=int(1))
    self.log.info(
        "Wait for nodes to be bootstrapped, neglect the unreachable server exceptions")
    while datetime.datetime.now() < end_time:
        try:
            self.log.info("Get the logging configurations")
            status, content, response = self.cbas_util.get_log_level_on_cbas()
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")

            self.log.info("Convert response to a dictionary")
            log_dict = CbasLogging.convert_logger_get_result_to_a_dict(
                content)
            if len(log_dict) >= len(CbasLogging.DEFAULT_LOGGER_CONFIG_DICT):
                break
        except Exception as e:
            self.log.info("Ignoring exception while waiting for bootstrap: %s"
                          % e)

    self.log.info("Verify logging configuration post service kill")
    for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
        status, content, response = self.cbas_util.get_specific_cbas_log_level(
            name)
        self.assertTrue(status,
                        msg="Response status incorrect for GET request")
        self.assertEqual(content, level,
                         msg="Logger configuration mismatch for logger "
                             + name)

    self.sleep(
        timeout=10,
        message="Waiting for logger configuration to be copied across cbas nodes")

    self.log.info(
        "Verify logging configuration on other cbas node post service kill")
    for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
        status, content, response = cbas_utils(
            self.master,
            self.cbas_servers[0]).get_specific_cbas_log_level(name)
        self.assertTrue(status,
                        msg="Response status incorrect for GET request")
        self.assertEqual(content, level,
                         msg="Logger configuration mismatch for logger "
                             + name)
def test_logging_configurations_are_restored_post_service_restarts(self):
    """Verify CBAS logger configuration survives service kill/restart/reboot.

    Variant that waits for recovery via the private analytics ``ping()``
    function rather than by re-reading the logger list.

    Fixes over the previous version:
    - ``assertEquals`` (deprecated alias, removed in Python 3.12) replaced
      with ``assertEqual``.
    - The recovery-wait assignment statement was physically split across
      lines (a syntax error); it has been reconstructed.
    - The bare ``except:`` in the recovery loop is narrowed to
      ``Exception``.
    """
    self.log.info("Add a cbas node")
    result = self.add_node(self.cbas_servers[0], services=["cbas"],
                           rebalance=True)
    self.assertTrue(result, msg="Failed to add CBAS node")

    self.log.info("Delete all loggers")
    self.cbas_util.delete_all_loggers_on_cbas()

    self.log.info("Set the logging level using the json object")
    status, content, response = self.cbas_util.set_log_level_on_cbas(
        CbasLogging.DEFAULT_LOGGER_CONFIG_DICT)
    self.assertTrue(status, msg="Response status incorrect for SET request")

    self.log.info("Delete specific logger")
    logger_name = self.input.param("logger_name_to_delete",
                                   "com.couchbase.client.core.node")
    status, content, response = self.cbas_util.delete_specific_cbas_log_level(
        logger_name)
    self.assertTrue(status, msg="Status mismatch for DELETE")
    # Keep the expected-config dict in sync with what the server now has.
    del CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[logger_name]

    self.log.info("Update specific logger")
    logger_name = self.input.param("logger_name_to_update",
                                   "org.apache.hyracks")
    logger_level_to_update = self.input.param("logger_level_to_update",
                                              "FATAL")
    status, response, content = self.cbas_util.set_specific_log_level_on_cbas(
        logger_name, logger_level_to_update)
    self.assertTrue(status, msg="Status mismatch for SET")
    CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[
        logger_name] = logger_level_to_update

    self.log.info("Add a new logger")
    logger_name = self.input.param("logger_name_to_add",
                                   "org.apache.hyracks123")
    logger_level_to_add = self.input.param("logger_level_to_add", "ALL")
    status, response, content = self.cbas_util.set_specific_log_level_on_cbas(
        logger_name, logger_level_to_add)
    self.assertTrue(status, msg="Status mismatch for SET")
    CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[logger_name] = logger_level_to_add

    self.log.info("Verify logging configuration that we set on cbas Node")
    for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
        status, content, response = self.cbas_util.get_specific_cbas_log_level(
            name)
        self.assertTrue(status,
                        msg="Response status incorrect for GET request")
        self.assertEqual(content, level,
                         msg="Logger configuration mismatch for logger "
                             + name)

    self.sleep(
        timeout=10,
        message="Waiting for logger configuration to be copied across cbas nodes")

    self.log.info("Verify logging configuration on other cbas node")
    for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
        status, content, response = cbas_utils(
            self.master,
            self.cbas_servers[0]).get_specific_cbas_log_level(name)
        self.assertTrue(status,
                        msg="Response status incorrect for GET request")
        self.assertEqual(content, level,
                         msg="Logger configuration mismatch for logger "
                             + name)

    self.log.info("Read input params")
    process_name = self.input.param('process_name', None)
    service_name = self.input.param('service_name', None)
    restart_couchbase = self.input.param('restart_couchbase', False)
    reboot = self.input.param('reboot', False)
    kill_services = self.input.param('kill_services', False)

    self.log.info("Establish a remote connection")
    shell_cc = RemoteMachineShellConnection(self.cbas_node)
    shell_nc = RemoteMachineShellConnection(self.cbas_servers[0])

    if kill_services:
        self.log.info("Kill the %s service on CC cbas node" % service_name)
        shell_cc.kill_process(process_name, service_name)
        self.log.info("Kill the %s service on other cbas node" % service_name)
        shell_nc.kill_process(process_name, service_name)
    if restart_couchbase:
        self.log.info("Restart couchbase CC node ")
        shell_cc.restart_couchbase()
        self.log.info("Restart couchbase NC node ")
        shell_nc.restart_couchbase()
    if reboot:
        self.log.info("Reboot couchbase CC node")
        NodeHelper.reboot_server(self.cbas_node, self)
        self.log.info("Reboot couchbase NC node")
        NodeHelper.reboot_server(self.cbas_servers[0], self)

    self.log.info(
        "Wait for request to complete and cluster to be active: Using private ping() function")
    # Poll for up to 180s until the analytics service answers ping().
    cluster_recover_start_time = time.time()
    while time.time() < cluster_recover_start_time + 180:
        try:
            status, metrics, _, cbas_result, _ = \
                self.cbas_util.execute_statement_on_cbas_util(
                    "set `import-private-functions` `true`;ping();")
            if status == "success":
                break
        except Exception:
            self.sleep(2, message="Wait for service to up again")

    self.log.info("Verify logging configuration post service kill")
    for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
        status, content, response = self.cbas_util.get_specific_cbas_log_level(
            name)
        self.assertTrue(status,
                        msg="Response status incorrect for GET request")
        self.assertEqual(content, level,
                         msg="Logger configuration mismatch for logger "
                             + name)

    self.sleep(
        timeout=10,
        message="Waiting for logger configuration to be copied across cbas nodes")

    self.log.info(
        "Verify logging configuration on other cbas node post service kill")
    for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
        status, content, response = cbas_utils(
            self.master,
            self.cbas_servers[0]).get_specific_cbas_log_level(name)
        self.assertTrue(status,
                        msg="Response status incorrect for GET request")
        self.assertEqual(content, level,
                         msg="Logger configuration mismatch for logger "
                             + name)
def test_signal_impact_on_cbas(self):
    """Pause/resume the analytics service with SIGSTOP/SIGCONT around
    mutations and verify ingestion catches up each time.

    NOTE(review): this test calls perform_doc_ops_in_all_cb_buckets
    without the leading item-count argument used by the other tests in
    this file — presumably a different overload; confirm against the
    base class.
    """
    self.log.info("Add nodes, create cbas bucket and dataset")
    self.set_up_test()

    self.log.info("Wait for ingestion to complete and verify count")
    self.cbas_util.wait_for_ingestion_complete([self.dataset_name],
                                               self.num_items)
    self.assertTrue(
        self.cbas_util.validate_cbas_dataset_items_count(
            self.dataset_name, self.num_items))

    self.log.info("Establish a remote connection")
    shell_node1 = RemoteMachineShellConnection(self.cbas_node)
    shell_node2 = RemoteMachineShellConnection(self.cluster.cbas_nodes[0])

    # Signal 19 = SIGSTOP (pause), 18 = SIGCONT (resume).
    self.log.info("SIGSTOP ANALYTICS SERVICE")
    for shell in (shell_node1, shell_node2):
        shell.kill_process(self.process, self.service, 19)

    self.log.info("Add more documents in the default bucket")
    self.perform_doc_ops_in_all_cb_buckets("create", self.num_items,
                                           self.num_items * 2,
                                           exp=0,
                                           batch_size=self.batch_size)

    self.log.info("SIGCONT ANALYTICS")
    for shell in (shell_node1, shell_node2):
        shell.kill_process(self.process, self.service, 18)
    self.sleep(15)

    self.log.info("Wait for ingestion to complete and verify count")
    self.cbas_util.wait_for_ingestion_complete([self.dataset_name],
                                               self.num_items * 2)
    self.assertTrue(
        self.cbas_util.validate_cbas_dataset_items_count(
            self.dataset_name, self.num_items * 2))

    self.log.info("SIGSTOP ANALYTICS SERVICE")
    for shell in (shell_node1, shell_node2):
        shell.kill_process(self.process, self.service, 19)

    self.log.info("Delete documents in the default bucket")
    self.perform_doc_ops_in_all_cb_buckets("delete", 0, self.num_items,
                                           exp=0,
                                           batch_size=self.batch_size)

    self.log.info("SIGCONT ANALYTICS")
    for shell in (shell_node1, shell_node2):
        shell.kill_process(self.process, self.service, 18)
    self.sleep(15)

    self.log.info("Wait for ingestion to complete and verify count")
    self.cbas_util.wait_for_ingestion_complete([self.dataset_name],
                                               self.num_items)
    self.assertTrue(
        self.cbas_util.validate_cbas_dataset_items_count(
            self.dataset_name, self.num_items))

    shell_node1.disconnect()
    shell_node2.disconnect()
def test_analytics_recovery_on_busy_system(self):
    """Kill the analytics service while queries run; wait via ping().

    Variant that confirms recovery through the private analytics
    ``ping()`` function before checking that no re-ingestion happened.

    Fixes over the previous version:
    - ``neglect_failures`` was only assigned inside the ``kill_on_cc``
      branch, so a run with only ``kill_on_nc`` raised ``NameError`` in
      the ``except`` clause; it is now initialised to ``False`` first.
    - The assertion-message string literal was physically split across
      lines (a syntax error); it has been reconstructed.
    - The bare ``except:`` in the recovery loop is narrowed to
      ``Exception``.
    """
    self.log.info("Load data, create cbas buckets, and datasets")
    self.set_up_test()

    self.log.info("Wait for ingestion to complete")
    self.cbas_util.wait_for_ingestion_complete([self.dataset_name],
                                               self.num_items)
    self.assertTrue(
        self.cbas_util.validate_cbas_dataset_items_count(
            self.dataset_name, self.num_items))

    self.log.info("Get the nodes on which kill is to be run")
    # Must be defined unconditionally: it is read in the except clause below.
    neglect_failures = False
    self.nodes_to_kill_service_on = []
    if self.kill_on_cc:
        # Killing the CC aborts in-flight queries; those failures are expected.
        neglect_failures = True
        self.nodes_to_kill_service_on.append(self.cbas_node)
    if self.kill_on_nc:
        for cbas_server in self.cbas_servers:
            self.nodes_to_kill_service_on.append(cbas_server)

    self.log.info("Run concurrent queries to simulate busy system")
    statement = "select sleep(count(*),50000) from {0} where mutated=0;".format(
        self.dataset_name)
    try:
        self.cbas_util._run_concurrent_queries(statement, "async", 10,
                                               batch_size=10)
    except Exception as e:
        if neglect_failures:
            self.log.info(
                "Neglecting failed queries, to handle killing Java/Cbas process kill on CC & NC node %s" % e)
        else:
            raise

    self.log.info("Establish a remote connection on node and kill service")
    for node in self.nodes_to_kill_service_on:
        shell = RemoteMachineShellConnection(node)
        shell.kill_process(self.process, self.service, signum=self.signum)

    self.sleep(
        5,
        "Sleeping for 5 seconds as after killing the service the service takes some time to exit and the service checks get pass by that time."
    )

    self.log.info(
        "Wait for request to complete and cluster to be active: Using private ping() function")
    # Poll for up to 120s until the analytics service answers ping().
    service_up = False
    start_time = time.time()
    while time.time() < start_time + 120:
        try:
            status, metrics, _, cbas_result, _ = \
                self.cbas_util.execute_statement_on_cbas_util(
                    "set `import-private-functions` `true`;ping();",
                    timeout=600, analytics_timeout=600)
            if status == "success":
                service_up = True
                break
        except Exception:
            pass
        self.sleep(1)
    self.assertTrue(
        service_up,
        msg="CBAS service was not up even after 120 seconds of process kill. Failing the test possible a bug")

    self.log.info("Observe no reingestion on node after restart")
    items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
        self.dataset_name)
    self.assertTrue(
        items_in_cbas_bucket > 0,
        msg="Items in CBAS bucket must greather than 0. If not re-ingestion has happened")
    self.assertTrue(
        self.cbas_util.validate_cbas_dataset_items_count(
            self.dataset_name, self.num_items))

    self.log.info("Add more documents in the default bucket")
    self.perform_doc_ops_in_all_cb_buckets(self.num_items, "create",
                                           self.num_items,
                                           self.num_items * 2,
                                           exp=0,
                                           batch_size=self.batch_size)

    self.log.info("Wait for ingestion to complete")
    self.assertTrue(
        self.cbas_util.validate_cbas_dataset_items_count(
            self.dataset_name, self.num_items * 2))