Ejemplo n.º 1
0
    def test_analytics_recovery_on_idle_system(self):
        """Kill the analytics service on an idle system and verify recovery.

        After the kill, the dataset must retain its items (i.e. no full
        re-ingestion from scratch) and must continue ingesting new mutations.
        """
        self.log.info("Load data, create cbas buckets, and datasets")
        self.set_up_test()

        self.log.info("Wait for ingestion to complete")
        self.cbas_util.wait_for_ingestion_complete([self.dataset_name],
                                                   self.num_items)
        self.assertTrue(
            self.cbas_util.validate_cbas_dataset_items_count(
                self.dataset_name, self.num_items))

        self.log.info("Get the nodes on which kill is to be run")
        self.nodes_to_kill_service_on = []
        if self.kill_on_cc:
            self.nodes_to_kill_service_on.append(self.cbas_node)
        if self.kill_on_nc:
            self.nodes_to_kill_service_on.extend(self.cbas_servers)

        self.log.info("Establish a remote connection on node and kill service")
        for node in self.nodes_to_kill_service_on:
            shell = RemoteMachineShellConnection(node)
            try:
                shell.kill_process(self.process, self.service,
                                   signum=self.signum)
            finally:
                # Close the SSH session so connections do not leak.
                shell.disconnect()

        self.sleep(
            5,
            "Sleeping for 5 seconds as after killing the service the service takes some time to exit and the service checks get pass by that time."
        )

        self.log.info("Wait for cluster to be active")
        self.assertTrue(self.cbas_util.wait_for_cbas_to_recover(),
                        msg="Analytics service unavailable")

        self.log.info("Observe no reingestion on node after restart")
        items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
            self.dataset_name)
        # A count of 0 would mean the dataset was wiped and re-ingestion
        # restarted from scratch, which must not happen on recovery.
        self.assertTrue(
            items_in_cbas_bucket > 0,
            msg="Items in CBAS bucket must be greater than 0. "
                "If not, re-ingestion has happened")
        self.assertTrue(
            self.cbas_util.validate_cbas_dataset_items_count(
                self.dataset_name, self.num_items))

        self.log.info("Add more documents in the default bucket")
        self.perform_doc_ops_in_all_cb_buckets(self.num_items,
                                               "create",
                                               self.num_items,
                                               self.num_items * 2,
                                               exp=0,
                                               batch_size=self.batch_size)

        self.log.info("Wait for ingestion to complete")
        self.assertTrue(
            self.cbas_util.validate_cbas_dataset_items_count(
                self.dataset_name, self.num_items * 2))
Ejemplo n.º 2
0
    def test_analytics_recovery_on_busy_system(self):
        """Kill the analytics service while concurrent queries are running
        and verify recovery: no re-ingestion from scratch, and new mutations
        are still ingested afterwards.
        """
        self.log.info("Load data, create cbas buckets, and datasets")
        self.set_up_test()

        self.log.info("Wait for ingestion to complete")
        self.cbas_util.wait_for_ingestion_complete([self.dataset_name], self.num_items)
        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(self.dataset_name, self.num_items))

        self.log.info("Get the nodes on which kill is to be run")
        self.nodes_to_kill_service_on = []
        # Query failures are only tolerated when the CC node is being killed;
        # must be initialised here or the except-branch below raises
        # NameError when kill_on_cc is False.
        neglect_failures = False
        if self.kill_on_cc:
            neglect_failures = True
            self.nodes_to_kill_service_on.append(self.cbas_node)
        if self.kill_on_nc:
            for cbas_server in self.cbas_servers:
                self.nodes_to_kill_service_on.append(cbas_server)

        self.log.info("Run concurrent queries to simulate busy system")
        statement = "select sleep(count(*),50000) from {0} where mutated=0;".format(self.dataset_name)
        try:
            self.cbas_util._run_concurrent_queries(statement, "async", 500, batch_size=100)
        except Exception as e:
            if neglect_failures:
                self.log.info("Neglecting failed queries, to handle killing Java/Cbas process kill on CC & NC node %s"%e)
            else:
                raise e

        self.log.info("Establish a remote connection on node and kill service")
        for node in self.nodes_to_kill_service_on:
            shell = RemoteMachineShellConnection(node)
            try:
                shell.kill_process(self.process, self.service, signum=self.signum)
            finally:
                # Close the SSH session so connections do not leak.
                shell.disconnect()
            self.sleep(20, message="wait for service to be back again...")

        self.log.info("Observe no reingestion on node after restart")
        # -1 means the count is not yet available; poll for up to 120 seconds
        # while the service comes back.  Initialise so the assertion below
        # cannot hit an unbound local if the loop never succeeds.
        items_in_cbas_bucket = -1
        start_time = time.time()
        while time.time() < start_time + 120:
            try:
                items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.dataset_name)
                if items_in_cbas_bucket != -1:
                    break
            except Exception:
                pass
        self.assertTrue(items_in_cbas_bucket > 0, msg="Items in CBAS bucket must be greater than 0. If not, re-ingestion has happened")
        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(self.dataset_name, self.num_items))

        self.log.info("Add more documents in the default bucket")
        self.perform_doc_ops_in_all_cb_buckets(self.num_items, "create", self.num_items, self.num_items * 2, exp=0,
                                               batch_size=self.batch_size)

        self.log.info("Wait for ingestion to complete")
        self.cbas_util.wait_for_ingestion_complete([self.dataset_name], self.num_items * 2)
        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(self.dataset_name, self.num_items * 2))
Ejemplo n.º 3
0
    def test_analytics_recovery_on_idle_system(self):
        """Kill the analytics service on an idle system and verify recovery:
        the dataset keeps its items (no re-ingestion from scratch) and new
        mutations are still ingested afterwards.
        """
        self.log.info("Load data, create cbas buckets, and datasets")
        self.set_up_test()

        self.log.info("Wait for ingestion to complete")
        self.cbas_util.wait_for_ingestion_complete([self.dataset_name], self.num_items)
        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(self.dataset_name, self.num_items))

        self.log.info("Get the nodes on which kill is to be run")
        self.nodes_to_kill_service_on = []
        if self.kill_on_cc:
            self.nodes_to_kill_service_on.append(self.cbas_node)
        if self.kill_on_nc:
            for cbas_server in self.cbas_servers:
                self.nodes_to_kill_service_on.append(cbas_server)

        self.log.info("Establish a remote connection on node and kill service")
        for node in self.nodes_to_kill_service_on:
            shell = RemoteMachineShellConnection(node)
            try:
                shell.kill_process(self.process, self.service, signum=self.signum)
            finally:
                # Close the SSH session so connections do not leak.
                shell.disconnect()

        self.log.info("Observe no reingestion on node after restart")
        # -1 means the count is not yet available; poll for up to 120 seconds
        # while the service comes back.  Initialise so the assertion below
        # cannot hit an unbound local if the loop never succeeds.
        items_in_cbas_bucket = -1
        start_time = time.time()
        while time.time() < start_time + 120:
            try:
                items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.dataset_name)
                if items_in_cbas_bucket != -1:
                    break
            except Exception:
                pass
        self.assertTrue(items_in_cbas_bucket > 0, msg="Items in CBAS bucket must be greater than 0. If not, re-ingestion has happened")
        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(self.dataset_name, self.num_items))

        self.log.info("Add more documents in the default bucket")
        self.perform_doc_ops_in_all_cb_buckets(self.num_items, "create", self.num_items, self.num_items * 2, exp=0,
                                               batch_size=self.batch_size)

        self.log.info("Wait for ingestion to complete")
        self.cbas_util.wait_for_ingestion_complete([self.dataset_name], self.num_items * 2)
        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(self.dataset_name, self.num_items * 2))
Ejemplo n.º 4
0
    def test_logging_configurations_are_restored_post_service_restarts(self):
        """Verify CBAS logger configurations survive service kill / couchbase
        restart / node reboot.

        Sets, deletes, updates and adds loggers, verifies the configuration
        on both cbas nodes, disrupts the service per the input params, waits
        for the nodes to bootstrap, then re-verifies the configuration.
        """
        self.log.info("Add a cbas node")
        result = self.add_node(self.cbas_servers[0],
                               services=["cbas"],
                               rebalance=True)
        self.assertTrue(result, msg="Failed to add CBAS node")

        self.log.info("Delete all loggers")
        self.cbas_util.delete_all_loggers_on_cbas()

        self.log.info("Set the logging level using the json object")
        status, content, response = self.cbas_util.set_log_level_on_cbas(
            CbasLogging.DEFAULT_LOGGER_CONFIG_DICT)
        self.assertTrue(status,
                        msg="Response status incorrect for SET request")

        self.log.info("Delete specific logger")
        logger_name = self.input.param("logger_name_to_delete",
                                       "com.couchbase.client.core.node")
        status, content, response = self.cbas_util.delete_specific_cbas_log_level(
            logger_name)
        self.assertTrue(status, msg="Status mismatch for DELETE")
        # Keep the expected-config dict in sync with what was deleted.
        del CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[logger_name]

        self.log.info("Update specific logger")
        logger_name = self.input.param("logger_name_to_update",
                                       "org.apache.hyracks")
        logger_level_to_update = self.input.param("logger_level_to_update",
                                                  "FATAL")
        status, response, content = self.cbas_util.set_specific_log_level_on_cbas(
            logger_name, logger_level_to_update)
        self.assertTrue(status, msg="Status mismatch for SET")
        CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[
            logger_name] = logger_level_to_update

        self.log.info("Add a new logger")
        logger_name = self.input.param("logger_name_to_add",
                                       "org.apache.hyracks123")
        logger_level_to_add = self.input.param("logger_level_to_add", "ALL")
        status, response, content = self.cbas_util.set_specific_log_level_on_cbas(
            logger_name, logger_level_to_add)
        self.assertTrue(status, msg="Status mismatch for SET")
        CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[
            logger_name] = logger_level_to_add

        self.log.info("Verify logging configuration that we set on cbas Node")
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = self.cbas_util.get_specific_cbas_log_level(
                name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEqual(content,
                             level,
                             msg="Logger configuration mismatch for logger " +
                             name)

        self.sleep(
            timeout=10,
            message=
            "Waiting for logger configuration to be copied across cbas nodes")

        self.log.info("Verify logging configuration on other cbas node")
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = cbas_utils(
                self.master,
                self.cbas_servers[0]).get_specific_cbas_log_level(name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEqual(content,
                             level,
                             msg="Logger configuration mismatch for logger " +
                             name)

        self.log.info("Read input params")
        process_name = self.input.param('process_name', None)
        service_name = self.input.param('service_name', None)
        restart_couchbase = self.input.param('restart_couchbase', False)
        reboot = self.input.param('reboot', False)
        kill_services = self.input.param('kill_services', False)

        self.log.info("Establish a remote connection")
        shell_cc = RemoteMachineShellConnection(self.cbas_node)
        shell_nc = RemoteMachineShellConnection(self.cbas_servers[0])

        if kill_services:
            self.log.info("Kill the %s service on CC cbas node" % service_name)
            shell_cc.kill_process(process_name, service_name)

            self.log.info("Kill the %s service on other cbas node" %
                          service_name)
            shell_nc.kill_process(process_name, service_name)

        if restart_couchbase:
            self.log.info("Restart couchbase CC node ")
            shell_cc.restart_couchbase()

            self.log.info("Restart couchbase NC node ")
            shell_nc.restart_couchbase()

        if reboot:
            self.log.info("Reboot couchbase CC node")
            NodeHelper.reboot_server(self.cbas_node, self)

            self.log.info("Reboot couchbase NC node")
            NodeHelper.reboot_server(self.cbas_servers[0], self)

        # Close the SSH sessions so connections do not leak.
        shell_cc.disconnect()
        shell_nc.disconnect()

        end_time = datetime.datetime.now() + datetime.timedelta(minutes=int(1))
        self.log.info(
            "Wait for nodes to be bootstrapped, neglect the unreachable server exceptions"
        )
        while datetime.datetime.now() < end_time:
            try:
                self.log.info("Get the logging configurations")
                status, content, response = self.cbas_util.get_log_level_on_cbas(
                )
                self.assertTrue(
                    status, msg="Response status incorrect for GET request")

                self.log.info("Convert response to a dictionary")
                log_dict = CbasLogging.convert_logger_get_result_to_a_dict(
                    content)
                if len(log_dict) >= len(
                        CbasLogging.DEFAULT_LOGGER_CONFIG_DICT):
                    break
            except Exception:
                # Ignore transient errors while the service is bootstrapping;
                # the verification loops below do the real checking.
                pass

        self.log.info("Verify logging configuration post service kill")
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = self.cbas_util.get_specific_cbas_log_level(
                name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEqual(content,
                             level,
                             msg="Logger configuration mismatch for logger " +
                             name)

        self.sleep(
            timeout=10,
            message=
            "Waiting for logger configuration to be copied across cbas nodes")

        self.log.info(
            "Verify logging configuration on other cbas node post service kill"
        )
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = cbas_utils(
                self.master,
                self.cbas_servers[0]).get_specific_cbas_log_level(name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEqual(content,
                             level,
                             msg="Logger configuration mismatch for logger " +
                             name)
Ejemplo n.º 5
0
    def test_logging_configurations_are_restored_post_service_restarts(self):
        """Verify CBAS logger configurations survive service kill / couchbase
        restart / node reboot.

        Sets, deletes, updates and adds loggers, verifies the configuration
        on both cbas nodes, disrupts the service per the input params, waits
        for the cluster to answer a ping(), then re-verifies the
        configuration.
        """
        self.log.info("Add a cbas node")
        result = self.add_node(self.cbas_servers[0],
                               services=["cbas"],
                               rebalance=True)
        self.assertTrue(result, msg="Failed to add CBAS node")

        self.log.info("Delete all loggers")
        self.cbas_util.delete_all_loggers_on_cbas()

        self.log.info("Set the logging level using the json object")
        status, content, response = self.cbas_util.set_log_level_on_cbas(
            CbasLogging.DEFAULT_LOGGER_CONFIG_DICT)
        self.assertTrue(status,
                        msg="Response status incorrect for SET request")

        self.log.info("Delete specific logger")
        logger_name = self.input.param("logger_name_to_delete",
                                       "com.couchbase.client.core.node")
        status, content, response = self.cbas_util.delete_specific_cbas_log_level(
            logger_name)
        self.assertTrue(status, msg="Status mismatch for DELETE")
        # Keep the expected-config dict in sync with what was deleted.
        del CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[logger_name]

        self.log.info("Update specific logger")
        logger_name = self.input.param("logger_name_to_update",
                                       "org.apache.hyracks")
        logger_level_to_update = self.input.param("logger_level_to_update",
                                                  "FATAL")
        status, response, content = self.cbas_util.set_specific_log_level_on_cbas(
            logger_name, logger_level_to_update)
        self.assertTrue(status, msg="Status mismatch for SET")
        CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[
            logger_name] = logger_level_to_update

        self.log.info("Add a new logger")
        logger_name = self.input.param("logger_name_to_add",
                                       "org.apache.hyracks123")
        logger_level_to_add = self.input.param("logger_level_to_add", "ALL")
        status, response, content = self.cbas_util.set_specific_log_level_on_cbas(
            logger_name, logger_level_to_add)
        self.assertTrue(status, msg="Status mismatch for SET")
        CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[
            logger_name] = logger_level_to_add

        self.log.info("Verify logging configuration that we set on cbas Node")
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = self.cbas_util.get_specific_cbas_log_level(
                name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEqual(content,
                             level,
                             msg="Logger configuration mismatch for logger " +
                             name)

        self.sleep(
            timeout=10,
            message=
            "Waiting for logger configuration to be copied across cbas nodes")

        self.log.info("Verify logging configuration on other cbas node")
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = cbas_utils(
                self.master,
                self.cbas_servers[0]).get_specific_cbas_log_level(name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEqual(content,
                             level,
                             msg="Logger configuration mismatch for logger " +
                             name)

        self.log.info("Read input params")
        process_name = self.input.param('process_name', None)
        service_name = self.input.param('service_name', None)
        restart_couchbase = self.input.param('restart_couchbase', False)
        reboot = self.input.param('reboot', False)
        kill_services = self.input.param('kill_services', False)

        self.log.info("Establish a remote connection")
        shell_cc = RemoteMachineShellConnection(self.cbas_node)
        shell_nc = RemoteMachineShellConnection(self.cbas_servers[0])

        if kill_services:
            self.log.info("Kill the %s service on CC cbas node" % service_name)
            shell_cc.kill_process(process_name, service_name)

            self.log.info("Kill the %s service on other cbas node" %
                          service_name)
            shell_nc.kill_process(process_name, service_name)

        if restart_couchbase:
            self.log.info("Restart couchbase CC node ")
            shell_cc.restart_couchbase()

            self.log.info("Restart couchbase NC node ")
            shell_nc.restart_couchbase()

        if reboot:
            self.log.info("Reboot couchbase CC node")
            NodeHelper.reboot_server(self.cbas_node, self)

            self.log.info("Reboot couchbase NC node")
            NodeHelper.reboot_server(self.cbas_servers[0], self)

        # Close the SSH sessions so connections do not leak.
        shell_cc.disconnect()
        shell_nc.disconnect()

        self.log.info(
            "Wait for request to complete and cluster to be active: Using private ping() function"
        )
        cluster_recover_start_time = time.time()
        while time.time() < cluster_recover_start_time + 180:
            try:
                status, metrics, _, cbas_result, _ = self.cbas_util.execute_statement_on_cbas_util(
                    "set `import-private-functions` `true`;ping();")
                if status == "success":
                    break
            except Exception:
                self.sleep(2, message="Wait for service to up again")

        self.log.info("Verify logging configuration post service kill")
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = self.cbas_util.get_specific_cbas_log_level(
                name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEqual(content,
                             level,
                             msg="Logger configuration mismatch for logger " +
                             name)

        self.sleep(
            timeout=10,
            message=
            "Waiting for logger configuration to be copied across cbas nodes")

        self.log.info(
            "Verify logging configuration on other cbas node post service kill"
        )
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = cbas_utils(
                self.master,
                self.cbas_servers[0]).get_specific_cbas_log_level(name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEqual(content,
                             level,
                             msg="Logger configuration mismatch for logger " +
                             name)
Ejemplo n.º 6
0
    def test_signal_impact_on_cbas(self):
        """Pause (SIGSTOP) and resume (SIGCONT) the analytics service on both
        cbas nodes while mutating the source bucket, and verify ingestion
        catches up each time the service is resumed.
        """
        # Linux signal numbers: 19 = SIGSTOP (pause), 18 = SIGCONT (resume).
        sigstop, sigcont = 19, 18

        self.log.info("Add nodes, create cbas bucket and dataset")
        self.set_up_test()

        self.log.info("Wait for ingestion to complete and verify count")
        self.cbas_util.wait_for_ingestion_complete([self.dataset_name], self.num_items)
        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(self.dataset_name, self.num_items))

        self.log.info("Establish a remote connection")
        con_cbas_node1 = RemoteMachineShellConnection(self.cbas_node)
        con_cbas_node2 = RemoteMachineShellConnection(self.cluster.cbas_nodes[0])

        def _signal_analytics(signum):
            # Deliver the given signal to the analytics process on both nodes.
            con_cbas_node1.kill_process(self.process, self.service, signum)
            con_cbas_node2.kill_process(self.process, self.service, signum)

        self.log.info("SIGSTOP ANALYTICS SERVICE")
        _signal_analytics(sigstop)

        self.log.info("Add more documents in the default bucket")
        self.perform_doc_ops_in_all_cb_buckets("create", self.num_items, self.num_items * 2, exp=0,
                                               batch_size=self.batch_size)

        self.log.info("SIGCONT ANALYTICS")
        _signal_analytics(sigcont)
        self.sleep(15)

        self.log.info("Wait for ingestion to complete and verify count")
        self.cbas_util.wait_for_ingestion_complete([self.dataset_name], self.num_items * 2)
        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(self.dataset_name, self.num_items * 2))

        self.log.info("SIGSTOP ANALYTICS SERVICE")
        _signal_analytics(sigstop)

        self.log.info("Delete documents in the default bucket")
        self.perform_doc_ops_in_all_cb_buckets("delete", 0, self.num_items, exp=0,
                                               batch_size=self.batch_size)

        self.log.info("SIGCONT ANALYTICS")
        _signal_analytics(sigcont)
        self.sleep(15)

        self.log.info("Wait for ingestion to complete and verify count")
        self.cbas_util.wait_for_ingestion_complete([self.dataset_name], self.num_items)
        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(self.dataset_name, self.num_items))
        con_cbas_node1.disconnect()
        con_cbas_node2.disconnect()
Ejemplo n.º 7
0
    def test_analytics_recovery_on_busy_system(self):
        """Kill the analytics service while concurrent queries are running,
        wait for the service to answer a ping(), and verify no re-ingestion
        happened and new mutations are still ingested.
        """
        self.log.info("Load data, create cbas buckets, and datasets")
        self.set_up_test()

        self.log.info("Wait for ingestion to complete")
        self.cbas_util.wait_for_ingestion_complete([self.dataset_name],
                                                   self.num_items)
        self.assertTrue(
            self.cbas_util.validate_cbas_dataset_items_count(
                self.dataset_name, self.num_items))

        self.log.info("Get the nodes on which kill is to be run")
        self.nodes_to_kill_service_on = []
        # Query failures are only tolerated when the CC node is being killed;
        # must be initialised here or the except-branch below raises
        # NameError when kill_on_cc is False.
        neglect_failures = False
        if self.kill_on_cc:
            neglect_failures = True
            self.nodes_to_kill_service_on.append(self.cbas_node)
        if self.kill_on_nc:
            for cbas_server in self.cbas_servers:
                self.nodes_to_kill_service_on.append(cbas_server)

        self.log.info("Run concurrent queries to simulate busy system")
        statement = "select sleep(count(*),50000) from {0} where mutated=0;".format(
            self.dataset_name)
        try:
            self.cbas_util._run_concurrent_queries(statement,
                                                   "async",
                                                   10,
                                                   batch_size=10)
        except Exception as e:
            if neglect_failures:
                self.log.info(
                    "Neglecting failed queries, to handle killing Java/Cbas process kill on CC & NC node %s"
                    % e)
            else:
                raise e

        self.log.info("Establish a remote connection on node and kill service")
        for node in self.nodes_to_kill_service_on:
            shell = RemoteMachineShellConnection(node)
            try:
                shell.kill_process(self.process, self.service,
                                   signum=self.signum)
            finally:
                # Close the SSH session so connections do not leak.
                shell.disconnect()

        self.sleep(
            5,
            "Sleeping for 5 seconds as after killing the service the service takes some time to exit and the service checks get pass by that time."
        )
        self.log.info(
            "Wait for request to complete and cluster to be active: Using private ping() function"
        )
        service_up = False
        start_time = time.time()
        while time.time() < start_time + 120:
            try:
                status, metrics, _, cbas_result, _ = self.cbas_util.execute_statement_on_cbas_util(
                    "set `import-private-functions` `true`;ping();",
                    timeout=600,
                    analytics_timeout=600)
                if status == "success":
                    service_up = True
                    break
            except Exception:
                pass
            self.sleep(1)

        self.assertTrue(
            service_up,
            msg="CBAS service was not up even after 120 seconds of process "
                "kill. Failing the test, possibly a bug")

        self.log.info("Observe no reingestion on node after restart")
        items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
            self.dataset_name)
        # A count of 0 would mean the dataset was wiped and re-ingestion
        # restarted from scratch, which must not happen on recovery.
        self.assertTrue(
            items_in_cbas_bucket > 0,
            msg="Items in CBAS bucket must be greater than 0. "
                "If not, re-ingestion has happened")
        self.assertTrue(
            self.cbas_util.validate_cbas_dataset_items_count(
                self.dataset_name, self.num_items))

        self.log.info("Add more documents in the default bucket")
        self.perform_doc_ops_in_all_cb_buckets(self.num_items,
                                               "create",
                                               self.num_items,
                                               self.num_items * 2,
                                               exp=0,
                                               batch_size=self.batch_size)

        self.log.info("Wait for ingestion to complete")
        self.assertTrue(
            self.cbas_util.validate_cbas_dataset_items_count(
                self.dataset_name, self.num_items * 2))