Example #1
    def test_primary_cbas_shutdown(self):
        '''
        Description: This test shuts down Couchbase on the first cbas node, fails it over,
        and verifies that the second cbas node can still serve queries.

        Steps:
        1. Add first cbas node.
        2. Start rebalance, wait for rebalance complete.
        3. Create bucket, datasets, connect bucket. Data ingestion should start.
        4. Add another cbas node, rebalance.
        5. Stop the Couchbase service on the cbas node added in step 1. Fail the node over and rebalance.
        6. Second cbas node added in step 4 should be able to serve queries.

        Author: Ritesh Agarwal
        '''
        self.bucket_util.load_sample_bucket(self.cluster, self.sample_bucket)
        otpNode = self.cluster_util.add_node(self.cluster.cbas_nodes[0], services=["cbas"])
        self.setup_cbas_bucket_dataset_connect(self.cb_bucket_name,
                                               self.sample_bucket.stats.expected_item_count)
        self.cluster_util.add_node(self.cluster.cbas_nodes[1], services=["cbas"])
        remote_client = RemoteMachineShellConnection(self.cluster.cbas_nodes[0])
        remote_client.stop_couchbase()
        self.rest.fail_over(otpNode=otpNode.id)
        self.assertTrue(self.cluster_util.rebalance(self.cluster),
                        "Rebalance Failed")

        query = "select count(*) from {0};".format(self.cbas_dataset_name)
        self.cbas_util._run_concurrent_queries(query, "immediate", 100,
                                               rest=RestConnection(self.cluster.cbas_nodes[1]),
                                               batch_size=self.concurrent_batch_size)
        remote_client.start_couchbase()
        remote_client.disconnect()
        self.cluster_util.wait_for_ns_servers_or_assert([self.cluster.cbas_nodes[0]])
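The fail_over() call above performs a hard failover through ns_server's REST API. A minimal sketch of the equivalent direct call, assuming the standard admin port and placeholder credentials (the helper below is hypothetical, not part of the test framework):

    # Sketch only: hard failover of a node via the ns_server REST API, which the
    # rest.fail_over() helper above is assumed to wrap.
    import requests

    def hard_failover(master_host, otp_node, user="Administrator", password="password"):
        # POST /controller/failOver marks the node as failed over; the
        # subsequent rebalance removes it from the cluster map.
        resp = requests.post(
            "http://{0}:8091/controller/failOver".format(master_host),
            data={"otpNode": otp_node},  # e.g. "ns_1@10.1.2.3"
            auth=(user, password))
        return resp.status_code == 200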
Example #2
    def test_restart_cb(self):
        '''
        Description: This test will restart CB and verify that CBAS is also up and running with CB.

        Steps:
        1. Add first cbas node.
        2. Start rebalance, wait for rebalance complete.
        3. Stop Couchbase service, Start Couchbase Service. Wait for service to get started.
        4. Verify that the CBAS service is also up. Create bucket, datasets, connect bucket. Data ingestion should start.

        Author: Ritesh Agarwal
        '''
        self.bucket_util.load_sample_bucket(self.cluster, self.sample_bucket)
        self.cluster_util.add_node(self.cluster.cbas_nodes[0], services=["cbas"])

        remote_client = RemoteMachineShellConnection(self.cluster.cbas_nodes[0])
        remote_client.stop_couchbase()
        remote_client.start_couchbase()
        remote_client.disconnect()

        self.log.info("Wait for cluster to be active")
        self.assertTrue(self.cbas_util.wait_for_cbas_to_recover(), msg="Analytics service unavailable")

        self.setup_cbas_bucket_dataset_connect(self.cb_bucket_name,
                                               self.sample_bucket.stats.expected_item_count)
        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(
            self.cbas_dataset_name,
            self.sample_bucket.stats.expected_item_count), "Data loss in CBAS.")
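The stop_couchbase()/start_couchbase() helpers used in this test are assumed to run the usual service commands on the remote node; a rough sketch of the same restart over SSH (the host, user, and systemctl unit name are assumptions):

    # Sketch only: restart Couchbase Server on a Linux node, as the
    # stop_couchbase()/start_couchbase() helpers are assumed to do.
    import subprocess

    def restart_couchbase(host, user="root"):
        for action in ("stop", "start"):
            subprocess.check_call(
                ["ssh", "{0}@{1}".format(user, host),
                 "systemctl {0} couchbase-server".format(action)])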
Example #3
    def test_reboot_cbas(self):
        '''
        Description: This test reboots the cbas node and verifies that it can serve
        queries afterwards with no data loss.

        Steps:
        1. Add first cbas node.
        2. Start rebalance, wait for rebalance complete.
        3. Create bucket, datasets, connect bucket. Data ingestion should start.
        4. Reboot the CBAS node added in Step 1.
        5. After the reboot, the cbas node should be able to serve queries; validate the items count.

        Author: Ritesh Agarwal
        '''
        self.bucket_util.load_sample_bucket(self.cluster, self.sample_bucket)
        self.cluster_util.add_node(self.cbas_node, services=["kv","cbas"])
        self.setup_cbas_bucket_dataset_connect(self.cb_bucket_name,
                                               self.sample_bucket.stats.expected_item_count)
        shell = RemoteMachineShellConnection(self.cbas_node)
        shell.reboot_server_and_wait_for_cb_run(self.cluster_util, self.cbas_node)
        shell.disconnect()
        # Poll for up to 120s until the dataset reports a non-zero item count,
        # i.e. until analytics has recovered after the reboot (a count of 0 or -1
        # is treated as not-yet-available).
        items_in_cbas_bucket = 0
        start_time = time.time()
        while items_in_cbas_bucket in (0, -1) and time.time() < start_time + 120:
            try:
                items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.cbas_dataset_name)
            except Exception:
                pass
            self.sleep(1)

        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(
            self.cbas_dataset_name, self.sample_bucket.stats.expected_item_count),
            "Data loss in CBAS.")
Example #4
    def test_analytics_recovery_on_busy_system(self):

        self.log.info("Load data, create cbas buckets, and datasets")
        self.set_up_test()

        self.log.info("Wait for ingestion to complete")
        self.cbas_util.wait_for_ingestion_complete([self.dataset_name], self.num_items)
        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(self.dataset_name, self.num_items))

        self.log.info("Get the nodes on which kill is to be run")
        self.nodes_to_kill_service_on = []
        # Default to False so the flag is always defined, even when the service
        # is killed only on NC nodes.
        neglect_failures = False
        if self.kill_on_cc:
            neglect_failures = True
            self.nodes_to_kill_service_on.append(self.cbas_node)
        if self.kill_on_nc:
            for cbas_server in self.cluster.cbas_nodes:
                self.nodes_to_kill_service_on.append(cbas_server)

        self.log.info("Run concurrent queries to simulate busy system")
        statement = "select sleep(count(*),50000) from {0} where mutated=0;".format(self.dataset_name)
        try:
            self.cbas_util._run_concurrent_queries(statement, "async", 10, batch_size=10)
        except Exception as e:
            if neglect_failures:
                self.log.info("Neglecting failed queries, to handle killing Java/Cbas process kill on CC & NC node %s"%e)
            else:
                raise e

        self.log.info("Establish a remote connection on node and kill service")
        for node in self.nodes_to_kill_service_on:
            shell = RemoteMachineShellConnection(node)
            shell.kill_process(self.process, self.service, signum=self.signum)
            shell.disconnect()

        self.sleep(5, "Sleeping for 5 seconds as after killing the service the service takes some time to exit and the service checks get pass by that time.")

        self.log.info("Wait for cluster to be active")
        self.assertTrue(self.cbas_util.wait_for_cbas_to_recover(), msg="Analytics service unavailable")

        self.log.info("Observe no reingestion on node after restart")
        items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(self.dataset_name)
        self.assertTrue(items_in_cbas_bucket > 0, msg="Items in CBAS dataset must be greater than 0; a zero count means re-ingestion has happened")
        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(self.dataset_name, self.num_items))

        self.log.info("Add more documents in the default bucket")
        self.perform_doc_ops_in_all_cb_buckets("create", self.num_items, self.num_items * 2, exp=0,
                                               batch_size=self.batch_size)

        self.log.info("Wait for ingestion to complete")
        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(self.dataset_name, self.num_items * 2))
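kill_process() in the step above is assumed to resolve the analytics process on the node and send it the configured signal; a minimal, hypothetical sketch of that operation:

    # Sketch only: send a signal to a named process on a remote node, roughly what
    # kill_process(self.process, self.service, signum=...) is assumed to do.
    import subprocess

    def kill_remote_process(host, process_name, signum=9, user="root"):
        # pkill -<signum> signals every process whose name matches; signum 9
        # (SIGKILL) forces the CBAS JVM down so that recovery has to kick in.
        subprocess.check_call(
            ["ssh", "{0}@{1}".format(user, host),
             "pkill -{0} {1}".format(signum, process_name)])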
Example #5
    def test_restart_of_all_nodes(self):

        self.log.info("Add nodes, create cbas bucket and dataset")
        self.set_up_test()

        self.log.info("Wait for ingestion to complete and verify count")
        self.cbas_util.wait_for_ingestion_complete([self.dataset_name], self.num_items)
        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(self.dataset_name, self.num_items))

        self.log.info("Restart nodes")
        restart_kv = self.input.param("restart_kv", True)
        restart_cbas = self.input.param("restart_cbas", True)
        self.restart_servers = []

        if restart_kv:
            for kv_server in self.cluster.kv_nodes:
                self.restart_servers.append(kv_server)
        if restart_cbas:
            self.restart_servers.append(self.cbas_node)
            for cbas_server in self.cluster.cbas_nodes:
                self.restart_servers.append(cbas_server)

        for restart_node in self.restart_servers:
            remote_client = RemoteMachineShellConnection(restart_node)
            remote_client.reboot_server_and_wait_for_cb_run(self.cluster_util,
                                                            restart_node)
            remote_client.disconnect()
        self.sleep(15, message="Wait for service to be up and accept request")

        self.log.info("Check if all analytics nodes are up and running")
        self.assertTrue(self.cbas_util.wait_for_cbas_to_recover(), msg="Analytics service failed to recover")

        self.log.info("Add more documents in the default bucket")
        self.perform_doc_ops_in_all_cb_buckets("create", self.num_items, self.num_items * 2, exp=0,
                                               batch_size=self.batch_size)

        self.log.info("Wait for ingestion to complete and verify count")
        self.cbas_util.wait_for_ingestion_complete([self.dataset_name], self.num_items * 2)
        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(self.dataset_name, self.num_items * 2))

        self.log.info("Delete documents in the default bucket")
        self.perform_doc_ops_in_all_cb_buckets("delete", 0, self.num_items, exp=0,
                                               batch_size=self.batch_size)

        self.log.info("Wait for ingestion to complete and verify count")
        self.cbas_util.wait_for_ingestion_complete([self.dataset_name], self.num_items)
        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(self.dataset_name, self.num_items))
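reboot_server_and_wait_for_cb_run() is expected to reboot the node and then block until Couchbase answers again; the waiting half reduces to polling the REST port, roughly as in this sketch (the port and timeout are assumptions):

    # Sketch only: wait for ns_server to accept connections again after a reboot.
    import socket
    import time

    def wait_for_ns_server(host, port=8091, timeout=300):
        deadline = time.time() + timeout
        while time.time() < deadline:
            try:
                with socket.create_connection((host, port), timeout=5):
                    return True
            except OSError:
                time.sleep(5)
        return False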
Example #6
    def test_signal_impact_on_cbas(self):
        self.log.info("Add nodes, create cbas bucket and dataset")
        self.set_up_test()

        self.log.info("Wait for ingestion to complete and verify count")
        self.cbas_util.wait_for_ingestion_complete([self.dataset_name], self.num_items)
        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(self.dataset_name, self.num_items))

        self.log.info("Establish a remote connection")
        con_cbas_node1 = RemoteMachineShellConnection(self.cbas_node)
        con_cbas_node2 = RemoteMachineShellConnection(self.cluster.cbas_nodes[0])

        self.log.info("SIGSTOP ANALYTICS SERVICE")
        con_cbas_node1.kill_process(self.process, self.service, 19)
        con_cbas_node2.kill_process(self.process, self.service, 19)

        self.log.info("Add more documents in the default bucket")
        self.perform_doc_ops_in_all_cb_buckets("create", self.num_items, self.num_items * 2, exp=0,
                                               batch_size=self.batch_size)

        self.log.info("SIGCONT ANALYTICS")
        con_cbas_node1.kill_process(self.process, self.service, 18)
        con_cbas_node2.kill_process(self.process, self.service, 18)
        self.sleep(15)

        self.log.info("Wait for ingestion to complete and verify count")
        self.cbas_util.wait_for_ingestion_complete([self.dataset_name], self.num_items * 2)
        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(self.dataset_name, self.num_items * 2))

        self.log.info("SIGSTOP ANALYTICS SERVICE")
        con_cbas_node1.kill_process(self.process, self.service, 19)
        con_cbas_node2.kill_process(self.process, self.service, 19)

        self.log.info("Delete documents in the default bucket")
        self.perform_doc_ops_in_all_cb_buckets("delete", 0, self.num_items, exp=0,
                                               batch_size=self.batch_size)

        self.log.info("SIGCONT ANALYTICS")
        con_cbas_node1.kill_process(self.process, self.service, 18)
        con_cbas_node2.kill_process(self.process, self.service, 18)
        self.sleep(15)

        self.log.info("Wait for ingestion to complete and verify count")
        self.cbas_util.wait_for_ingestion_complete([self.dataset_name], self.num_items)
        self.assertTrue(self.cbas_util.validate_cbas_dataset_items_count(self.dataset_name, self.num_items))
        con_cbas_node1.disconnect()
        con_cbas_node2.disconnect()
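The raw signal numbers passed to kill_process() above are the Linux values for SIGSTOP and SIGCONT: the test freezes the analytics process while mutations land in KV, then lets it resume and catch up. A two-line check of that mapping:

    # 19 == SIGSTOP and 18 == SIGCONT on Linux (the numbers differ on macOS/BSD).
    import signal
    assert signal.SIGSTOP == 19 and signal.SIGCONT == 18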
Example #7
    def test_logging_configurations_are_restored_post_service_restarts(self):

        self.log.info("Add a cbas node")
        result = self.cluster_util.add_node(self.cluster.cbas_nodes[0],
                                            services=["cbas"],
                                            rebalance=True)
        self.assertTrue(result, msg="Failed to add CBAS node")

        self.log.info("Delete all loggers")
        self.cbas_util.delete_all_loggers_on_cbas()

        self.log.info("Set the logging level using the json object")
        status, content, response = self.cbas_util.set_log_level_on_cbas(
            CbasLogging.DEFAULT_LOGGER_CONFIG_DICT)
        self.assertTrue(status,
                        msg="Response status incorrect for SET request")

        self.log.info("Delete specific logger")
        logger_name = self.input.param("logger_name_to_delete",
                                       "com.couchbase.client.core.node")
        status, content, response = self.cbas_util.delete_specific_cbas_log_level(
            logger_name)
        self.assertTrue(status, msg="Status mismatch for DELETE")
        del CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[logger_name]

        self.log.info("Update specific logger")
        logger_name = self.input.param("logger_name_to_update",
                                       "org.apache.hyracks")
        logger_level_to_update = self.input.param("logger_level_to_update",
                                                  "FATAL")
        status, response, content = self.cbas_util.set_specific_log_level_on_cbas(
            logger_name, logger_level_to_update)
        self.assertTrue(status, msg="Status mismatch for SET")
        CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[
            logger_name] = logger_level_to_update

        self.log.info("Add a new logger")
        logger_name = self.input.param("logger_name_to_add",
                                       "org.apache.hyracks123")
        logger_level_to_add = self.input.param("logger_level_to_add", "ALL")
        status, response, content = self.cbas_util.set_specific_log_level_on_cbas(
            logger_name, logger_level_to_add)
        self.assertTrue(status, msg="Status mismatch for SET")
        CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[
            logger_name] = logger_level_to_add

        self.log.info("Verify logging configuration that we set on cbas Node")
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = self.cbas_util.get_specific_cbas_log_level(
                name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEquals(content,
                              level,
                              msg="Logger configuration mismatch for logger " +
                              name)

        self.sleep(
            timeout=10,
            message=
            "Waiting for logger configuration to be copied across cbas nodes")

        self.log.info("Verify logging configuration on other cbas node")
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = CbasUtil(
                self.cluster.master,
                self.cluster.cbas_nodes[0]).get_specific_cbas_log_level(name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEquals(content,
                              level,
                              msg="Logger configuration mismatch for logger " +
                              name)

        self.log.info("Read input params")
        process_name = self.input.param('process_name', None)
        service_name = self.input.param('service_name', None)
        restart_couchbase = self.input.param('restart_couchbase', False)
        reboot = self.input.param('reboot', False)
        kill_services = self.input.param('kill_services', False)

        self.log.info("Establish a remote connection")
        shell_cc = RemoteMachineShellConnection(self.cbas_node)
        shell_nc = RemoteMachineShellConnection(self.cluster.cbas_nodes[0])

        if kill_services:
            self.log.info("Kill the %s service on CC cbas node" % service_name)
            shell_cc.kill_process(process_name, service_name)

            self.log.info("Kill the %s service on other cbas node" %
                          service_name)
            shell_nc.kill_process(process_name, service_name)

        if restart_couchbase:
            self.log.info("Restart couchbase service")
            status, _, _ = self.cbas_util.restart_analytics_cluster_uri()
            self.assertTrue(status, msg="Failed to restart cbas")

        if reboot:
            self.log.info("Reboot couchbase CC node")
            shell = RemoteMachineShellConnection(self.cbas_node)
            shell.reboot_server_and_wait_for_cb_run(self.cluster_util,
                                                    self.cbas_node)
            shell.disconnect()

            self.log.info("Reboot couchbase NC node")
            shell = RemoteMachineShellConnection(self.cluster.cbas_nodes[0])
            shell.reboot_server_and_wait_for_cb_run(self.cluster_util,
                                                    self.cluster.cbas_nodes[0])
            shell.disconnect()

        self.log.info(
            "Wait for request to complete and cluster to be active: Using private ping() function"
        )
        cluster_recover_start_time = time.time()
        while time.time() < cluster_recover_start_time + 180:
            try:
                status, metrics, _, cbas_result, _ = self.cbas_util.execute_statement_on_cbas_util(
                    "set `import-private-functions` `true`;ping()")
                if status == "success":
                    break
            except Exception:
                pass
            # Sleep between attempts so failed pings do not busy-loop.
            self.sleep(3, message="Wait for the service to come up")

        self.log.info("Verify logging configuration post service kill")
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = self.cbas_util.get_specific_cbas_log_level(
                name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEquals(content,
                              level,
                              msg="Logger configuration mismatch for logger " +
                              name)

        self.sleep(
            timeout=10,
            message=
            "Waiting for logger configuration to be copied across cbas nodes")

        self.log.info(
            "Verify logging configuration on other cbas node post service kill"
        )
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = CbasUtil(
                self.cluster.master,
                self.cluster.cbas_nodes[0]).get_specific_cbas_log_level(name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEquals(content,
                              level,
                              msg="Logger configuration mismatch for logger " +
                              name)
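The recovery wait above issues an Analytics ping() through the test utility; a minimal sketch of the same probe sent directly to the Analytics query service (port 8095 is the default Analytics port, credentials are placeholders):

    # Sketch only: liveness probe against the Analytics service, equivalent in
    # spirit to the execute_statement_on_cbas_util(... ping()) loop above.
    import requests

    def cbas_ping(cbas_host, user="Administrator", password="password"):
        resp = requests.post(
            "http://{0}:8095/analytics/service".format(cbas_host),
            data={"statement": "set `import-private-functions` `true`; ping();"},
            auth=(user, password))
        return resp.ok and resp.json().get("status") == "success"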