def test_chain_rebalance_out_cc(self):
        self.setup_for_test(skip_data_loading=True)
        self.ingestion_in_progress()

        total_cbas_nodes = len(self.otpNodes)
        while total_cbas_nodes > 1:
            cc_ip = self.cbas_util.retrieve_cc_ip(shell=self.shell)
            for otpnode in self.otpNodes:
                if otpnode.ip == cc_ip:
                    self.cluster_util.remove_node([otpnode],
                                                  wait_for_rebalance=True)
                    for server in self.cbas_servers:
                        if cc_ip != server.ip:
                            self.cbas_util.closeConn()
                            self.cbas_util = cbas_utils(self.master, server)
                            self.cbas_util.createConn("default")
                            self.cbas_node = server
                            break

                    items_in_cbas_bucket = 0
                    start_time = time.time()
                    while (items_in_cbas_bucket == 0 or items_in_cbas_bucket
                           == -1) and time.time() < start_time + 60:
                        try:
                            items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
                                self.cbas_dataset_name)
                        except Exception:
                            pass
                        self.sleep(1)
                    self.log.info(
                        "After rebalance operation docs in CBAS bucket : %s" %
                        items_in_cbas_bucket)
                    if self.num_items < items_in_cbas_bucket < self.num_items * 2:
                        self.log.info(
                            "Data ingestion interrupted successfully")
                    elif items_in_cbas_bucket < self.num_items:
                        self.log.info(
                            "Data ingestion was interrupted and restarted from 0."
                        )
                    else:
                        self.log.info(
                            "Data ingestion was not interrupted; it completed before the rebalance operation."
                        )

                    query = "select count(*) from {0};".format(
                        self.cbas_dataset_name)
                    self.cbas_util._run_concurrent_queries(
                        query, "immediate", 10)
                    break
            total_cbas_nodes -= 1

        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, self.num_items * 2):
            self.fail(
                "No. of items in the CBAS dataset does not match that in the CB bucket"
            )
        self.ingest_more_data()
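
A note on the polling pattern above, which recurs throughout the examples below: get_num_items_in_cbas_dataset can return -1 or raise while a rebalance or restart is in flight, so the tests spin until a real count arrives or a timeout expires. A minimal standalone sketch of that loop, assuming the same cbas_util API (wait_for_cbas_item_count is a hypothetical helper name, not part of the test suite):

import time

def wait_for_cbas_item_count(cbas_util, dataset_name, timeout=60,
                             poll_interval=1):
    """Poll CBAS until a usable item count is returned or the timeout expires."""
    items = 0
    start_time = time.time()
    while items in (0, -1) and time.time() < start_time + timeout:
        try:
            # -1 (or an exception) signals the dataset is not queryable yet
            items, _ = cbas_util.get_num_items_in_cbas_dataset(dataset_name)
        except Exception:
            pass  # transient query failures while the cluster settles
        time.sleep(poll_interval)
    return items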
Example #2
    def execute(self):
        try:
            from cbas.cbas_utils import cbas_utils
            utils = cbas_utils(self.master, self.cbas_server)
            utils.createConn(self.bucket)
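            # execute_statement_on_cbas_util returns a 5-tuple:
            # (response/status, metrics, errors, results, handle)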
            self.response, self.metrics, self.errors, self.results, self.handle = utils.execute_statement_on_cbas_util(
                self.statement)

            if self.response:
                self.state = CHECKING
                self.call()
            else:
                self.test_log.error("CBAS query returned no response in CBASQueryExecuteTask")
                traceback.print_exc(file=sys.stdout)
                self.state = FINISHED
                self.passed = False
                self.set_result(False)
        # Catch and record any unexpected exception
        except Exception as e:
            self.state = FINISHED
            self.passed = False
            self.set_unexpected_exception(e)
Example #3
    def test_rebalance_kv_rollback_create_ops(self):
        self.setup_for_test()
        items_before_persistence_stop = self.cbas_util.get_num_items_in_cbas_dataset(
            self.cbas_dataset_name)[0]
        self.log.info("Items in CBAS before persistence stop: %s" %
                      items_before_persistence_stop)
        # Stop persistence on Node A (self.master)
        self.log.info("Stopping persistence on NodeA")
        mem_client = MemcachedClientHelper.direct_client(
            self.master, self.cb_bucket_name)
        mem_client.stop_persistence()
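        # With persistence stopped, the mutations below exist only in
        # memory; killing memcached later discards them, forcing KV (and
        # CBAS, via DCP) to roll back to a lower sequence number.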

        # Perform create ops in the CB bucket
        self.log.info("Performing Mutations")
        self.perform_doc_ops_in_all_cb_buckets(self.num_items / 2, "create",
                                               self.num_items,
                                               self.num_items * 3 / 2)

        kv_nodes = self.get_kv_nodes(self.servers, self.master)
        items_in_cb_bucket = 0
        if self.where_field and self.where_value:
            items_in_cb_bucket = RestConnection(self.master).query_tool(
                'select count(*) from %s where %s = "%s"' %
                (self.cb_bucket_name, self.where_field,
                 self.where_value))['results'][0]['$1']
        else:
            for node in kv_nodes:
                items_in_cb_bucket += self.get_item_count(
                    node, self.cb_bucket_name)
        # Validate no. of items in CBAS dataset
        self.assertTrue(
            self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, items_in_cb_bucket, 0),
            "No. of items in the CBAS dataset does not match that in the CB bucket")

        # Count no. of items in CB & CBAS Buckets
        items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
            self.cbas_dataset_name)

        self.log.info(
            "Before Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s",
            items_in_cb_bucket, items_in_cbas_bucket)

        self.assertTrue(
            items_in_cb_bucket == items_in_cbas_bucket,
            "Before Rollback : # Items in CBAS bucket does not match that in the CB bucket"
        )

        if self.CC:
            self.cluster_util.remove_node([self.otpNodes[0]],
                                          wait_for_rebalance=False)
            self.cbas_util.closeConn()
            self.cbas_util = cbas_utils(self.master, self.cbas_servers[0])
            self.cbas_util.createConn("default")
        else:
            self.cluster_util.remove_node([self.otpNodes[1]],
                                          wait_for_rebalance=False)

        # Kill memcached on Node A so that Node B becomes master
        self.log.info("Kill Memcached process on NodeA")
        shell = RemoteMachineShellConnection(self.master)
        shell.kill_memcached()
        self.sleep(2, "Wait for 2 secs for DCP rollback sent to CBAS.")
        curr = time.time()
        while items_in_cbas_bucket == -1 or (
                items_in_cbas_bucket != 0
                and items_in_cbas_bucket > items_before_persistence_stop):
            try:
                if curr + 120 < time.time():
                    break
                items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
                self.log.info("Items in CBAS: %s" % items_in_cbas_bucket)
            except Exception:
                self.log.info(
                    "Rebalance is probably in progress, which is why queries are failing."
                )
        self.assertTrue(items_in_cbas_bucket <= items_before_persistence_stop,
                        "Roll-back did not happen.")
        self.log.info("Rollback happened")

        items_in_cb_bucket = 0
        curr = time.time()
        while items_in_cb_bucket != items_in_cbas_bucket or items_in_cb_bucket == 0:
            items_in_cb_bucket = 0
            items_in_cbas_bucket = 0
            if self.where_field and self.where_value:
                try:
                    items_in_cb_bucket = RestConnection(
                        self.master).query_tool(
                            'select count(*) from %s where %s = "%s"' %
                            (self.cb_bucket_name, self.where_field,
                             self.where_value))['results'][0]['$1']
                except Exception:
                    self.log.info(
                        "Indexer is in rollback state; query failed. Continuing."
                    )
            else:
                for node in kv_nodes:
                    items_in_cb_bucket += self.get_item_count(
                        node, self.cb_bucket_name)

            self.log.info("Items in CB bucket after rollback: %s" %
                          items_in_cb_bucket)
            try:
                items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
            except Exception:
                pass
            if curr + 120 < time.time():
                break
        str_time = time.time()
        while self.rest._rebalance_progress_status(
        ) == "running" and time.time() < str_time + 300:
            self.sleep(1)
            self.log.info("Waiting for rebalance to complete")

        self.log.info(
            "After Rollback --- # docs in CB bucket : %s, # docs in CBAS bucket : %s",
            items_in_cb_bucket, items_in_cbas_bucket)

        self.assertTrue(
            items_in_cb_bucket == items_in_cbas_bucket,
            "After Rollback : # Items in CBAS bucket does not match that in the CB bucket"
        )
Example #4
    def test_logging_configurations_are_restored_post_service_restarts(self):

        self.log.info("Add a cbas node")
        result = self.add_node(self.cbas_servers[0],
                               services=["cbas"],
                               rebalance=True)
        self.assertTrue(result, msg="Failed to add CBAS node")

        self.log.info("Delete all loggers")
        self.cbas_util.delete_all_loggers_on_cbas()

        self.log.info("Set the logging level using the json object")
        status, content, response = self.cbas_util.set_log_level_on_cbas(
            CbasLogging.DEFAULT_LOGGER_CONFIG_DICT)
        self.assertTrue(status,
                        msg="Response status incorrect for SET request")

        self.log.info("Delete specific logger")
        logger_name = self.input.param("logger_name_to_delete",
                                       "com.couchbase.client.core.node")
        status, content, response = self.cbas_util.delete_specific_cbas_log_level(
            logger_name)
        self.assertTrue(status, msg="Status mismatch for DELETE")
        del CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[logger_name]

        self.log.info("Update specific logger")
        logger_name = self.input.param("logger_name_to_update",
                                       "org.apache.hyracks")
        logger_level_to_update = self.input.param("logger_level_to_update",
                                                  "FATAL")
        status, response, content = self.cbas_util.set_specific_log_level_on_cbas(
            logger_name, logger_level_to_update)
        self.assertTrue(status, msg="Status mismatch for SET")
        CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[
            logger_name] = logger_level_to_update

        self.log.info("Add a new logger")
        logger_name = self.input.param("logger_name_to_add",
                                       "org.apache.hyracks123")
        logger_level_to_add = self.input.param("logger_level_to_add", "ALL")
        status, response, content = self.cbas_util.set_specific_log_level_on_cbas(
            logger_name, logger_level_to_add)
        self.assertTrue(status, msg="Status mismatch for SET")
        CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[
            logger_name] = logger_level_to_add

        self.log.info("Verify logging configuration that we set on cbas Node")
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = self.cbas_util.get_specific_cbas_log_level(
                name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEqual(content,
                             level,
                             msg="Logger configuration mismatch for logger " +
                             name)

        self.sleep(
            timeout=10,
            message=
            "Waiting for logger configuration to be copied across cbas nodes")

        self.log.info("Verify logging configuration on other cbas node")
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = cbas_utils(
                self.master,
                self.cbas_servers[0]).get_specific_cbas_log_level(name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEqual(content,
                             level,
                             msg="Logger configuration mismatch for logger " +
                             name)

        self.log.info("Read input params")
        process_name = self.input.param('process_name', None)
        service_name = self.input.param('service_name', None)
        restart_couchbase = self.input.param('restart_couchbase', False)
        reboot = self.input.param('reboot', False)
        kill_services = self.input.param('kill_services', False)

        self.log.info("Establish a remote connection")
        shell_cc = RemoteMachineShellConnection(self.cbas_node)
        shell_nc = RemoteMachineShellConnection(self.cbas_servers[0])

        if kill_services:
            self.log.info("Kill the %s service on CC cbas node" % service_name)
            shell_cc.kill_process(process_name, service_name)

            self.log.info("Kill the %s service on other cbas node" %
                          service_name)
            shell_nc.kill_process(process_name, service_name)

        if restart_couchbase:
            self.log.info("Restart couchbase CC node ")
            shell_cc.restart_couchbase()

            self.log.info("Restart couchbase NC node ")
            shell_nc.restart_couchbase()

        if reboot:
            self.log.info("Reboot couchbase CC node")
            NodeHelper.reboot_server(self.cbas_node, self)

            self.log.info("Reboot couchbase NC node")
            NodeHelper.reboot_server(self.cbas_servers[0], self)

        end_time = datetime.datetime.now() + datetime.timedelta(minutes=1)
        self.log.info(
            "Wait for nodes to be bootstrapped; ignore unreachable-server exceptions"
        )
        while datetime.datetime.now() < end_time:
            try:
                self.log.info("Get the logging configurations")
                status, content, response = self.cbas_util.get_log_level_on_cbas(
                )
                self.assertTrue(
                    status, msg="Response status incorrect for GET request")

                self.log.info("Convert response to a dictionary")
                log_dict = CbasLogging.convert_logger_get_result_to_a_dict(
                    content)
                if len(log_dict) >= len(
                        CbasLogging.DEFAULT_LOGGER_CONFIG_DICT):
                    break
            except Exception:
                pass

        self.log.info("Verify logging configuration post service kill")
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = self.cbas_util.get_specific_cbas_log_level(
                name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEqual(content,
                             level,
                             msg="Logger configuration mismatch for logger " +
                             name)

        self.sleep(
            timeout=10,
            message=
            "Waiting for logger configuration to be copied across cbas nodes")

        self.log.info(
            "Verify logging configuration on other cbas node post service kill"
        )
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = cbas_utils(
                self.master,
                self.cbas_servers[0]).get_specific_cbas_log_level(name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEqual(content,
                             level,
                             msg="Logger configuration mismatch for logger " +
                             name)
Example #5
    def test_logging_configurations_are_shared_across_cbas_node(self):

        self.log.info("Add a cbas node")
        result = self.add_node(self.cbas_servers[0],
                               services=["cbas"],
                               rebalance=True)
        self.assertTrue(result, msg="Failed to add CBAS node")

        self.log.info("Delete all loggers")
        self.cbas_util.delete_all_loggers_on_cbas()

        self.log.info(
            "Set the logging level using json object from default logger config dictionary on master cbas node"
        )
        status, content, response = self.cbas_util.set_log_level_on_cbas(
            CbasLogging.DEFAULT_LOGGER_CONFIG_DICT)
        self.assertTrue(status,
                        msg="Response status incorrect for SET request")

        self.log.info("Verify logging configuration that we set on cbas Node")
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = self.cbas_util.get_specific_cbas_log_level(
                name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEqual(content,
                             level,
                             msg="Logger configuration mismatch for logger " +
                             name)

        self.sleep(
            timeout=10,
            message=
            "Waiting for logger configuration to be copied across cbas nodes")

        self.log.info("Verify logging configuration on other cbas node")
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = cbas_utils(
                self.master,
                self.cbas_servers[0]).get_specific_cbas_log_level(name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEqual(content,
                             level,
                             msg="Logger configuration mismatch for logger " +
                             name)

        self.log.info("Update logging configuration on other cbas node")
        logger_level = self.input.param("logger_level", "FATAL")
        logger_name = self.input.param("logger_name", "org.apache.asterix")
        status, content, response = cbas_utils(
            self.master, self.cbas_servers[0]).set_specific_log_level_on_cbas(
                logger_name, logger_level)
        self.assertTrue(status, msg="Status mismatch for SET")

        self.sleep(
            timeout=10,
            message=
            "Waiting for logger configuration to be copied across cbas nodes")

        self.log.info("Assert log level on master cbas node")
        status, content, response = self.cbas_util.get_specific_cbas_log_level(
            logger_name)
        self.assertTrue(status, msg="Status mismatch for GET")
        self.assertEqual(content,
                         logger_level,
                         msg="Logger configuration mismatch for " +
                         logger_name)
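
The logging tests above and below treat CbasLogging.DEFAULT_LOGGER_CONFIG_DICT as a flat logger-name-to-level mapping. A purely illustrative sketch of its shape; the real defaults live in CbasLogging, and these entries are assumptions based on the logger names the tests pass as parameters:

DEFAULT_LOGGER_CONFIG_DICT = {
    "org.apache.asterix": "INFO",
    "org.apache.hyracks": "INFO",
    "com.couchbase.client.core.node": "DEBUG",
}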
Example #6
    def test_logging_configurations_are_restored_post_service_restarts(self):

        self.log.info("Add a cbas node")
        result = self.add_node(self.cbas_servers[0],
                               services=["cbas"],
                               rebalance=True)
        self.assertTrue(result, msg="Failed to add CBAS node")

        self.log.info("Delete all loggers")
        self.cbas_util.delete_all_loggers_on_cbas()

        self.log.info("Set the logging level using the json object")
        status, content, response = self.cbas_util.set_log_level_on_cbas(
            CbasLogging.DEFAULT_LOGGER_CONFIG_DICT)
        self.assertTrue(status,
                        msg="Response status incorrect for SET request")

        self.log.info("Delete specific logger")
        logger_name = self.input.param("logger_name_to_delete",
                                       "com.couchbase.client.core.node")
        status, content, response = self.cbas_util.delete_specific_cbas_log_level(
            logger_name)
        self.assertTrue(status, msg="Status mismatch for DELETE")
        del CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[logger_name]

        self.log.info("Update specific logger")
        logger_name = self.input.param("logger_name_to_update",
                                       "org.apache.hyracks")
        logger_level_to_update = self.input.param("logger_level_to_update",
                                                  "FATAL")
        status, response, content = self.cbas_util.set_specific_log_level_on_cbas(
            logger_name, logger_level_to_update)
        self.assertTrue(status, msg="Status mismatch for SET")
        CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[
            logger_name] = logger_level_to_update

        self.log.info("Add a new logger")
        logger_name = self.input.param("logger_name_to_add",
                                       "org.apache.hyracks123")
        logger_level_to_add = self.input.param("logger_level_to_add", "ALL")
        status, response, content = self.cbas_util.set_specific_log_level_on_cbas(
            logger_name, logger_level_to_add)
        self.assertTrue(status, msg="Status mismatch for SET")
        CbasLogging.DEFAULT_LOGGER_CONFIG_DICT[
            logger_name] = logger_level_to_add

        self.log.info("Verify logging configuration that we set on cbas Node")
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = self.cbas_util.get_specific_cbas_log_level(
                name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEqual(content,
                             level,
                             msg="Logger configuration mismatch for logger " +
                             name)

        self.sleep(
            timeout=10,
            message=
            "Waiting for logger configuration to be copied across cbas nodes")

        self.log.info("Verify logging configuration on other cbas node")
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = cbas_utils(
                self.master,
                self.cbas_servers[0]).get_specific_cbas_log_level(name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEqual(content,
                             level,
                             msg="Logger configuration mismatch for logger " +
                             name)

        self.log.info("Read input params")
        process_name = self.input.param('process_name', None)
        service_name = self.input.param('service_name', None)
        restart_couchbase = self.input.param('restart_couchbase', False)
        reboot = self.input.param('reboot', False)
        kill_services = self.input.param('kill_services', False)

        self.log.info("Establish a remote connection")
        shell_cc = RemoteMachineShellConnection(self.cbas_node)
        shell_nc = RemoteMachineShellConnection(self.cbas_servers[0])

        if kill_services:
            self.log.info("Kill the %s service on CC cbas node" % service_name)
            shell_cc.kill_process(process_name, service_name)

            self.log.info("Kill the %s service on other cbas node" %
                          service_name)
            shell_nc.kill_process(process_name, service_name)

        if restart_couchbase:
            self.log.info("Restart couchbase CC node ")
            shell_cc.restart_couchbase()

            self.log.info("Restart couchbase NC node ")
            shell_nc.restart_couchbase()

        if reboot:
            self.log.info("Reboot couchbase CC node")
            NodeHelper.reboot_server(self.cbas_node, self)

            self.log.info("Reboot couchbase NC node")
            NodeHelper.reboot_server(self.cbas_servers[0], self)

        self.log.info(
            "Wait for the request to complete and the cluster to become active, using the private ping() function"
        )
        cluster_recover_start_time = time.time()
        while time.time() < cluster_recover_start_time + 180:
            try:
                status, metrics, _, cbas_result, _ = self.cbas_util.execute_statement_on_cbas_util(
                    "set `import-private-functions` `true`;ping();")
                if status == "success":
                    break
            except Exception:
                self.sleep(2, message="Wait for service to be up again")

        self.log.info("Verify logging configuration post service kill")
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = self.cbas_util.get_specific_cbas_log_level(
                name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEqual(content,
                             level,
                             msg="Logger configuration mismatch for logger " +
                             name)

        self.sleep(
            timeout=10,
            message=
            "Waiting for logger configuration to be copied across cbas nodes")

        self.log.info(
            "Verify logging configuration on other cbas node post service kill"
        )
        for name, level in CbasLogging.DEFAULT_LOGGER_CONFIG_DICT.items():
            status, content, response = cbas_utils(
                self.master,
                self.cbas_servers[0]).get_specific_cbas_log_level(name)
            self.assertTrue(status,
                            msg="Response status incorrect for GET request")
            self.assertEqual(content,
                             level,
                             msg="Logger configuration mismatch for logger " +
                             name)
Example #7
    def test_stop_network_ingest_data(self):
        self.setup_for_test()
        self.cbas_node_type = self.input.param('cbas_node_type', None)

        query = "select sleep(count(*),50000) from {0};".format(
            self.cbas_dataset_name)
        handles = self.cbas_util._run_concurrent_queries(query, "async", 10)
        self.ingestion_in_progress()

        # Choose the node whose network will be stopped
        if self.cbas_node_type:
            if self.cbas_node_type == "CC":
                node_in_test = self.cbas_node
                self.cbas_util = cbas_utils(self.master, self.cbas_servers[0])
                self.cbas_util.createConn("default")
            else:
                node_in_test = self.cbas_servers[0]
        else:
            # Stop network on KV node to mimic n/w partition on KV
            node_in_test = self.master

        items_in_cbas_bucket_before, _ = self.cbas_util.get_num_items_in_cbas_dataset(
            self.cbas_dataset_name)
        self.log.info("Items before network down: %s" %
                      items_in_cbas_bucket_before)
        RemoteMachineShellConnection(node_in_test).stop_network("30")

        items_in_cbas_bucket = 0
        start_time = time.time()
        while (items_in_cbas_bucket == 0 or items_in_cbas_bucket
               == -1) and time.time() < start_time + 60:
            try:
                items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
            except Exception:
                pass
            self.sleep(1)
        self.log.info("Items after network is up: %s" % items_in_cbas_bucket)
        if self.num_items < items_in_cbas_bucket < self.num_items * 3:
            self.log.info("Data ingestion interrupted successfully")
        elif items_in_cbas_bucket < self.num_items:
            self.log.info(
                "Data ingestion was interrupted and restarted from 0.")
        else:
            self.log.info(
                "Data ingestion was not interrupted; it completed before the service restart."
            )

        run_count = 0
        fail_count = 0
        success_count = 0
        aborted_count = 0
        shell = RemoteMachineShellConnection(node_in_test)
        for handle in handles:
            status, hand = self.cbas_util.retrieve_request_status_using_handle(
                node_in_test, handle, shell)
            if status == "running":
                run_count += 1
                self.log.info("query with handle %s is running." % handle)
            elif status == "failed":
                fail_count += 1
                self.log.info("query with handle %s failed." % handle)
            elif status == "success":
                success_count += 1
                self.log.info("query with handle %s is successful." % handle)
            else:
                aborted_count += 1
                self.log.info("Queued job is deleted: %s" % status)

        self.log.info("After service restart %s queued jobs are Running." %
                      run_count)
        self.log.info("After service restart %s queued jobs are Failed." %
                      fail_count)
        self.log.info("After service restart %s queued jobs are Successful." %
                      success_count)
        self.log.info("After service restart %s queued jobs are Aborted." %
                      aborted_count)

        if self.cbas_node_type == "NC":
            self.assertTrue(fail_count + aborted_count == 0,
                            "Some queries failed/aborted")

        query = "select count(*) from {0};".format(self.cbas_dataset_name)
        self.cbas_util._run_concurrent_queries(query, "immediate", 100)

        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, self.num_items * 3):
            self.fail(
                "No. of items in the CBAS dataset does not match that in the CB bucket"
            )
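
The run/failed/success/aborted classification loop above reappears in the remaining examples. A minimal sketch of how it could be factored into a shared helper, assuming the same cbas_util API (summarize_handle_statuses is a hypothetical name):

def summarize_handle_statuses(cbas_util, node, handles, shell, log):
    """Classify async query handles by status and return the counts."""
    counts = {"running": 0, "failed": 0, "success": 0, "aborted": 0}
    for handle in handles:
        status, _ = cbas_util.retrieve_request_status_using_handle(
            node, handle, shell)
        if status in ("running", "failed", "success"):
            counts[status] += 1
        else:
            # any other status means the queued job was deleted/aborted
            counts["aborted"] += 1
        log.info("query with handle %s finished with status: %s"
                 % (handle, status))
    return counts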
Example #8
    def test_disk_full_ingest_data(self):
        self.cbas_node_type = self.input.param('cbas_node_type', None)
        if self.cbas_node_type == "CC":
            node_in_test = self.cbas_node
            self.cbas_util = cbas_utils(self.master, self.cbas_servers[0])
        else:
            node_in_test = self.cbas_servers[0]

        remote_client = RemoteMachineShellConnection(node_in_test)
        output, error = remote_client.execute_command("rm -rf full_disk*",
                                                      use_channel=True)
        remote_client.log_command_output(output, error)

        self.setup_for_test()

        query = "select sleep(count(*),50000) from {0};".format(
            self.cbas_dataset_name)
        handles = self.cbas_util._run_concurrent_queries(query, "async", 10)

        def _get_disk_usage_in_MB(remote_client):
            disk_info = remote_client.get_disk_info(in_MB=True)
            disk_space = disk_info[1].split()[-3][:-1]
            return disk_space

        du = int(_get_disk_usage_in_MB(remote_client)) - 50
        chunk_size = 1024
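        # Fill the disk in 1 GB chunks (dd bs=1024M count=1) until only
        # ~50 MB of the parsed df space remains, shrinking the final chunk
        # to fit the leftover space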
        while int(du) > 0:
            if du < 1024:
                chunk_size = du
            output, error = remote_client.execute_command(
                "dd if=/dev/zero of=full_disk{0} bs={1}M count=1".format(
                    str(du) + "_MB" + str(time.time()), chunk_size),
                use_channel=True)
            remote_client.log_command_output(output, error)
            du -= 1024

        self.ingestion_in_progress()

        items_in_cbas_bucket_before, _ = self.cbas_util.get_num_items_in_cbas_dataset(
            self.cbas_dataset_name)
        items_in_cbas_bucket_after, _ = self.cbas_util.get_num_items_in_cbas_dataset(
            self.cbas_dataset_name)
        try:
            while items_in_cbas_bucket_before != items_in_cbas_bucket_after:
                items_in_cbas_bucket_before, _ = self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
                self.sleep(2)
                items_in_cbas_bucket_after, _ = self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
        except Exception:
            self.log.info("Ingestion interrupted and server seems to be down")

        if items_in_cbas_bucket_before == self.num_items * 3:
            self.log.info("Data ingestion was not interrupted; it completed.")
        elif items_in_cbas_bucket_before < self.num_items * 3:
            self.log.info("Data ingestion interrupted successfully")

        output, error = remote_client.execute_command("rm -rf full_disk*",
                                                      use_channel=True)
        remote_client.log_command_output(output, error)
        remote_client.disconnect()
        self.sleep(
            10,
            "wait for service to come up after disk space is made available.")

        run_count = 0
        fail_count = 0
        success_count = 0
        aborted_count = 0
        shell = RemoteMachineShellConnection(node_in_test)
        for handle in handles:
            status, hand = self.cbas_util.retrieve_request_status_using_handle(
                node_in_test, handle, shell)
            if status == "running":
                run_count += 1
                self.log.info("query with handle %s is running." % handle)
            elif status == "failed":
                fail_count += 1
                self.log.info("query with handle %s failed." % handle)
            elif status == "success":
                success_count += 1
                self.log.info("query with handle %s is successful." % handle)
            else:
                aborted_count += 1
                self.log.info("Queued job is deleted: %s" % status)

        self.log.info("After service restart %s queued jobs are Running." %
                      run_count)
        self.log.info("After service restart %s queued jobs are Failed." %
                      fail_count)
        self.log.info("After service restart %s queued jobs are Successful." %
                      success_count)
        self.log.info("After service restart %s queued jobs are Aborted." %
                      aborted_count)

        if self.cbas_node_type == "NC":
            self.assertTrue(fail_count + aborted_count == 0,
                            "Some queries failed/aborted")

        self.sleep(60)

        query = "select count(*) from {0};".format(self.cbas_dataset_name)
        self.cbas_util._run_concurrent_queries(query, "immediate", 100)

        count_n1ql = self.rest.query_tool(
            'select count(*) from `%s`' %
            (self.cb_bucket_name))['results'][0]['$1']
        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, count_n1ql):
            self.fail(
                "No. of items in the CBAS dataset does not match that in the CB bucket"
            )
Example #9
    def test_stop_start_service_ingest_data(self):
        self.setup_for_test()
        self.cbas_node_type = self.input.param('cbas_node_type', None)

        query = "select sleep(count(*),50000) from {0};".format(
            self.cbas_dataset_name)
        handles = self.cbas_util._run_concurrent_queries(query, "async", 10)
        self.ingestion_in_progress()

        if self.cbas_node_type == "CC":
            node_in_test = self.cbas_node
            self.cbas_util.closeConn()
            self.cbas_util = cbas_utils(self.master, self.cbas_servers[0])
            self.cbas_util.createConn("default")
        else:
            node_in_test = self.cbas_servers[0]

        items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
            self.cbas_dataset_name)
        self.log.info("Items before service restart: %s" %
                      items_in_cbas_bucket)

        self.log.info("Gracefully stopping service on node %s" % node_in_test)
        NodeHelper.stop_couchbase(node_in_test)
        NodeHelper.start_couchbase(node_in_test)
        NodeHelper.wait_service_started(node_in_test)

        items_in_cbas_bucket = 0
        start_time = time.time()
        while (items_in_cbas_bucket == 0 or items_in_cbas_bucket
               == -1) and time.time() < start_time + 60:
            try:
                items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
            except Exception:
                pass
            self.sleep(1)

        if self.num_items < items_in_cbas_bucket < self.num_items * 3:
            self.log.info("Data ingestion interrupted successfully")
        elif items_in_cbas_bucket < self.num_items:
            self.log.info(
                "Data ingestion was interrupted and restarted from 0.")
        else:
            self.log.info(
                "Data ingestion was not interrupted; it completed before the service restart."
            )

        run_count = 0
        fail_count = 0
        success_count = 0
        aborted_count = 0
        shell = RemoteMachineShellConnection(node_in_test)
        for handle in handles:
            status, hand = self.cbas_util.retrieve_request_status_using_handle(
                node_in_test, handle, shell)
            if status == "running":
                run_count += 1
                self.log.info("query with handle %s is running." % handle)
            elif status == "failed":
                fail_count += 1
                self.log.info("query with handle %s failed." % handle)
            elif status == "success":
                success_count += 1
                self.log.info("query with handle %s is successful." % handle)
            else:
                aborted_count += 1
                self.log.info("Queued job is deleted: %s" % status)

        self.log.info("After service restart %s queued jobs are Running." %
                      run_count)
        self.log.info("After service restart %s queued jobs are Failed." %
                      fail_count)
        self.log.info("After service restart %s queued jobs are Successful." %
                      success_count)
        self.log.info("After service restart %s queued jobs are Aborted." %
                      aborted_count)

        if self.cbas_node_type == "NC":
            self.assertTrue(fail_count + aborted_count == 0,
                            "Some queries failed/aborted")

        query = "select count(*) from {0};".format(self.cbas_dataset_name)
        self.cbas_util._run_concurrent_queries(query, "immediate", 100)

        count_n1ql = self.rest.query_tool(
            'select count(*) from `%s`' %
            (self.cb_bucket_name))['results'][0]['$1']
        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, count_n1ql):
            self.fail(
                "No. of items in the CBAS dataset does not match that in the CB bucket"
            )

    def test_failover(self):
        self.setup_for_test(skip_data_loading=True)
        self.rebalance_node = self.input.param('rebalance_node', 'CC')
        self.how_many = self.input.param('how_many', 1)
        self.restart_rebalance = self.input.param('restart_rebalance', False)
        self.replica_change = self.input.param('replica_change', 0)
        self.add_back = self.input.param('add_back', False)

        query = "select sleep(count(*),50000) from {0};".format(
            self.cbas_dataset_name)
        handles = self.cbas_util._run_concurrent_queries(query, "async", 10)
        self.ingestion_in_progress()

        if self.rebalance_node == "CC":
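            # Failing over the CC (cluster controller) node drops the current
            # Analytics connection, so repoint cbas_util at a surviving node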
            node_in_test = [self.cbas_node]
            otpNodes = [self.otpNodes[0]]
            self.cbas_util.closeConn()
            self.cbas_util = cbas_utils(self.master, self.cbas_servers[0])
            self.cbas_util.createConn("default")

            self.cbas_node = self.cbas_servers[0]
        elif self.rebalance_node == "NC":
            node_in_test = self.cbas_servers[:self.how_many]
            otpNodes = self.nc_otpNodes[:self.how_many]
        else:
            node_in_test = [self.cbas_node] + self.cbas_servers[:self.how_many]
            otpNodes = self.otpNodes[:self.how_many + 1]
            self.cbas_util.closeConn()
            self.cbas_util = cbas_utils(self.master,
                                        self.cbas_servers[self.how_many])
            self.cbas_util.createConn("default")

        replicas_before_rebalance = len(
            self.cbas_util.get_replicas_info(self.shell))
        items_in_cbas_bucket = 0
        start_time = time.time()
        while (items_in_cbas_bucket == 0 or items_in_cbas_bucket
               == -1) and time.time() < start_time + 60:
            try:
                items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
            except Exception:
                pass
            self.sleep(1)
        self.log.info("Items before node failover: %s" % items_in_cbas_bucket)

        if self.restart_rebalance:
            graceful_failover = self.input.param("graceful_failover", False)
            failover_task = self._cb_cluster.async_failover(
                self.input.servers, node_in_test, graceful_failover)
            failover_task.get_result()
            if self.add_back:
                for otpnode in otpNodes:
                    self.rest.set_recovery_type('ns_1@' + otpnode.ip, "full")
                    self.rest.add_back_node('ns_1@' + otpnode.ip)
                self.rebalance(wait_for_completion=False)
            else:
                self.rebalance(ejected_nodes=[node.id for node in otpNodes],
                               wait_for_completion=False)
            self.sleep(2)
            if self.rest._rebalance_progress_status() == "running":
                self.assertTrue(self.rest.stop_rebalance(wait_timeout=120),
                                "Failed while stopping rebalance.")
                if self.add_back:
                    self.rebalance(wait_for_completion=False)
                else:
                    self.rebalance(
                        ejected_nodes=[node.id for node in otpNodes],
                        wait_for_completion=False)
            else:
                self.fail(
                    "Rebalance completed before the test could have stopped rebalance."
                )
        else:
            graceful_failover = self.input.param("graceful_failover", False)
            failover_task = self._cb_cluster.async_failover(
                self.input.servers, node_in_test, graceful_failover)
            failover_task.get_result()
            if self.add_back:
                for otpnode in otpNodes:
                    self.rest.set_recovery_type('ns_1@' + otpnode.ip, "full")
                    self.rest.add_back_node('ns_1@' + otpnode.ip)
            self.rebalance(wait_for_completion=False)

        replicas_before_rebalance -= self.replica_change
        self.sleep(5)
        str_time = time.time()
        while self.rest._rebalance_progress_status(
        ) == "running" and time.time() < str_time + 300:
            replicas = self.cbas_util.get_replicas_info(self.shell)
            if replicas:
                for replica in replicas:
                    self.log.info("replica state during rebalance: %s" %
                                  replica['status'])
        self.sleep(15)
        replicas = self.cbas_util.get_replicas_info(self.shell)
        replicas_after_rebalance = len(replicas)
        self.assertEqual(
            replicas_after_rebalance, replicas_before_rebalance,
            "%s,%s" % (replicas_after_rebalance, replicas_before_rebalance))

        for replica in replicas:
            self.log.info("replica state after rebalance: %s" %
                          replica['status'])
            self.assertEqual(
                replica['status'], "IN_SYNC",
                "Replica state is incorrect: %s" % replica['status'])

        items_in_cbas_bucket = 0
        start_time = time.time()
        while (items_in_cbas_bucket == 0 or items_in_cbas_bucket
               == -1) and time.time() < start_time + 60:
            try:
                items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
            except Exception:
                pass
            self.sleep(1)
        self.log.info("After rebalance operation docs in CBAS bucket : %s" %
                      items_in_cbas_bucket)
        if self.num_items < items_in_cbas_bucket < self.num_items * 2:
            self.log.info("Data ingestion interrupted successfully")
        elif items_in_cbas_bucket < self.num_items:
            self.log.info(
                "Data ingestion was interrupted and restarted from 0.")
        else:
            self.log.info(
                "Data ingestion was not interrupted; it completed before the rebalance operation."
            )

        run_count = 0
        fail_count = 0
        success_count = 0
        aborted_count = 0
        shell = RemoteMachineShellConnection(node_in_test[0])
        for handle in handles:
            status, hand = self.cbas_util.retrieve_request_status_using_handle(
                node_in_test[0], handle, shell)
            if status == "running":
                run_count += 1
                self.log.info("query with handle %s is running." % handle)
            elif status == "failed":
                fail_count += 1
                self.log.info("query with handle %s failed." % handle)
            elif status == "success":
                success_count += 1
                self.log.info("query with handle %s is successful." % handle)
            else:
                aborted_count += 1
                self.log.info("Queued job is deleted: %s" % status)

        self.log.info("After service restart %s queued jobs are Running." %
                      run_count)
        self.log.info("After service restart %s queued jobs are Failed." %
                      fail_count)
        self.log.info("After service restart %s queued jobs are Successful." %
                      success_count)
        self.log.info("After service restart %s queued jobs are Aborted." %
                      aborted_count)

        if self.rebalance_node == "NC":
            self.assertTrue(aborted_count == 0, "Some queries aborted")

        query = "select count(*) from {0};".format(self.cbas_dataset_name)
        self.cbas_util._run_concurrent_queries(query, "immediate", 100)

        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, self.num_items * 2):
            self.fail(
                "No. of items in the CBAS dataset does not match that in the CB bucket"
            )

        self.ingest_more_data()

    def test_cc_swap_rebalance(self):
        self.restart_rebalance = self.input.param('restart_rebalance', False)

        self.setup_for_test(skip_data_loading=True)
        query = "select sleep(count(*),50000) from {0};".format(
            self.cbas_dataset_name)
        handles = self.cbas_util._run_concurrent_queries(query, "async", 10)
        self.ingestion_in_progress()

        replicas_before_rebalance = len(
            self.cbas_util.get_replicas_info(self.shell))

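        # Swap rebalance: add a fresh cbas node, then eject either the CC or
        # an NC node as part of the same rebalance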
        self.cluster_util.add_node(node=self.cbas_servers[-1], rebalance=False)
        swap_nc = self.input.param('swap_nc', False)
        if not swap_nc:
            out_nodes = [self.otpNodes[0]]
            self.cbas_util.closeConn()
            self.cbas_util = cbas_utils(self.master, self.cbas_servers[0])
            self.cbas_util.createConn("default")
            self.cbas_node = self.cbas_servers[0]
        else:
            out_nodes = [self.otpNodes[1]]

        self.cluster_util.remove_node(out_nodes, wait_for_rebalance=False)
        self.sleep(5, "Wait for sometime after rebalance started.")
        if self.restart_rebalance:
            if self.rest._rebalance_progress_status() == "running":
                self.assertTrue(self.rest.stop_rebalance(wait_timeout=120),
                                "Failed while stopping rebalance.")
                self.sleep(10)
            else:
                self.fail(
                    "Rebalance completed before the test could have stopped rebalance."
                )
            self.rebalance(ejected_nodes=[node.id for node in out_nodes],
                           wait_for_completion=False)
        self.sleep(5)
        str_time = time.time()
        while self.rest._rebalance_progress_status(
        ) == "running" and time.time() < str_time + 300:
            replicas = self.cbas_util.get_replicas_info(self.shell)
            if replicas:
                for replica in replicas:
                    self.log.info("replica state during rebalance: %s" %
                                  replica['status'])
        self.sleep(20)

        replicas = self.cbas_util.get_replicas_info(self.shell)
        replicas_after_rebalance = len(replicas)
        self.assertEqual(
            replicas_after_rebalance, replicas_before_rebalance,
            "%s,%s" % (replicas_after_rebalance, replicas_before_rebalance))

        for replica in replicas:
            self.log.info("replica state after rebalance: %s" %
                          replica['status'])
            self.assertEqual(
                replica['status'], "IN_SYNC",
                "Replica state is incorrect: %s" % replica['status'])


        items_in_cbas_bucket = 0
        start_time = time.time()
        while (items_in_cbas_bucket == 0 or items_in_cbas_bucket
               == -1) and time.time() < start_time + 60:
            try:
                items_in_cbas_bucket, _ = self.cbas_util.get_num_items_in_cbas_dataset(
                    self.cbas_dataset_name)
            except Exception:
                pass
            self.sleep(1)
        self.log.info("After rebalance operation docs in CBAS bucket : %s" %
                      items_in_cbas_bucket)
        if self.num_items < items_in_cbas_bucket < self.num_items * 2:
            self.log.info("Data ingestion interrupted successfully")
        elif items_in_cbas_bucket < self.num_items:
            self.log.info(
                "Data ingestion was interrupted and restarted from 0.")
        else:
            self.log.info(
                "Data ingestion was not interrupted; it completed before the rebalance operation."
            )

        run_count = 0
        fail_count = 0
        success_count = 0
        aborted_count = 0
        shell = RemoteMachineShellConnection(self.master)
        for handle in handles:
            status, hand = self.cbas_util.retrieve_request_status_using_handle(
                self.master, handle, shell)
            if status == "running":
                run_count += 1
                self.log.info("query with handle %s is running." % handle)
            elif status == "failed":
                fail_count += 1
                self.log.info("query with handle %s failed." % handle)
            elif status == "success":
                success_count += 1
                self.log.info("query with handle %s is successful." % handle)
            else:
                aborted_count += 1
                self.log.info("Queued job is deleted: %s" % status)

        self.log.info("After service restart %s queued jobs are Running." %
                      run_count)
        self.log.info("After service restart %s queued jobs are Failed." %
                      fail_count)
        self.log.info("After service restart %s queued jobs are Successful." %
                      success_count)
        self.log.info("After service restart %s queued jobs are Aborted." %
                      aborted_count)

        query = "select count(*) from {0};".format(self.cbas_dataset_name)
        self.cbas_util._run_concurrent_queries(query, "immediate", 100)

        if not self.cbas_util.validate_cbas_dataset_items_count(
                self.cbas_dataset_name, self.num_items * 2):
            self.fail(
                "No. of items in the CBAS dataset does not match that in the CB bucket"
            )
        self.ingest_more_data()