def add_heketi_node_to_cluster(self, cluster_id):
        """Add the first additional gluster server to a heketi cluster.

        The server entry is taken from the 'additional_gluster_servers'
        section of the config. The host is passed to
        ``configure_node_to_run_gluster`` and then added to ``cluster_id``
        in zone 1. Cleanups that disable, remove and delete the node are
        registered on the test case.

        Args:
            cluster_id: id of the heketi cluster that receives the node.

        Returns:
            Tuple ``(storage_hostname, storage_ip)`` read from the 'manage'
            and 'storage' keys of the server entry.

        Raises:
            exceptions.ConfigError: when the 'manage' or 'storage' key is
                missing from the server entry.
        """
        extra_servers = g.config.get("additional_gluster_servers")
        if not extra_servers:
            self.skipTest(
                "Skip test case as 'additional_gluster_servers' option is "
                "not provided in config file")

        # Only the first configured additional server is used
        server = list(extra_servers.values())[0]
        try:
            storage_hostname, storage_ip = server["manage"], server["storage"]
        except KeyError:
            msg = ("Config options 'additional_gluster_servers.manage' "
                   "and 'additional_gluster_servers.storage' must be set.")
            g.log.error(msg)
            raise exceptions.ConfigError(msg)

        h_client, h_server = self.heketi_client_node, self.heketi_server_url
        storage_zone = 1

        self.configure_node_to_run_gluster(storage_hostname)

        node_info = heketi_ops.heketi_node_add(
            h_client, h_server, storage_zone, cluster_id,
            storage_hostname, storage_ip, json=True)
        node_id = node_info["id"]

        # Registered as delete, remove, disable; unittest runs cleanups in
        # reverse, so the node is disabled first and deleted last.
        for undo in (heketi_ops.heketi_node_delete,
                     heketi_ops.heketi_node_remove,
                     heketi_ops.heketi_node_disable):
            self.addCleanup(undo, h_client, h_server, node_id)

        actual_cluster = node_info["cluster"]
        self.assertEqual(
            actual_cluster, cluster_id,
            "Node got added in unexpected cluster exp: %s, act: %s" %
            (cluster_id, actual_cluster))

        return storage_hostname, storage_ip
    def test_heketi_metrics_validation_after_node(self, condition):
        """Validate the 'heketi_nodes_count' metric after node add/remove.

        The first additional gluster server from the config is added to the
        first cluster heketi lists. When ``condition`` is 'delete' the test
        waits for the metric to differ from its initial value, takes the
        node (and its devices) out of heketi again and then waits for the
        metric to fall below that intermediate value. For any other
        ``condition`` it waits for the metric to exceed the initial value.

        Args:
            condition: 'delete' to also exercise node removal; any other
                value checks only the addition.

        Raises:
            exceptions.ExecutionError: when the metric has not changed as
                expected by the time the waiter expires.
        """

        def _nodes_count_from_prometheus():
            # Combine the second item of every sample's 'value' pair
            samples = self._fetch_metric_from_promtheus_pod(
                metric='heketi_nodes_count')
            return reduce(
                lambda acc, item: acc + item,
                [sample.get('value')[1] for sample in samples])

        # Pick the first additional server from the config
        extra_servers = g.config.get("additional_gluster_servers")
        if not extra_servers:
            self.skipTest(
                "Skipping this test case as additional gluster server is "
                "not provied in config file")

        server = list(extra_servers.values())[0]
        storage_hostname = server.get("manage")
        storage_ip = server.get("storage")
        if not storage_hostname or not storage_ip:
            self.skipTest(
                "Config options 'additional_gluster_servers.manage' "
                "and 'additional_gluster_servers.storage' must be set.")

        h_client, h_server = self.heketi_client_node, self.heketi_server_url

        # Metric value before the node is added
        initial_node_count = _nodes_count_from_prometheus()

        # Switch to storage project
        openshift_ops.switch_oc_project(
            self._master, self.storage_project_name)

        # The host has to be configured before it is added
        self.configure_node_to_run_gluster(storage_hostname)

        # Add the host to the first cluster heketi lists
        clusters = heketi_ops.heketi_cluster_list(
            h_client, h_server, json=True)
        zone = len(self.gluster_servers)
        target_cluster = clusters.get('clusters')[0]
        heketi_node_info = heketi_ops.heketi_node_add(
            h_client, h_server, zone, target_cluster,
            storage_hostname, storage_ip, json=True)
        heketi_node_id = heketi_node_info.get("id")

        # Registered as delete, remove, disable, project switch; unittest
        # runs cleanups in reverse, so the project switch happens first.
        for undo in (heketi_ops.heketi_node_delete,
                     heketi_ops.heketi_node_remove,
                     heketi_ops.heketi_node_disable):
            self.addCleanup(
                undo, h_client, h_server, heketi_node_id,
                raise_on_error=False)
        self.addCleanup(
            openshift_ops.switch_oc_project,
            self._master, self.storage_project_name)

        deleting = condition == 'delete'
        if deleting:
            # Switch to openshift-monitoring project
            openshift_ops.switch_oc_project(
                self.ocp_master_node[0], self._prometheus_project_name)

            # Wait until prometheus reports a count that differs from the
            # initial one, i.e. the addition became visible
            for poll in waiter.Waiter(timeout=60, interval=10):
                node_count = _nodes_count_from_prometheus()
                if node_count != initial_node_count:
                    break

            if poll.expired:
                raise exceptions.ExecutionError(
                    "Failed to get updated node details from prometheus")

            # Take the node out of the cluster again
            heketi_ops.heketi_node_disable(h_client, h_server, heketi_node_id)
            heketi_ops.heketi_node_remove(h_client, h_server, heketi_node_id)
            for device in heketi_node_info.get('devices'):
                heketi_ops.heketi_device_delete(
                    h_client, h_server, device.get('id'))
            heketi_ops.heketi_node_delete(h_client, h_server, heketi_node_id)

        # Switch to openshift-monitoring project
        openshift_ops.switch_oc_project(
            self.ocp_master_node[0], self._prometheus_project_name)

        # Wait for the final count to reflect the last operation
        for poll in waiter.Waiter(timeout=60, interval=10):
            final_node_count = _nodes_count_from_prometheus()
            if deleting:
                updated = final_node_count < node_count
            else:
                updated = final_node_count > initial_node_count
            if updated:
                break

        if poll.expired:
            raise exceptions.ExecutionError(
                "Failed to update node details in prometheus")
    def test_create_heketi_cluster_and_add_node(self):
        """Test heketi node add to a newly created cluster.

        Steps:
            1. Read the first 'additional_gluster_servers' entry from the
               config ('manage', 'storage' and the first of 'devices').
            2. Create a new heketi cluster and add that server to it in
               zone 1.
            3. Add the device to the new node and look up its id.
            4. Verify the node shows up in the cluster info and that the
               topology info reports the configured hostname, storage
               address and zone for it.

        Raises:
            exceptions.ConfigError: when the 'manage', 'storage' or
                'devices' option is missing, or 'devices' is empty or not
                subscriptable.
        """
        storage_host_info = g.config.get("additional_gluster_servers")
        if not storage_host_info:
            self.skipTest(
                "Skip test case as 'additional_gluster_servers' option is "
                "not provided in config file")

        storage_host_info = list(storage_host_info.values())[0]
        try:
            storage_hostname = storage_host_info["manage"]
            storage_ip = storage_host_info["storage"]
            storage_device = storage_host_info["devices"][0]
        except (KeyError, IndexError, TypeError):
            # IndexError: 'devices' is an empty list. TypeError: an option
            # is present but not subscriptable (e.g. None). Both are config
            # problems, so they get the same explicit error as a missing key.
            msg = ("Config options 'additional_gluster_servers.manage' "
                   "'additional_gluster_servers.storage' and "
                   "'additional_gluster_servers.devices' "
                   "must be set.")
            g.log.error(msg)
            raise exceptions.ConfigError(msg)

        h_client, h_server = self.heketi_client_node, self.heketi_server_url
        storage_zone = 1

        cluster_id = heketi_ops.heketi_cluster_create(
            h_client, h_server, json=True)["id"]
        # Cleanups run in reverse registration order, so the cluster is
        # deleted only after the node and device cleanups registered below.
        self.addCleanup(
            heketi_ops.heketi_cluster_delete, h_client, h_server, cluster_id)

        self.configure_node_to_run_gluster(storage_hostname)

        heketi_node_info = heketi_ops.heketi_node_add(
            h_client, h_server, storage_zone, cluster_id,
            storage_hostname, storage_ip, json=True)
        heketi_node_id = heketi_node_info["id"]
        # Effective cleanup order: disable -> remove -> delete
        self.addCleanup(heketi_ops.heketi_node_delete, h_client, h_server,
                        heketi_node_id)
        self.addCleanup(heketi_ops.heketi_node_remove, h_client, h_server,
                        heketi_node_id)
        self.addCleanup(heketi_ops.heketi_node_disable, h_client, h_server,
                        heketi_node_id)
        self.assertEqual(
            heketi_node_info["cluster"], cluster_id,
            "Node got added in unexpected cluster exp: %s, act: %s" %
            (cluster_id, heketi_node_info["cluster"]))

        heketi_ops.heketi_device_add(h_client, h_server, storage_device,
                                     heketi_node_id)
        # Re-read the node to find the id heketi assigned to the device
        heketi_node_info = heketi_ops.heketi_node_info(
            h_client, h_server, heketi_node_id, json=True)
        device_id = None
        for device in heketi_node_info["devices"]:
            if device["name"] == storage_device:
                device_id = device["id"]
                break
        err_msg = ("Failed to add device %s on node %s" %
                   (storage_device, heketi_node_id))
        self.assertTrue(device_id, err_msg)

        # Effective cleanup order: disable -> remove -> delete
        self.addCleanup(heketi_ops.heketi_device_delete, h_client, h_server,
                        device_id)
        self.addCleanup(heketi_ops.heketi_device_remove, h_client, h_server,
                        device_id)
        self.addCleanup(heketi_ops.heketi_device_disable, h_client, h_server,
                        device_id)

        cluster_info = heketi_ops.heketi_cluster_info(
            h_client, h_server, cluster_id, json=True)
        self.assertIn(
            heketi_node_info["id"], cluster_info["nodes"],
            "Newly added node %s not found in cluster %s, cluster info %s" %
            (heketi_node_info["id"], cluster_id, cluster_info))

        topology_info = heketi_ops.heketi_topology_info(
            h_client, h_server, json=True)

        # Exactly one cluster entry with our id is expected in the topology
        cluster_details = [
            cluster for cluster in topology_info["clusters"]
            if cluster["id"] == cluster_id
        ]
        err_msg = "Cluster details for id '%s' not found" % cluster_id
        self.assertTrue(cluster_details, err_msg)
        err_msg = ("Multiple clusters with same id '%s' found %s" %
                   (cluster_id, cluster_details))
        self.assertEqual(len(cluster_details), 1, err_msg)

        # ... and exactly one node entry with our node id inside it
        node_details = [
            node for node in cluster_details[0]["nodes"]
            if node["id"] == heketi_node_id
        ]
        err_msg = "Node details for id '%s' not found" % heketi_node_id
        self.assertTrue(node_details, err_msg)
        err_msg = ("Multiple nodes with same id '%s' found %s" %
                   (heketi_node_id, node_details))
        self.assertEqual(len(node_details), 1, err_msg)

        # Values reported by heketi ("found") are compared with the values
        # taken from the config ("expected").
        err_msg = "Unexpected %s found '%s', expected '%s'"
        actual_hostname = node_details[0]["hostnames"]["manage"][0]
        self.assertEqual(
            actual_hostname, storage_hostname,
            err_msg % ("hostname", actual_hostname, storage_hostname))
        actual_ip = node_details[0]["hostnames"]["storage"][0]
        self.assertEqual(
            actual_ip, storage_ip,
            err_msg % ("IP address", actual_ip, storage_ip))
        zone = node_details[0]["zone"]
        self.assertEqual(zone, storage_zone,
                         err_msg % ("zone", zone, storage_zone))
Example #4
0
    def test_dev_path_mapping_heketi_node_delete(self):
        """Validate dev path mapping for heketi node deletion lifecycle.

        Flow, as implemented below:
            1. Pick the 4th entry of the heketi node list.
            2. Create an app pod with a volume, check that IO changes the
               space usage and that the total brick count grew.
            3. Enable the 4th node and assert it is "online".
            4. Disable and remove ``h_node`` (the node returned by
               ``_get_bricks_and_device_details``) and assert that it holds
               no bricks afterwards.
            5. Assert the 4th node now holds as many bricks as ``h_node``
               held before the removal.
            6. Delete the devices of ``h_node`` and then the node itself,
               and verify it is gone from the node list.
            7. Add the node back with the same zone, cluster and hostnames,
               re-add its devices, create 5 PVCs and check that the brick
               count grew and IO is still running.

        Cleanups registered along the way re-disable the 4th node,
        re-enable ``h_node`` and re-add the deleted devices and node (the
        latter three with ``raise_on_error=False``).
        """
        h_client, h_url = self.heketi_client_node, self.heketi_server_url

        node_ids = heketi_ops.heketi_node_list(h_client, h_url)
        self.assertTrue(node_ids, "Failed to get heketi node list")

        # Pick the 4th node (index 3) for the operations
        # NOTE(review): only non-emptiness is asserted above; with fewer
        # than four entries this lookup presumably raises IndexError - confirm
        # the environment always provides at least four nodes.
        h_disable_node = node_ids[3]

        # Fetch bricks on the devices before volume create
        h_node_details_before, h_node = self._get_bricks_and_device_details()

        # Bricks count on the node before pvc creation
        # (index 1 of each entry is used as that entry's brick count)
        brick_count_before = [count[1] for count in h_node_details_before]

        # Create file volume with app pod and verify IO's
        # and compare path, UUID, vg_name
        pod_name, dc_name, use_percent = self._create_app_pod_and_verify_pvs()

        # Check if IO's are running: the usage value must have changed
        use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
        self.assertNotEqual(
            use_percent, use_percent_after,
            "Failed to execute IO's in the app pod {} after respin".format(
                pod_name))

        # Fetch bricks on the devices after volume create
        h_node_details_after, h_node = self._get_bricks_and_device_details()

        # Bricks count on the node after pvc creation
        brick_count_after = [count[1] for count in h_node_details_after]

        self.assertGreater(
            sum(brick_count_after), sum(brick_count_before),
            "Failed to add bricks on the node {}".format(h_node))
        # Cleanup puts the 4th node back into disabled state; it is enabled
        # right below.
        # NOTE(review): this implies the 4th node starts out disabled -
        # confirm against the test environment setup.
        self.addCleanup(heketi_ops.heketi_node_disable, h_client, h_url,
                        h_disable_node)

        # Enable the 4th node
        heketi_ops.heketi_node_enable(h_client, h_url, h_disable_node)
        node_info = heketi_ops.heketi_node_info(h_client,
                                                h_url,
                                                h_disable_node,
                                                json=True)
        # Id of the 4th node; used further down as the migration target
        h_node_id = node_info['id']
        self.assertEqual(node_info['state'], "online",
                         "Failed to enable node {}".format(h_disable_node))

        # Disable the node and check for brick migrations
        # (the re-enable cleanup is registered before the disable call)
        self.addCleanup(heketi_ops.heketi_node_enable,
                        h_client,
                        h_url,
                        h_node,
                        raise_on_error=False)
        heketi_ops.heketi_node_disable(h_client, h_url, h_node)
        node_info = heketi_ops.heketi_node_info(h_client,
                                                h_url,
                                                h_node,
                                                json=True)
        self.assertEqual(node_info['state'], "offline",
                         "Failed to disable node {}".format(h_node))

        # Before bricks migration
        h_node_info = heketi_ops.heketi_node_info(h_client,
                                                  h_url,
                                                  h_node,
                                                  json=True)

        # Bricks, before migration, on the node that is going to be deleted
        bricks_counts_before = 0
        for device in h_node_info['devices']:
            bricks_counts_before += (len(device['bricks']))

        # Remove the node
        heketi_ops.heketi_node_remove(h_client, h_url, h_node)

        # After bricks migration
        h_node_info_after = heketi_ops.heketi_node_info(h_client,
                                                        h_url,
                                                        h_node,
                                                        json=True)

        # Bricks, after migration, on the node that is going to be deleted
        bricks_counts = 0
        for device in h_node_info_after['devices']:
            bricks_counts += (len(device['bricks']))

        self.assertFalse(
            bricks_counts,
            "Failed to remove all the bricks from node {}".format(h_node))

        # Old node is the one to be deleted; new node is the 4th node,
        # where the bricks are expected to reside after the migration
        old_node, new_node = h_node, h_node_id

        # Node info of the new node, where the bricks reside after migration
        h_node_info_new = heketi_ops.heketi_node_info(h_client,
                                                      h_url,
                                                      new_node,
                                                      json=True)

        bricks_counts_after = 0
        for device in h_node_info_new['devices']:
            bricks_counts_after += (len(device['bricks']))

        # The new node must hold exactly as many bricks as the old node
        # held before the removal
        self.assertEqual(
            bricks_counts_before, bricks_counts_after,
            "Failed to migrated bricks from {} node to  {}".format(
                old_node, new_node))

        # Fetch the list of devices that are going to be deleted
        h_node_info = heketi_ops.heketi_node_info(h_client,
                                                  h_url,
                                                  h_node,
                                                  json=True)
        # Each entry is [device id, device name]
        devices_list = [[device['id'], device['name']]
                        for device in h_node_info['devices']]

        for device in devices_list:
            device_id = device[0]
            device_name = device[1]
            # Cleanup tries to add the device back to the old node id
            self.addCleanup(heketi_ops.heketi_device_add,
                            h_client,
                            h_url,
                            device_name,
                            h_node,
                            raise_on_error=False)

            # Device deletion from heketi node
            device_delete = heketi_ops.heketi_device_delete(
                h_client, h_url, device_id)
            self.assertTrue(device_delete,
                            "Failed to delete the device {}".format(device_id))

        # Remember everything needed to add the node back after deletion
        node_info = heketi_ops.heketi_node_info(h_client,
                                                h_url,
                                                h_node,
                                                json=True)
        cluster_id = node_info['cluster']
        zone = node_info['zone']
        storage_hostname = node_info['hostnames']['manage'][0]
        storage_ip = node_info['hostnames']['storage'][0]

        # Delete the node (cleanup tries to add it back)
        self.addCleanup(heketi_ops.heketi_node_add,
                        h_client,
                        h_url,
                        zone,
                        cluster_id,
                        storage_hostname,
                        storage_ip,
                        raise_on_error=False)
        heketi_ops.heketi_node_delete(h_client, h_url, h_node)

        # Verify if the node is deleted
        node_ids = heketi_ops.heketi_node_list(h_client, h_url)
        self.assertNotIn(old_node, node_ids,
                         "Failed to delete the node {}".format(old_node))

        # Check if IO's are running
        use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
        self.assertNotEqual(
            use_percent, use_percent_after,
            "Failed to execute IO's in the app pod {} after respin".format(
                pod_name))

        # Adding node back
        h_node_info = heketi_ops.heketi_node_add(h_client,
                                                 h_url,
                                                 zone,
                                                 cluster_id,
                                                 storage_hostname,
                                                 storage_ip,
                                                 json=True)
        self.assertTrue(
            h_node_info,
            "Failed to add the node in the cluster {}".format(cluster_id))
        # From here on h_node_id is the id of the re-added node
        h_node_id = h_node_info["id"]

        # Adding devices to the new node
        for device in devices_list:
            storage_device = device[1]

            # Add device to the new heketi node
            heketi_ops.heketi_device_add(h_client, h_url, storage_device,
                                         h_node_id)
            heketi_node_info = heketi_ops.heketi_node_info(h_client,
                                                           h_url,
                                                           h_node_id,
                                                           json=True)
            device_id = None
            # NOTE: this inner loop rebinds the outer loop variable
            # 'device'; storage_device was already captured above and the
            # outer loop rebinds 'device' on its next iteration.
            for device in heketi_node_info["devices"]:
                if device["name"] == storage_device:
                    device_id = device["id"]
                    break

            self.assertTrue(
                device_id, "Failed to add device {} on node {}".format(
                    storage_device, h_node_id))

        # Create n pvc in order to verify if the bricks reside on the new node
        pvc_amount, pvc_size = 5, 1

        # Fetch bricks on the devices before volume create
        # (h_node is rebound to whatever node the helper returns)
        h_node_details_before, h_node = self._get_bricks_and_device_details()

        # Bricks count on the node before pvc creation
        brick_count_before = [count[1] for count in h_node_details_before]

        # Create file volumes
        pvc_name = self.create_and_wait_for_pvcs(pvc_size=pvc_size,
                                                 pvc_amount=pvc_amount)
        self.assertEqual(len(pvc_name), pvc_amount,
                         "Failed to create {} pvc".format(pvc_amount))

        # Fetch bricks on the devices after volume create
        h_node_details_after, h_node = self._get_bricks_and_device_details()

        # Bricks count on the node after pvc creation
        brick_count_after = [count[1] for count in h_node_details_after]

        # NOTE(review): 'new_node' still holds the id of the 4th node
        # captured earlier, not the id returned by the re-add above -
        # confirm the message names the intended node.
        self.assertGreater(
            sum(brick_count_after), sum(brick_count_before),
            "Failed to add bricks on the new node {}".format(new_node))

        # Check if IO's are running after new node is added
        use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
        self.assertNotEqual(
            use_percent, use_percent_after,
            "Failed to execute IO's in the app pod {} after respin".format(
                pod_name))