def test_brick_evict_on_three_node_with_one_down(self):
        """Test brick evict basic functionality and verify brick evict
        will fail after node down if nodes are three"""

        h_node, h_server = self.heketi_client_node, self.heketi_server_url

        # Disable node if more than 3
        node_list = heketi_ops.heketi_node_list(h_node, h_server)
        if len(node_list) > 3:
            for node_id in node_list[3:]:
                heketi_ops.heketi_node_disable(h_node, h_server, node_id)
                self.addCleanup(heketi_ops.heketi_node_enable, h_node,
                                h_server, node_id)

        # Create heketi volume
        vol_info = heketi_ops.heketi_volume_create(h_node,
                                                   h_server,
                                                   1,
                                                   json=True)
        self.addCleanup(heketi_ops.heketi_volume_delete, h_node, h_server,
                        vol_info.get('id'))

        # Get node on which heketi pod is scheduled
        heketi_pod = openshift_ops.get_pod_name_from_dc(
            self.ocp_client, self.heketi_dc_name)
        heketi_node = openshift_ops.oc_get_custom_resource(
            self.ocp_client, 'pod', '.:spec.nodeName', heketi_pod)[0]

        # Get list of hostname from node id
        host_list = []
        for node_id in node_list[3:]:
            node_info = heketi_ops.heketi_node_info(h_node,
                                                    h_server,
                                                    node_id,
                                                    json=True)
            host_list.append(node_info.get('hostnames').get('manage')[0])

        # Get brick id and glusterfs node which is not heketi node
        for node in vol_info.get('bricks', {}):
            node_info = heketi_ops.heketi_node_info(h_node,
                                                    h_server,
                                                    node.get('node'),
                                                    json=True)
            hostname = node_info.get('hostnames').get('manage')[0]
            if (hostname != heketi_node) and (hostname not in host_list):
                brick_id = node.get('id')
                break

        self._power_off_node_and_wait_node_to_be_not_ready(hostname)

        # Perform brick evict operation
        try:
            heketi_ops.heketi_brick_evict(h_node, h_server, brick_id)
        except AssertionError as e:
            if ('No Replacement was found' not in six.text_type(e)):
                raise
    def delete_node_and_devices_on_it(self, node_id):

        heketi_ops.heketi_node_disable(self.h_node, self.h_url, node_id)
        heketi_ops.heketi_node_remove(self.h_node, self.h_url, node_id)
        node_info = heketi_ops.heketi_node_info(
            self.h_node, self.h_url, node_id, json=True)
        for device in node_info['devices']:
            heketi_ops.heketi_device_delete(
                self.h_node, self.h_url, device['id'])
        heketi_ops.heketi_node_delete(self.h_node, self.h_url, node_id)
    def get_ready_for_node_add(self, hostname):
        self.configure_node_to_run_gluster(hostname)

        h_nodes = heketi_ops.heketi_node_list(self.h_node, self.h_url)

        # Disable nodes except first two nodes
        for node_id in h_nodes[2:]:
            heketi_ops.heketi_node_disable(self.h_node, self.h_url, node_id)
            self.addCleanup(heketi_ops.heketi_node_enable, self.h_node,
                            self.h_url, node_id)
    def test_heketi_node_states_enable_disable(self):
        """Test node enable and disable functionality
        """
        h_client, h_server = self.heketi_client_node, self.heketi_server_url

        node_list = heketi_ops.heketi_node_list(h_client, h_server)
        online_hosts = []
        for node_id in node_list:
            node_info = heketi_ops.heketi_node_info(h_client,
                                                    h_server,
                                                    node_id,
                                                    json=True)
            if node_info["state"] == "online":
                online_hosts.append(node_info)

        if len(online_hosts) < 3:
            raise self.skipTest(
                "This test can run only if online hosts are more than 2")

        #  Disable n-3 nodes, in case we have n nodes
        for node_info in online_hosts[3:]:
            node_id = node_info["id"]
            heketi_ops.heketi_node_disable(h_client, h_server, node_id)
            self.addCleanup(heketi_ops.heketi_node_enable, h_client, h_server,
                            node_id)

        # Create volume when 3 nodes are online
        vol_size = 1
        vol_info = heketi_ops.heketi_volume_create(h_client,
                                                   h_server,
                                                   vol_size,
                                                   json=True)
        self.addCleanup(heketi_ops.heketi_volume_delete, h_client, h_server,
                        vol_info['id'])

        node_id = online_hosts[0]['id']
        try:
            heketi_ops.heketi_node_disable(h_client, h_server, node_id)

            # Try to create a volume, volume creation should fail
            with self.assertRaises(AssertionError):
                heketi_volume = heketi_ops.heketi_volume_create(
                    h_client, h_server, vol_size)
                self.addCleanup(heketi_ops.heketi_volume_delete, h_client,
                                h_server, heketi_volume["id"])
        finally:
            # Enable heketi node
            heketi_ops.heketi_node_enable(h_client, h_server, node_id)

        # Create volume when heketi node is enabled
        vol_info = heketi_ops.heketi_volume_create(h_client,
                                                   h_server,
                                                   vol_size,
                                                   json=True)
        heketi_ops.heketi_volume_delete(h_client, h_server, vol_info['id'])
    def test_brick_multiplex_pids_with_diff_vol_option_values(self):
        """Test Brick Pid's should be same when values of vol options are diff
        """
        h_client, h_url = self.heketi_client_node, self.heketi_server_url
        # Disable heketi nodes except first three nodes
        h_nodes_list = heketi_node_list(h_client, h_url)
        for node_id in h_nodes_list[3:]:
            heketi_node_disable(h_client, h_url, node_id)
            self.addCleanup(heketi_node_enable, h_client, h_url, node_id)

        # Create storage class with diff volumeoptions
        sc1 = self.create_storage_class(volumeoptions='user.heketi.abc 1')
        sc2 = self.create_storage_class(volumeoptions='user.heketi.abc 2')
        # Create PVC's with above SC
        pvc1 = self.create_and_wait_for_pvcs(sc_name=sc1)
        pvc2 = self.create_and_wait_for_pvcs(sc_name=sc2)

        # Get vol info and status
        vol_info1 = get_gluster_vol_info_by_pvc_name(self.node, pvc1[0])
        vol_info2 = get_gluster_vol_info_by_pvc_name(self.node, pvc2[0])
        vol_status1 = get_gluster_vol_status(vol_info1['gluster_vol_id'])
        vol_status2 = get_gluster_vol_status(vol_info2['gluster_vol_id'])

        # Verify vol options
        err_msg = ('Volume option "user.heketi.abc %s" did not got match for '
                   'volume %s in gluster vol info')
        self.assertEqual(
            vol_info1['options']['user.heketi.abc'], '1',
            err_msg % (1, vol_info1['gluster_vol_id']))
        self.assertEqual(
            vol_info2['options']['user.heketi.abc'], '2',
            err_msg % (2, vol_info2['gluster_vol_id']))

        # Get the PID's and match them
        pids1 = set()
        for brick in vol_info1['bricks']['brick']:
            host, bname = brick['name'].split(":")
            pids1.add(vol_status1[host][bname]['pid'])

        pids2 = set()
        for brick in vol_info2['bricks']['brick']:
            host, bname = brick['name'].split(":")
            pids2.add(vol_status2[host][bname]['pid'])

        err_msg = ('Pids of both the volumes %s and %s are expected to be'
                   'same. But got the different Pids "%s" and "%s".' %
                   (vol_info1['gluster_vol_id'], vol_info2['gluster_vol_id'],
                    pids1, pids2))
        self.assertEqual(pids1, pids2, err_msg)
Beispiel #6
0
    def get_online_nodes_disable_redundant(self):
        """
        Find online nodes and disable n-3 nodes and return
        list of online nodes
        """
        node_list = heketi_node_list(self.heketi_client_node,
                                     self.heketi_server_url)
        self.assertTrue(node_list, "Failed to list heketi nodes")
        g.log.info("Successfully got the list of nodes")
        # Fetch online nodes  from node list
        online_hosts = []

        for node in node_list:
            node_info = heketi_node_info(
                self.heketi_client_node, self.heketi_server_url,
                node, json=True)
            if node_info["state"] == "online":
                online_hosts.append(node_info)

        # Skip test if online node count is less than 3i
        if len(online_hosts) < 3:
            raise self.skipTest(
                "This test can run only if online hosts are more than 2")
        # if we have n nodes, disable n-3 nodes
        for node_info in online_hosts[3:]:
            node_id = node_info["id"]
            g.log.info("going to disable node id %s", node_id)
            heketi_node_disable(self.heketi_client_node,
                                self.heketi_server_url,
                                node_id)
            self.addCleanup(heketi_node_enable,
                            self.heketi_client_node,
                            self.heketi_server_url,
                            node_id)

        for host in online_hosts[1:3]:
            found_online = False
            for device in host["devices"]:
                if device["state"].strip().lower() == "online":
                    found_online = True
                    break
            if not found_online:
                self.skipTest(("no device online on node %s" % host["id"]))

        return online_hosts
    def disable_node(self, node_id):
        """
        Disable node through heketi-cli.

        :param node_id: str node ID
        """
        out = heketi_node_disable(self.heketi_client_node,
                                  self.heketi_server_url, node_id)

        self.assertNotEqual(out, False, "Failed to disable node of"
                            " id %s" % node_id)
Beispiel #8
0
    def setUp(self):
        super(TestDevPathMapping, self).setUp()
        self.node = self.ocp_master_node[0]
        self.h_node, self.h_server = (self.heketi_client_node,
                                      self.heketi_server_url)
        h_nodes_list = heketi_ops.heketi_node_list(self.h_node, self.h_server)
        h_node_count = len(h_nodes_list)
        if h_node_count < 3:
            self.skipTest(
                "At least 3 nodes are required, found {}".format(h_node_count))

        # Disable 4th and other nodes
        for node_id in h_nodes_list[3:]:
            self.addCleanup(heketi_ops.heketi_node_enable, self.h_node,
                            self.h_server, node_id)
            heketi_ops.heketi_node_disable(self.h_node, self.h_server, node_id)

        h_info = heketi_ops.heketi_node_info(self.h_node,
                                             self.h_server,
                                             h_nodes_list[0],
                                             json=True)
        self.assertTrue(
            h_info, "Failed to get the heketi node info for node id"
            " {}".format(h_nodes_list[0]))

        self.node_ip = h_info['hostnames']['storage'][0]
        self.node_hostname = h_info["hostnames"]["manage"][0]
        self.vm_name = node_ops.find_vm_name_by_ip_or_hostname(
            self.node_hostname)
        self.devices_list = [device['name'] for device in h_info["devices"]]

        # Get list of additional devices for one of the Gluster nodes
        for gluster_server in list(g.config["gluster_servers"].values()):
            if gluster_server['storage'] == self.node_ip:
                additional_device = gluster_server.get("additional_devices")
                if additional_device:
                    self.devices_list.extend(additional_device)

        # sort the devices list
        self.devices_list.sort()
    def disable_node(self, node_id):
        """
        Disable node through heketi-cli.

        :param node_id: str node ID
        """
        out = heketi_node_disable(self.heketi_client_node,
                                  self.heketi_server_url,
                                  node_id)

        self.assertNotEqual(out, False,
                            "Failed to disable node of"
                            " id %s" % node_id)
Beispiel #10
0
    def _get_vol_size(self):
        # Get available free space disabling redundant nodes
        min_free_space_gb = 5
        heketi_url = self.heketi_server_url
        node_ids = heketi_node_list(self.heketi_client_node, heketi_url)
        self.assertTrue(node_ids)
        nodes = {}
        min_free_space = min_free_space_gb * 1024**2
        for node_id in node_ids:
            node_info = heketi_node_info(self.heketi_client_node,
                                         heketi_url,
                                         node_id,
                                         json=True)
            if (node_info['state'].lower() != 'online'
                    or not node_info['devices']):
                continue
            if len(nodes) > 2:
                out = heketi_node_disable(self.heketi_client_node, heketi_url,
                                          node_id)
                self.assertTrue(out)
                self.addCleanup(heketi_node_enable, self.heketi_client_node,
                                heketi_url, node_id)
            for device in node_info['devices']:
                if device['state'].lower() != 'online':
                    continue
                free_space = device['storage']['free']
                if free_space < min_free_space:
                    out = heketi_device_disable(self.heketi_client_node,
                                                heketi_url, device['id'])
                    self.assertTrue(out)
                    self.addCleanup(heketi_device_enable,
                                    self.heketi_client_node, heketi_url,
                                    device['id'])
                    continue
                if node_id not in nodes:
                    nodes[node_id] = []
                nodes[node_id].append(device['storage']['free'])

        # Skip test if nodes requirements are not met
        if (len(nodes) < 3
                or not all(map(
                    (lambda _list: len(_list) > 1), nodes.values()))):
            raise self.skipTest("Could not find 3 online nodes with, "
                                "at least, 2 online devices having free space "
                                "bigger than %dGb." % min_free_space_gb)

        # Calculate size of a potential distributed vol
        vol_size_gb = int(min(map(max, nodes.values())) / (1024**2)) + 1
        return vol_size_gb
    def _available_disk_free_space(self):
        min_free_space_gb = 3
        # Get available free space disabling redundant devices and nodes
        heketi_url = self.heketi_server_url
        node_id_list = heketi_ops.heketi_node_list(self.heketi_client_node,
                                                   heketi_url)
        self.assertTrue(node_id_list)
        nodes = {}
        min_free_space = min_free_space_gb * 1024**2
        for node_id in node_id_list:
            node_info = heketi_ops.heketi_node_info(self.heketi_client_node,
                                                    heketi_url,
                                                    node_id,
                                                    json=True)
            if (node_info['state'].lower() != 'online'
                    or not node_info['devices']):
                continue
            if len(nodes) > 2:
                self.addCleanup(heketi_ops.heketi_node_enable,
                                self.heketi_client_node, heketi_url, node_id)
                out = heketi_ops.heketi_node_disable(self.heketi_client_node,
                                                     heketi_url, node_id)
                self.assertTrue(out)

            for device in node_info['devices']:
                if device['state'].lower() != 'online':
                    continue
                free_space = device['storage']['free']
                if (node_id in nodes.keys() or free_space < min_free_space):
                    out = heketi_ops.heketi_device_disable(
                        self.heketi_client_node, heketi_url, device['id'])
                    self.assertTrue(out)
                    self.addCleanup(heketi_ops.heketi_device_enable,
                                    self.heketi_client_node, heketi_url,
                                    device['id'])
                    continue
                nodes[node_id] = free_space
        if len(nodes) < 3:
            raise self.skipTest("Could not find 3 online nodes with, "
                                "at least, 1 online device having free space "
                                "bigger than %dGb." % min_free_space_gb)

        # Calculate maximum available size for PVC
        available_size_gb = int(min(nodes.values()) / (1024**2))
        return available_size_gb
Beispiel #12
0
    def test_create_volumes_enabling_and_disabling_heketi_devices(self):
        """Validate enable/disable of heketi device"""

        # Get nodes info
        node_id_list = heketi_ops.heketi_node_list(self.heketi_client_node,
                                                   self.heketi_server_url)
        node_info_list = []
        for node_id in node_id_list[0:3]:
            node_info = heketi_ops.heketi_node_info(self.heketi_client_node,
                                                    self.heketi_server_url,
                                                    node_id,
                                                    json=True)
            node_info_list.append(node_info)

        # Disable 4th and other nodes
        if len(node_id_list) > 3:
            for node_id in node_id_list[3:]:
                heketi_ops.heketi_node_disable(self.heketi_client_node,
                                               self.heketi_server_url, node_id)
                self.addCleanup(heketi_ops.heketi_node_enable,
                                self.heketi_client_node,
                                self.heketi_server_url, node_id)

        # Disable second and other devices on the first 3 nodes
        for node_info in node_info_list[0:3]:
            devices = node_info["devices"]
            self.assertTrue(
                devices, "Node '%s' does not have devices." % node_info["id"])
            if devices[0]["state"].strip().lower() != "online":
                self.skipTest("Test expects first device to be enabled.")
            if len(devices) < 2:
                continue
            for device in node_info["devices"][1:]:
                out = heketi_ops.heketi_device_disable(self.heketi_client_node,
                                                       self.heketi_server_url,
                                                       device["id"])
                self.assertTrue(
                    out, "Failed to disable the device %s" % device["id"])
                self.addCleanup(heketi_ops.heketi_device_enable,
                                self.heketi_client_node,
                                self.heketi_server_url, device["id"])

        # Create heketi volume
        out = heketi_ops.heketi_volume_create(self.heketi_client_node,
                                              self.heketi_server_url,
                                              1,
                                              json=True)
        self.assertTrue(out, "Failed to create heketi volume of size 1")
        g.log.info("Successfully created heketi volume of size 1")
        device_id = out["bricks"][0]["device"]
        self.addCleanup(heketi_ops.heketi_volume_delete,
                        self.heketi_client_node, self.heketi_server_url,
                        out["bricks"][0]["volume"])

        # Disable device
        g.log.info("Disabling '%s' device" % device_id)
        out = heketi_ops.heketi_device_disable(self.heketi_client_node,
                                               self.heketi_server_url,
                                               device_id)
        self.assertTrue(out, "Failed to disable the device %s" % device_id)
        g.log.info("Successfully disabled device %s" % device_id)

        try:
            # Get device info
            g.log.info("Retrieving '%s' device info" % device_id)
            out = heketi_ops.heketi_device_info(self.heketi_client_node,
                                                self.heketi_server_url,
                                                device_id,
                                                json=True)
            self.assertTrue(out, "Failed to get device info %s" % device_id)
            g.log.info("Successfully retrieved device info %s" % device_id)
            name = out["name"]
            self.assertEqual(out["state"].lower().strip(), "offline",
                             "Device %s is not in offline state." % name)
            g.log.info("Device %s is now offine" % name)

            # Try to create heketi volume
            g.log.info("Creating heketi volume: Expected to fail.")
            try:
                out = heketi_ops.heketi_volume_create(self.heketi_client_node,
                                                      self.heketi_server_url,
                                                      1,
                                                      json=True)
            except AssertionError:
                g.log.info("Volume was not created as expected.")
            else:
                self.addCleanup(heketi_ops.heketi_volume_delete,
                                self.heketi_client_node,
                                self.heketi_server_url,
                                out["bricks"][0]["volume"])
                msg = "Volume unexpectedly created. Out: %s" % out
                assert False, msg
        finally:
            # Enable the device back
            g.log.info("Enable '%s' device back." % device_id)
            out = heketi_ops.heketi_device_enable(self.heketi_client_node,
                                                  self.heketi_server_url,
                                                  device_id)
            self.assertTrue(out, "Failed to enable the device %s" % device_id)
            g.log.info("Successfully enabled device %s" % device_id)

        # Get device info
        out = heketi_ops.heketi_device_info(self.heketi_client_node,
                                            self.heketi_server_url,
                                            device_id,
                                            json=True)
        self.assertTrue(out, ("Failed to get device info %s" % device_id))
        g.log.info("Successfully retrieved device info %s" % device_id)
        name = out["name"]
        self.assertEqual(out["state"], "online",
                         "Device %s is not in online state." % name)

        # Create heketi volume of size
        out = heketi_ops.heketi_volume_create(self.heketi_client_node,
                                              self.heketi_server_url,
                                              1,
                                              json=True)
        self.assertTrue(out, "Failed to create volume of size 1")
        self.addCleanup(heketi_ops.heketi_volume_delete,
                        self.heketi_client_node, self.heketi_server_url,
                        out["bricks"][0]["volume"])
        g.log.info("Successfully created volume of size 1")
        name = out["name"]

        # Get gluster volume info
        vol_info = get_volume_info('auto_get_gluster_endpoint', volname=name)
        self.assertTrue(vol_info, "Failed to get '%s' volume info." % name)
        g.log.info("Successfully got the '%s' volume info." % name)
Beispiel #13
0
    def test_heketi_manual_cleanup_operation_in_bhv(self):
        """Validate heketi db cleanup will resolve the mismatch
           in the free size of the block hosting volume with failed
           block device create operations.
        """
        bhv_size_before, bhv_size_after, vol_count = [], [], 5
        ocp_node, g_node = self.ocp_master_node[0], self.gluster_servers[0]
        h_node, h_url = self.heketi_client_node, self.heketi_server_url

        # Get existing heketi volume list
        existing_volumes = heketi_volume_list(h_node, h_url, json=True)

        # Add function to clean stale volumes created during test
        self.addCleanup(self._cleanup_heketi_volumes,
                        existing_volumes.get("volumes"))

        # Get nodes id list
        node_id_list = heketi_node_list(h_node, h_url)

        # Disable 4th and other nodes
        for node_id in node_id_list[3:]:
            heketi_node_disable(h_node, h_url, node_id)
            self.addCleanup(heketi_node_enable, h_node, h_url, node_id)

        # Calculate heketi volume size
        free_space, nodenum = get_total_free_space(h_node, h_url)
        free_space_available = int(free_space / nodenum)
        if free_space_available > vol_count:
            h_volume_size = int(free_space_available / vol_count)
            if h_volume_size > 50:
                h_volume_size = 50
        else:
            h_volume_size, vol_count = 1, free_space_available

        # Create BHV in case blockvolume size is greater than default BHV size
        default_bhv_size = get_default_block_hosting_volume_size(
            h_node, self.heketi_dc_name)
        if default_bhv_size < h_volume_size:
            h_volume_name = "autotest-{}".format(utils.get_random_str())
            bhv_info = self.create_heketi_volume_with_name_and_wait(
                h_volume_name,
                free_space_available,
                raise_on_cleanup_error=False,
                block=True,
                json=True)
            free_space_available -= (
                int(bhv_info.get("blockinfo").get("reservedsize")) + 1)
            h_volume_size = int(free_space_available / vol_count)

        # Get BHV list
        h_bhv_list = get_block_hosting_volume_list(h_node, h_url).keys()
        self.assertTrue(h_bhv_list, "Failed to get the BHV list")

        # Get BHV size
        for bhv in h_bhv_list:
            vol_info = heketi_volume_info(h_node, h_url, bhv, json=True)
            bhv_vol_size_before = vol_info.get("freesize")
            bhv_size_before.append(bhv_vol_size_before)

        # Kill Tcmu-runner service
        services = ("tcmu-runner", "gluster-block-target", "gluster-blockd")
        kill_service_on_gluster_pod_or_node(ocp_node, "tcmu-runner", g_node)

        # Restart the services
        for service in services:
            state = ('exited'
                     if service == 'gluster-block-target' else 'running')
            self.addCleanup(wait_for_service_status_on_gluster_pod_or_node,
                            ocp_node, service, 'active', state, g_node)
            self.addCleanup(restart_service_on_gluster_pod_or_node, ocp_node,
                            service, g_node)

        def run_async(cmd, hostname, raise_on_error=True):
            return g.run_async(host=hostname, command=cmd)

        # Create stale block volumes in async
        for count in range(vol_count):
            with mock.patch.object(json, 'loads', side_effect=(lambda j: j)):
                with mock.patch.object(command,
                                       'cmd_run',
                                       side_effect=run_async):
                    heketi_blockvolume_create(h_node,
                                              h_url,
                                              h_volume_size,
                                              json=True)

        # Wait for pending operation to get generated
        self._check_for_pending_operations(h_node, h_url)

        # Restart the services
        for service in services:
            state = ('exited'
                     if service == 'gluster-block-target' else 'running')
            restart_service_on_gluster_pod_or_node(ocp_node, service, g_node)
            wait_for_service_status_on_gluster_pod_or_node(
                ocp_node, service, 'active', state, g_node)

        # Cleanup pending operation
        heketi_server_operation_cleanup(h_node, h_url)

        # wait for pending operation to get cleaned up
        for w in waiter.Waiter(timeout=120, interval=10):
            # Get BHV size
            for bhv in h_bhv_list:
                vol_info = heketi_volume_info(h_node, h_url, bhv, json=True)
                bhv_vol_size_after = vol_info.get("freesize")
                bhv_size_after.append(bhv_vol_size_after)

            if (set(bhv_size_before) == set(bhv_size_after)):
                break
        if w.expired:
            raise exceptions.ExecutionError(
                "Failed to Validate volume size Actual:{},"
                " Expected:{}".format(set(bhv_size_before),
                                      set(bhv_size_after)))
Beispiel #14
0
    def test_brick_evict_with_node_down(self):
        """Test brick evict basic functionality and verify brick evict
        after node down"""

        h_node, h_server = self.heketi_client_node, self.heketi_server_url

        # Disable node if more than 3
        node_list = heketi_ops.heketi_node_list(h_node, h_server)
        if len(node_list) > 3:
            for node_id in node_list[3:]:
                heketi_ops.heketi_node_disable(h_node, h_server, node_id)
                self.addCleanup(heketi_ops.heketi_node_enable, h_node,
                                h_server, node_id)

        # Create heketi volume
        vol_info = heketi_ops.heketi_volume_create(h_node,
                                                   h_server,
                                                   1,
                                                   json=True)
        self.addCleanup(heketi_ops.heketi_volume_delete, h_node, h_server,
                        vol_info.get('id'))

        # Get node on which heketi pod is scheduled
        heketi_pod = openshift_ops.get_pod_name_from_dc(
            self.ocp_client, self.heketi_dc_name)
        heketi_node = openshift_ops.oc_get_custom_resource(
            self.ocp_client, 'pod', '.:spec.nodeName', heketi_pod)[0]

        # Get list of hostname from node id
        host_list = []
        for node_id in node_list[3:]:
            node_info = heketi_ops.heketi_node_info(h_node,
                                                    h_server,
                                                    node_id,
                                                    json=True)
            host_list.append(node_info.get('hostnames').get('manage')[0])

        # Get brick id and glusterfs node which is not heketi node
        for node in vol_info.get('bricks', {}):
            node_info = heketi_ops.heketi_node_info(h_node,
                                                    h_server,
                                                    node.get('node'),
                                                    json=True)
            hostname = node_info.get('hostnames').get('manage')[0]
            if (hostname != heketi_node) and (hostname not in host_list):
                brick_id = node.get('id')
                break

        # Bring down the glusterfs node
        vm_name = node_ops.find_vm_name_by_ip_or_hostname(hostname)
        self.addCleanup(self._wait_for_gluster_pod_after_node_reboot, hostname)
        self.addCleanup(node_ops.power_on_vm_by_name, vm_name)
        node_ops.power_off_vm_by_name(vm_name)

        # Wait glusterfs node to become NotReady
        custom = r'":.status.conditions[?(@.type==\"Ready\")]".status'
        for w in waiter.Waiter(300, 20):
            status = openshift_ops.oc_get_custom_resource(
                self.ocp_client, 'node', custom, hostname)
            if status[0] in ['False', 'Unknown']:
                break
        if w.expired:
            raise exceptions.ExecutionError(
                "Failed to bring down node {}".format(hostname))

        # Perform brick evict operation
        try:
            heketi_ops.heketi_brick_evict(h_node, h_server, brick_id)
        except AssertionError as e:
            if ('No Replacement was found' not in six.text_type(e)):
                raise
    def test_targetcli_failure_during_block_pvc_creation(self):
        h_node, h_server = self.heketi_client_node, self.heketi_server_url

        # Disable redundant nodes and leave just 3 nodes online
        h_node_id_list = heketi_node_list(h_node, h_server)
        self.assertGreater(len(h_node_id_list), 2)
        for node_id in h_node_id_list[3:]:
            heketi_node_disable(h_node, h_server, node_id)
            self.addCleanup(heketi_node_enable, h_node, h_server, node_id)

        # Gather info about the Gluster node we are going to use for killing
        # targetcli processes.
        chosen_g_node_id = h_node_id_list[0]
        chosen_g_node_info = heketi_node_info(h_node,
                                              h_server,
                                              chosen_g_node_id,
                                              json=True)
        chosen_g_node_ip = chosen_g_node_info['hostnames']['storage'][0]
        chosen_g_node_hostname = chosen_g_node_info['hostnames']['manage'][0]
        chosen_g_node_ip_and_hostname = set(
            (chosen_g_node_ip, chosen_g_node_hostname))

        g_pods = oc_get_custom_resource(
            self.node,
            'pod', [
                ':.metadata.name', ':.status.hostIP', ':.status.podIP',
                ':.spec.nodeName'
            ],
            selector='glusterfs-node=pod')
        if g_pods and g_pods[0]:
            for g_pod in g_pods:
                if chosen_g_node_ip_and_hostname.intersection(set(g_pod[1:])):
                    host_to_run_cmds = self.node
                    g_pod_prefix, g_pod = 'oc exec %s -- ' % g_pod[0], g_pod[0]
                    break
            else:
                err_msg = (
                    'Failed to find Gluster pod filtering it by following IPs '
                    'and hostnames: %s\nFound following Gluster pods: %s') % (
                        chosen_g_node_ip_and_hostname, g_pods)
                g.log.error(err_msg)
                raise AssertionError(err_msg)
        else:
            host_to_run_cmds, g_pod_prefix, g_pod = chosen_g_node_ip, '', ''

        # Schedule deletion of targetcli process
        file_for_bkp, pvc_number = "~/.targetcli/prefs.bin", 10
        self.cmd_run("%scp %s %s_backup" %
                     (g_pod_prefix, file_for_bkp, file_for_bkp),
                     hostname=host_to_run_cmds)
        self.addCleanup(self.cmd_run,
                        "%srm -f %s_backup" % (g_pod_prefix, file_for_bkp),
                        hostname=host_to_run_cmds)
        kill_targetcli_services_cmd = (
            "while true; do "
            "  %spkill targetcli || echo 'failed to kill targetcli process'; "
            "done" % g_pod_prefix)
        loop_for_killing_targetcli_process = g.run_async(
            host_to_run_cmds, kill_targetcli_services_cmd, "root")
        try:
            # Create bunch of PVCs
            sc_name, pvc_names = self.create_storage_class(), []
            for i in range(pvc_number):
                pvc_names.append(oc_create_pvc(self.node, sc_name, pvc_size=1))
            self.addCleanup(wait_for_resources_absence, self.node, 'pvc',
                            pvc_names)
            self.addCleanup(oc_delete, self.node, 'pvc', ' '.join(pvc_names))

            # Check that we get expected number of provisioning errors
            timeout, wait_step, succeeded_pvcs, failed_pvcs = 120, 1, [], []
            _waiter, err_msg = Waiter(timeout=timeout, interval=wait_step), ""
            for pvc_name in pvc_names:
                _waiter._attempt = 0
                for w in _waiter:
                    events = get_events(self.node,
                                        pvc_name,
                                        obj_type="PersistentVolumeClaim")
                    for event in events:
                        if event['reason'] == 'ProvisioningSucceeded':
                            succeeded_pvcs.append(pvc_name)
                            break
                        elif event['reason'] == 'ProvisioningFailed':
                            failed_pvcs.append(pvc_name)
                            break
                    else:
                        continue
                    break
                if w.expired:
                    err_msg = (
                        "Failed to get neither 'ProvisioningSucceeded' nor "
                        "'ProvisioningFailed' statuses for all the PVCs in "
                        "time. Timeout was %ss, interval was %ss." %
                        (timeout, wait_step))
                    g.log.error(err_msg)
                    raise AssertionError(err_msg)
            self.assertGreater(len(failed_pvcs), len(succeeded_pvcs))
        finally:
            # Restore targetcli workability
            loop_for_killing_targetcli_process._proc.terminate()

            # Revert breakage back which can be caused by BZ-1769426
            check_bkp_file_size_cmd = ("%sls -lah %s | awk '{print $5}'" %
                                       (g_pod_prefix, file_for_bkp))
            bkp_file_size = self.cmd_run(check_bkp_file_size_cmd,
                                         hostname=host_to_run_cmds).strip()
            if bkp_file_size == "0":
                self.cmd_run("%smv %s_backup %s" %
                             (g_pod_prefix, file_for_bkp, file_for_bkp),
                             hostname=host_to_run_cmds)
                breakage_err_msg = (
                    "File located at '%s' was corrupted (zero size) on the "
                    "%s. Looks like BZ-1769426 took effect. \n"
                    "Don't worry, it has been restored after test failure." %
                    (file_for_bkp, "'%s' Gluster pod" % g_pod
                     if g_pod else "'%s' Gluster node" % chosen_g_node_ip))
                g.log.error(breakage_err_msg)
                if err_msg:
                    breakage_err_msg = "%s\n%s" % (err_msg, breakage_err_msg)
                raise AssertionError(breakage_err_msg)

        # Wait for all the PVCs to be in bound state
        wait_for_pvcs_be_bound(self.node, pvc_names, timeout=300, wait_step=5)
Beispiel #16
0
    def test_volume_expansion_no_free_space(self):
        """Validate volume expansion when there is no free space"""

        vol_size, expand_size, additional_devices_attached = None, 10, {}
        h_node, h_server_url = self.heketi_client_node, self.heketi_server_url

        # Get nodes info
        heketi_node_id_list = heketi_ops.heketi_node_list(h_node, h_server_url)
        if len(heketi_node_id_list) < 3:
            self.skipTest("3 Heketi nodes are required.")

        # Disable 4th and other nodes
        for node_id in heketi_node_id_list[3:]:
            heketi_ops.heketi_node_disable(h_node, h_server_url, node_id)
            self.addCleanup(
                heketi_ops.heketi_node_enable, h_node, h_server_url, node_id)

        # Prepare first 3 nodes
        smallest_size = None
        err_msg = ''
        for node_id in heketi_node_id_list[0:3]:
            node_info = heketi_ops.heketi_node_info(
                h_node, h_server_url, node_id, json=True)

            # Disable second and other devices
            devices = node_info["devices"]
            self.assertTrue(
                devices, "Node '%s' does not have devices." % node_id)
            if devices[0]["state"].strip().lower() != "online":
                self.skipTest("Test expects first device to be enabled.")
            if (smallest_size is None
                    or devices[0]["storage"]["free"] < smallest_size):
                smallest_size = devices[0]["storage"]["free"]
            for device in node_info["devices"][1:]:
                heketi_ops.heketi_device_disable(
                    h_node, h_server_url, device["id"])
                self.addCleanup(
                    heketi_ops.heketi_device_enable,
                    h_node, h_server_url, device["id"])

            # Gather info about additional devices
            additional_device_name = None
            for gluster_server in self.gluster_servers:
                gluster_server_data = self.gluster_servers_info[gluster_server]
                g_manage = gluster_server_data["manage"]
                g_storage = gluster_server_data["storage"]
                if not (g_manage in node_info["hostnames"]["manage"]
                        or g_storage in node_info["hostnames"]["storage"]):
                    continue
                additional_device_name = ((
                    gluster_server_data.get("additional_devices") or [''])[0])
                break

            if not additional_device_name:
                err_msg += ("No 'additional_devices' are configured for "
                            "'%s' node, which has following hostnames and "
                            "IP addresses: %s.\n" % (
                                node_id,
                                ', '.join(
                                    node_info["hostnames"]["manage"]
                                    + node_info["hostnames"]["storage"])))
                continue

            heketi_ops.heketi_device_add(
                h_node, h_server_url, additional_device_name, node_id)
            additional_devices_attached.update(
                {node_id: additional_device_name})

        # Schedule cleanup of the added devices
        for node_id in additional_devices_attached.keys():
            node_info = heketi_ops.heketi_node_info(
                h_node, h_server_url, node_id, json=True)
            for device in node_info["devices"]:
                if device["name"] != additional_devices_attached[node_id]:
                    continue
                self.addCleanup(self.detach_devices_attached, device["id"])
                break
            else:
                self.fail("Could not find ID for added device on "
                          "'%s' node." % node_id)

        if err_msg:
            self.skipTest(err_msg)

        # Temporary disable new devices
        self.disable_devices(additional_devices_attached)

        # Create volume and save info about it
        vol_size = int(smallest_size / (1024**2)) - 1
        creation_info = heketi_ops.heketi_volume_create(
            h_node, h_server_url, vol_size, json=True)
        volume_name, volume_id = creation_info["name"], creation_info["id"]
        self.addCleanup(
            heketi_ops.heketi_volume_delete,
            h_node, h_server_url, volume_id, raise_on_error=False)

        volume_info_before_expansion = heketi_ops.heketi_volume_info(
            h_node, h_server_url, volume_id, json=True)
        num_of_bricks_before_expansion = self.get_num_of_bricks(volume_name)
        self.get_brick_and_volume_status(volume_name)
        free_space_before_expansion = self.get_devices_summary_free_space()

        # Try to expand volume with not enough device space
        self.assertRaises(
            AssertionError, heketi_ops.heketi_volume_expand,
            h_node, h_server_url, volume_id, expand_size)

        # Enable new devices to be able to expand our volume
        self.enable_devices(additional_devices_attached)

        # Expand volume and validate results
        heketi_ops.heketi_volume_expand(
            h_node, h_server_url, volume_id, expand_size, json=True)
        free_space_after_expansion = self.get_devices_summary_free_space()
        self.assertGreater(
            free_space_before_expansion, free_space_after_expansion,
            "Free space not consumed after expansion of %s" % volume_id)
        num_of_bricks_after_expansion = self.get_num_of_bricks(volume_name)
        self.get_brick_and_volume_status(volume_name)
        volume_info_after_expansion = heketi_ops.heketi_volume_info(
            h_node, h_server_url, volume_id, json=True)
        self.assertGreater(
            volume_info_after_expansion["size"],
            volume_info_before_expansion["size"],
            "Size of %s not increased" % volume_id)
        self.assertGreater(
            num_of_bricks_after_expansion, num_of_bricks_before_expansion)
        self.assertEqual(
            num_of_bricks_after_expansion % num_of_bricks_before_expansion, 0)

        # Delete volume and validate release of the used space
        heketi_ops.heketi_volume_delete(h_node, h_server_url, volume_id)
        free_space_after_deletion = self.get_devices_summary_free_space()
        self.assertGreater(
            free_space_after_deletion, free_space_after_expansion,
            "Free space not reclaimed after deletion of volume %s" % volume_id)
    def _pv_resize(self, exceed_free_space):
        dir_path = "/mnt"
        pvc_size_gb, min_free_space_gb = 1, 3

        # Get available free space disabling redundant devices and nodes
        heketi_url = self.heketi_server_url
        node_id_list = heketi_ops.heketi_node_list(
            self.heketi_client_node, heketi_url)
        self.assertTrue(node_id_list)
        nodes = {}
        min_free_space = min_free_space_gb * 1024**2
        for node_id in node_id_list:
            node_info = heketi_ops.heketi_node_info(
                self.heketi_client_node, heketi_url, node_id, json=True)
            if (node_info['state'].lower() != 'online' or
                    not node_info['devices']):
                continue
            if len(nodes) > 2:
                out = heketi_ops.heketi_node_disable(
                    self.heketi_client_node, heketi_url, node_id)
                self.assertTrue(out)
                self.addCleanup(
                    heketi_ops.heketi_node_enable,
                    self.heketi_client_node, heketi_url, node_id)
            for device in node_info['devices']:
                if device['state'].lower() != 'online':
                    continue
                free_space = device['storage']['free']
                if (node_id in nodes.keys() or free_space < min_free_space):
                    out = heketi_ops.heketi_device_disable(
                        self.heketi_client_node, heketi_url, device['id'])
                    self.assertTrue(out)
                    self.addCleanup(
                        heketi_ops.heketi_device_enable,
                        self.heketi_client_node, heketi_url, device['id'])
                    continue
                nodes[node_id] = free_space
        if len(nodes) < 3:
            raise self.skipTest(
                "Could not find 3 online nodes with, "
                "at least, 1 online device having free space "
                "bigger than %dGb." % min_free_space_gb)

        # Calculate maximum available size for PVC
        available_size_gb = int(min(nodes.values()) / (1024**2))

        # Create PVC
        self.create_storage_class(allow_volume_expansion=True)
        pvc_name = self.create_and_wait_for_pvc(pvc_size=pvc_size_gb)

        # Create DC with POD and attached PVC to it
        dc_name = oc_create_app_dc_with_io(self.node, pvc_name)
        self.addCleanup(oc_delete, self.node, 'dc', dc_name)
        self.addCleanup(scale_dc_pod_amount_and_wait, self.node, dc_name, 0)
        pod_name = get_pod_name_from_dc(self.node, dc_name)
        wait_for_pod_be_ready(self.node, pod_name)

        if exceed_free_space:
            # Try to expand existing PVC exceeding free space
            resize_pvc(self.node, pvc_name, available_size_gb)
            wait_for_events(self.node, obj_name=pvc_name,
                            event_reason='VolumeResizeFailed')

            # Check that app POD is up and runnig then try to write data
            wait_for_pod_be_ready(self.node, pod_name)
            cmd = (
                "dd if=/dev/urandom of=%s/autotest bs=100K count=1" % dir_path)
            ret, out, err = oc_rsh(self.node, pod_name, cmd)
            self.assertEqual(
                ret, 0,
                "Failed to write data after failed attempt to expand PVC.")
        else:
            # Expand existing PVC using all the available free space
            expand_size_gb = available_size_gb - pvc_size_gb
            resize_pvc(self.node, pvc_name, expand_size_gb)
            verify_pvc_size(self.node, pvc_name, expand_size_gb)
            pv_name = get_pv_name_from_pvc(self.node, pvc_name)
            verify_pv_size(self.node, pv_name, expand_size_gb)
            wait_for_events(
                self.node, obj_name=pvc_name,
                event_reason='VolumeResizeSuccessful')

            # Recreate app POD
            oc_delete(self.node, 'pod', pod_name)
            wait_for_resource_absence(self.node, 'pod', pod_name)
            pod_name = get_pod_name_from_dc(self.node, dc_name)
            wait_for_pod_be_ready(self.node, pod_name)

            # Write data on the expanded PVC
            cmd = ("dd if=/dev/urandom of=%s/autotest "
                   "bs=1M count=1025" % dir_path)
            ret, out, err = oc_rsh(self.node, pod_name, cmd)
            self.assertEqual(
                ret, 0, "Failed to write data on the expanded PVC")
    def test_heketi_device_replacement_in_node(self):
        """Validate device replacement operation on single node"""

        h_client, h_server = self.heketi_client_node, self.heketi_server_url

        try:
            gluster_server_0 = list(g.config["gluster_servers"].values())[0]
            manage_hostname = gluster_server_0["manage"]
            add_device_name = gluster_server_0["additional_devices"][0]
        except (KeyError, IndexError):
            self.skipTest(
                "Additional disk is not specified for node with following "
                "hostnames and IP addresses: {}, {}".format(
                    gluster_server_0.get('manage', '?'),
                    gluster_server_0.get('storage', '?')))

        # Get existing heketi volume list
        existing_volumes = heketi_volume_list(h_client, h_server, json=True)

        # Add cleanup function to clean stale volumes created during test
        self.addCleanup(self._cleanup_heketi_volumes,
                        existing_volumes.get("volumes"))

        # Get nodes info
        node_id_list = heketi_node_list(h_client, h_server)

        # Disable 4th and other nodes
        if len(node_id_list) > 3:
            for node_id in node_id_list[3:]:
                heketi_node_disable(h_client, h_server, node_id)
                self.addCleanup(heketi_node_enable, h_client, h_server,
                                node_id)

        # Create volume when 3 nodes are online
        vol_size, vol_count = 2, 4
        for _ in range(vol_count):
            vol_info = heketi_blockvolume_create(h_client,
                                                 h_server,
                                                 vol_size,
                                                 json=True)
            self.addCleanup(heketi_blockvolume_delete, h_client, h_server,
                            vol_info['id'])

        # Get node ID of the Gluster hostname
        topology_info = heketi_topology_info(h_client, h_server, json=True)
        self.assertIsNotNone(topology_info, "Failed to get topology info")
        self.assertIn("clusters", topology_info.keys(),
                      "Failed to get cluster "
                      "details from topology info")
        node_list = topology_info["clusters"][0]["nodes"]
        self.assertTrue(node_list,
                        "Cluster info command returned empty list of nodes")

        node_id = None
        for node in node_list:
            if manage_hostname == node['hostnames']["manage"][0]:
                node_id = node["id"]
                break
        self.assertTrue(
            node_id, "Failed to get  node info for node  id '{}'".format(
                manage_hostname))

        # Add extra device, then remember it's ID and size
        device_id_new, device_size_new = self._add_heketi_device(
            add_device_name, node_id)

        # Remove one of the existing devices on node except new device
        device_name, device_id = None, None
        node_info_after_addition = heketi_node_info(h_client,
                                                    h_server,
                                                    node_id,
                                                    json=True)
        for device in node_info_after_addition["devices"]:
            if (device["name"] != add_device_name
                    and device["storage"]["total"] == device_size_new):
                device_name = device["name"]
                device_id = device["id"]
                break

        self.assertIsNotNone(device_name, "Failed to get device name")
        self.assertIsNotNone(device_id, "Failed to get device id")
        self.addCleanup(heketi_device_enable,
                        h_client,
                        h_server,
                        device_id,
                        raise_on_error=False)
        self.addCleanup(heketi_device_add,
                        h_client,
                        h_server,
                        device_name,
                        node_id,
                        raise_on_error=False)
        heketi_device_disable(h_client, h_server, device_id)
        heketi_device_remove(h_client, h_server, device_id)
        heketi_device_delete(h_client, h_server, device_id)
    def test_heketi_metrics_validation_after_node(self, condition):
        """Validate heketi metrics after adding and remove node"""

        # Get additional node
        additional_host_info = g.config.get("additional_gluster_servers")
        if not additional_host_info:
            self.skipTest(
                "Skipping this test case as additional gluster server is "
                "not provied in config file")

        additional_host_info = list(additional_host_info.values())[0]
        storage_hostname = additional_host_info.get("manage")
        storage_ip = additional_host_info.get("storage")
        if not (storage_hostname and storage_ip):
            self.skipTest(
                "Config options 'additional_gluster_servers.manage' "
                "and 'additional_gluster_servers.storage' must be set.")

        h_client, h_server = self.heketi_client_node, self.heketi_server_url
        initial_node_count, final_node_count = 0, 0

        # Get initial node count from prometheus metrics
        metric_result = self._fetch_metric_from_promtheus_pod(
            metric='heketi_nodes_count')
        initial_node_count = reduce(
            lambda x, y: x + y,
            [result.get('value')[1] for result in metric_result])

        # Switch to storage project
        openshift_ops.switch_oc_project(
            self._master, self.storage_project_name)

        # Configure node before adding node
        self.configure_node_to_run_gluster(storage_hostname)

        # Get cluster list
        cluster_info = heketi_ops.heketi_cluster_list(
            h_client, h_server, json=True)

        # Add node to the cluster
        heketi_node_info = heketi_ops.heketi_node_add(
            h_client, h_server,
            len(self.gluster_servers), cluster_info.get('clusters')[0],
            storage_hostname, storage_ip, json=True)
        heketi_node_id = heketi_node_info.get("id")
        self.addCleanup(
            heketi_ops.heketi_node_delete,
            h_client, h_server, heketi_node_id, raise_on_error=False)
        self.addCleanup(
            heketi_ops.heketi_node_remove,
            h_client, h_server, heketi_node_id, raise_on_error=False)
        self.addCleanup(
            heketi_ops.heketi_node_disable,
            h_client, h_server, heketi_node_id, raise_on_error=False)
        self.addCleanup(
            openshift_ops.switch_oc_project,
            self._master, self.storage_project_name)

        if condition == 'delete':
            # Switch to openshift-monitoring project
            openshift_ops.switch_oc_project(
                self.ocp_master_node[0], self._prometheus_project_name)

            # Get initial node count from prometheus metrics
            for w in waiter.Waiter(timeout=60, interval=10):
                metric_result = self._fetch_metric_from_promtheus_pod(
                    metric='heketi_nodes_count')
                node_count = reduce(
                    lambda x, y: x + y,
                    [result.get('value')[1] for result in metric_result])
                if node_count != initial_node_count:
                    break

            if w.expired:
                raise exceptions.ExecutionError(
                    "Failed to get updated node details from prometheus")

            # Remove node from cluster
            heketi_ops.heketi_node_disable(h_client, h_server, heketi_node_id)
            heketi_ops.heketi_node_remove(h_client, h_server, heketi_node_id)
            for device in heketi_node_info.get('devices'):
                heketi_ops.heketi_device_delete(
                    h_client, h_server, device.get('id'))
            heketi_ops.heketi_node_delete(h_client, h_server, heketi_node_id)

        # Switch to openshift-monitoring project
        openshift_ops.switch_oc_project(
            self.ocp_master_node[0], self._prometheus_project_name)

        # Get final node count from prometheus metrics
        for w in waiter.Waiter(timeout=60, interval=10):
            metric_result = self._fetch_metric_from_promtheus_pod(
                metric='heketi_nodes_count')
            final_node_count = reduce(
                lambda x, y: x + y,
                [result.get('value')[1] for result in metric_result])

            if condition == 'delete':
                if final_node_count < node_count:
                    break
            else:
                if final_node_count > initial_node_count:
                    break

        if w.expired:
            raise exceptions.ExecutionError(
                "Failed to update node details in prometheus")
Beispiel #20
0
    def test_check_node_disable_based_on_heketi_zone(
            self, zone_count, is_disable_on_different_zone, is_set_env=False):
        """Validate node disable in different heketi zones"""
        expected_node_count, heketi_zone_checking, sc_name = 4, "strict", None

        # Check amount of available online nodes
        online_node_count = len(self._get_online_nodes())
        if online_node_count < expected_node_count:
            self.skipTest('Available node count {} is less than expected node '
                          'count {}'.format(online_node_count,
                                            expected_node_count))

        # Check amount of available online heketi zones
        self._check_for_available_zones(zone_count)

        # Get the online devices and nodes w.r.t. to zone
        zone_devices_nodes = self._get_online_devices_and_nodes_with_zone()

        # Create sc or else directly set env to "strict" inside dc
        is_create_sc = not is_set_env
        if is_create_sc:
            sc_name = self.create_storage_class(
                sc_name_prefix=self.prefix,
                vol_name_prefix=self.prefix,
                heketi_zone_checking=heketi_zone_checking)
        if is_set_env:
            self._set_zone_check_env_in_heketi_dc(heketi_zone_checking)

        # Choose a zone and node_id to disable the device
        for zone, nodes_and_devices in zone_devices_nodes.items():
            if zone_count == 3:
                # Select a node with a zone having multiple nodes in same
                # zone to cover the test cases disable node in same zone
                if len(nodes_and_devices['nodes']) > 1:
                    zone_with_disabled_node = zone
                    disabled_node = nodes_and_devices['nodes'][0]
                    break

            else:
                # Select node from any of the zones
                zone_with_disabled_node = zone
                disabled_node = nodes_and_devices['nodes'][0]
                break

        # Disable the selected node
        heketi_ops.heketi_node_disable(self.h_client, self.h_server,
                                       disabled_node)
        self.addCleanup(heketi_ops.heketi_node_enable, self.h_client,
                        self.h_server, disabled_node)

        # Create some DCs with PVCs and check brick placement in heketi zones
        pod_names = self._create_dcs_and_check_brick_placement(
            self.prefix, sc_name, heketi_zone_checking, zone_count)

        # Enable disabled node
        heketi_ops.heketi_node_enable(self.h_client, self.h_server,
                                      disabled_node)

        if is_disable_on_different_zone:
            # Select the new  node in a different zone
            for zone, nodes_and_devices in zone_devices_nodes.items():
                if zone != zone_with_disabled_node:
                    new_node_to_disable = nodes_and_devices['nodes'][0]
                    break

        else:
            # Select the new node in the same zone
            new_node_to_disable = zone_devices_nodes[zone_with_disabled_node][
                'nodes'][1]

        # Disable the newly selected node
        heketi_ops.heketi_node_disable(self.h_client, self.h_server,
                                       new_node_to_disable)
        self.addCleanup(heketi_ops.heketi_node_enable, self.h_client,
                        self.h_server, new_node_to_disable)

        # Verify if pods are in ready state
        for pod_name in pod_names:
            openshift_ops.wait_for_pod_be_ready(self.node,
                                                pod_name,
                                                timeout=5,
                                                wait_step=2)
Beispiel #21
0
    def test_volume_creation_no_free_devices(self):
        """Validate heketi error is returned when no free devices available"""
        node, server_url = self.heketi_client_node, self.heketi_server_url

        # Get nodes info
        node_id_list = heketi_ops.heketi_node_list(node, server_url)
        node_info_list = []
        for node_id in node_id_list[0:3]:
            node_info = heketi_ops.heketi_node_info(node,
                                                    server_url,
                                                    node_id,
                                                    json=True)
            node_info_list.append(node_info)

        # Disable 4th and other nodes
        for node_id in node_id_list[3:]:
            heketi_ops.heketi_node_disable(node, server_url, node_id)
            self.addCleanup(heketi_ops.heketi_node_enable, node, server_url,
                            node_id)

        # Disable second and other devices on the first 3 nodes
        for node_info in node_info_list[0:3]:
            devices = node_info["devices"]
            self.assertTrue(
                devices, "Node '%s' does not have devices." % node_info["id"])
            if devices[0]["state"].strip().lower() != "online":
                self.skipTest("Test expects first device to be enabled.")
            if len(devices) < 2:
                continue
            for device in node_info["devices"][1:]:
                out = heketi_ops.heketi_device_disable(node, server_url,
                                                       device["id"])
                self.assertTrue(
                    out, "Failed to disable the device %s" % device["id"])
                self.addCleanup(heketi_ops.heketi_device_enable, node,
                                server_url, device["id"])

        # Calculate common available space
        available_spaces = [
            int(node_info["devices"][0]["storage"]["free"])
            for n in node_info_list[0:3]
        ]
        min_space_gb = int(min(available_spaces) / 1024**2)
        self.assertGreater(min_space_gb, 3, "Not enough available free space.")

        # Create first small volume
        vol = heketi_ops.heketi_volume_create(node, server_url, 1, json=True)
        self.addCleanup(heketi_ops.heketi_volume_delete,
                        self.heketi_client_node, self.heketi_server_url,
                        vol["id"])

        # Try to create second volume getting "no free space" error
        try:
            vol_fail = heketi_ops.heketi_volume_create(node,
                                                       server_url,
                                                       min_space_gb,
                                                       json=True)
        except AssertionError:
            g.log.info("Volume was not created as expected.")
        else:
            self.addCleanup(heketi_ops.heketi_volume_delete,
                            self.heketi_client_node, self.heketi_server_url,
                            vol_fail["bricks"][0]["volume"])
            self.assertFalse(
                vol_fail,
                "Volume should have not been created. Out: %s" % vol_fail)
Beispiel #22
0
    def _pv_resize(self, exceed_free_space):
        dir_path = "/mnt"
        pvc_size_gb, min_free_space_gb = 1, 3

        # Get available free space disabling redundant devices and nodes
        heketi_url = self.heketi_server_url
        node_id_list = heketi_ops.heketi_node_list(self.heketi_client_node,
                                                   heketi_url)
        self.assertTrue(node_id_list)
        nodes = {}
        min_free_space = min_free_space_gb * 1024**2
        for node_id in node_id_list:
            node_info = heketi_ops.heketi_node_info(self.heketi_client_node,
                                                    heketi_url,
                                                    node_id,
                                                    json=True)
            if (node_info['state'].lower() != 'online'
                    or not node_info['devices']):
                continue
            if len(nodes) > 2:
                out = heketi_ops.heketi_node_disable(self.heketi_client_node,
                                                     heketi_url, node_id)
                self.assertTrue(out)
                self.addCleanup(heketi_ops.heketi_node_enable,
                                self.heketi_client_node, heketi_url, node_id)
            for device in node_info['devices']:
                if device['state'].lower() != 'online':
                    continue
                free_space = device['storage']['free']
                if (node_id in nodes.keys() or free_space < min_free_space):
                    out = heketi_ops.heketi_device_disable(
                        self.heketi_client_node, heketi_url, device['id'])
                    self.assertTrue(out)
                    self.addCleanup(heketi_ops.heketi_device_enable,
                                    self.heketi_client_node, heketi_url,
                                    device['id'])
                    continue
                nodes[node_id] = free_space
        if len(nodes) < 3:
            raise self.skipTest("Could not find 3 online nodes with, "
                                "at least, 1 online device having free space "
                                "bigger than %dGb." % min_free_space_gb)

        # Calculate maximum available size for PVC
        available_size_gb = int(min(nodes.values()) / (1024**2))

        # Create PVC
        self.create_storage_class(allow_volume_expansion=True)
        pvc_name = self.create_and_wait_for_pvc(pvc_size=pvc_size_gb)

        # Create DC with POD and attached PVC to it
        dc_name = oc_create_app_dc_with_io(self.node, pvc_name)
        self.addCleanup(oc_delete, self.node, 'dc', dc_name)
        self.addCleanup(scale_dc_pod_amount_and_wait, self.node, dc_name, 0)
        pod_name = get_pod_name_from_dc(self.node, dc_name)
        wait_for_pod_be_ready(self.node, pod_name)

        if exceed_free_space:
            # Try to expand existing PVC exceeding free space
            resize_pvc(self.node, pvc_name, available_size_gb)
            wait_for_events(self.node,
                            obj_name=pvc_name,
                            event_reason='VolumeResizeFailed')

            # Check that app POD is up and runnig then try to write data
            wait_for_pod_be_ready(self.node, pod_name)
            cmd = ("dd if=/dev/urandom of=%s/autotest bs=100K count=1" %
                   dir_path)
            ret, out, err = oc_rsh(self.node, pod_name, cmd)
            self.assertEqual(
                ret, 0,
                "Failed to write data after failed attempt to expand PVC.")
        else:
            # Expand existing PVC using all the available free space
            expand_size_gb = available_size_gb - pvc_size_gb
            resize_pvc(self.node, pvc_name, expand_size_gb)
            verify_pvc_size(self.node, pvc_name, expand_size_gb)
            pv_name = get_pv_name_from_pvc(self.node, pvc_name)
            verify_pv_size(self.node, pv_name, expand_size_gb)
            wait_for_events(self.node,
                            obj_name=pvc_name,
                            event_reason='VolumeResizeSuccessful')

            # Recreate app POD
            oc_delete(self.node, 'pod', pod_name)
            wait_for_resource_absence(self.node, 'pod', pod_name)
            pod_name = get_pod_name_from_dc(self.node, dc_name)
            wait_for_pod_be_ready(self.node, pod_name)

            # Write data on the expanded PVC
            cmd = ("dd if=/dev/urandom of=%s/autotest "
                   "bs=1M count=1025" % dir_path)
            ret, out, err = oc_rsh(self.node, pod_name, cmd)
            self.assertEqual(ret, 0,
                             "Failed to write data on the expanded PVC")
    def test_volume_expansion_no_free_space(self):
        """Validate volume expansion when there is no free space"""

        vol_size, expand_size, additional_devices_attached = None, 10, {}
        h_node, h_server_url = self.heketi_client_node, self.heketi_server_url

        # Get nodes info
        heketi_node_id_list = heketi_ops.heketi_node_list(h_node, h_server_url)
        if len(heketi_node_id_list) < 3:
            self.skipTest("3 Heketi nodes are required.")

        # Disable 4th and other nodes
        for node_id in heketi_node_id_list[3:]:
            heketi_ops.heketi_node_disable(h_node, h_server_url, node_id)
            self.addCleanup(
                heketi_ops.heketi_node_enable, h_node, h_server_url, node_id)

        # Prepare first 3 nodes
        smallest_size = None
        err_msg = ''
        for node_id in heketi_node_id_list[0:3]:
            node_info = heketi_ops.heketi_node_info(
                h_node, h_server_url, node_id, json=True)

            # Disable second and other devices
            devices = node_info["devices"]
            self.assertTrue(
                devices, "Node '%s' does not have devices." % node_id)
            if devices[0]["state"].strip().lower() != "online":
                self.skipTest("Test expects first device to be enabled.")
            if (smallest_size is None or
                    devices[0]["storage"]["free"] < smallest_size):
                smallest_size = devices[0]["storage"]["free"]
            for device in node_info["devices"][1:]:
                heketi_ops.heketi_device_disable(
                    h_node, h_server_url, device["id"])
                self.addCleanup(
                    heketi_ops.heketi_device_enable,
                    h_node, h_server_url, device["id"])

            # Gather info about additional devices
            additional_device_name = None
            for gluster_server in self.gluster_servers:
                gluster_server_data = self.gluster_servers_info[gluster_server]
                g_manage = gluster_server_data["manage"]
                g_storage = gluster_server_data["storage"]
                if not (g_manage in node_info["hostnames"]["manage"] or
                        g_storage in node_info["hostnames"]["storage"]):
                    continue
                additional_device_name = ((
                    gluster_server_data.get("additional_devices") or [''])[0])
                break

            if not additional_device_name:
                err_msg += ("No 'additional_devices' are configured for "
                            "'%s' node, which has following hostnames and "
                            "IP addresses: %s.\n" % (
                                node_id,
                                ', '.join(node_info["hostnames"]["manage"] +
                                          node_info["hostnames"]["storage"])))
                continue

            heketi_ops.heketi_device_add(
                h_node, h_server_url, additional_device_name, node_id)
            additional_devices_attached.update(
                {node_id: additional_device_name})

        # Schedule cleanup of the added devices
        for node_id in additional_devices_attached.keys():
            node_info = heketi_ops.heketi_node_info(
                h_node, h_server_url, node_id, json=True)
            for device in node_info["devices"]:
                if device["name"] != additional_devices_attached[node_id]:
                    continue
                self.addCleanup(self.detach_devices_attached, device["id"])
                break
            else:
                self.fail("Could not find ID for added device on "
                          "'%s' node." % node_id)

        if err_msg:
            self.skipTest(err_msg)

        # Temporary disable new devices
        self.disable_devices(additional_devices_attached)

        # Create volume and save info about it
        vol_size = int(smallest_size / (1024**2)) - 1
        creation_info = heketi_ops.heketi_volume_create(
            h_node, h_server_url, vol_size, json=True)
        volume_name, volume_id = creation_info["name"], creation_info["id"]
        self.addCleanup(
            heketi_ops.heketi_volume_delete,
            h_node, h_server_url, volume_id, raise_on_error=False)

        volume_info_before_expansion = heketi_ops.heketi_volume_info(
            h_node, h_server_url, volume_id, json=True)
        num_of_bricks_before_expansion = self.get_num_of_bricks(volume_name)
        self.get_brick_and_volume_status(volume_name)
        free_space_before_expansion = self.get_devices_summary_free_space()

        # Try to expand volume with not enough device space
        self.assertRaises(
            ExecutionError, heketi_ops.heketi_volume_expand,
            h_node, h_server_url, volume_id, expand_size)

        # Enable new devices to be able to expand our volume
        self.enable_devices(additional_devices_attached)

        # Expand volume and validate results
        heketi_ops.heketi_volume_expand(
            h_node, h_server_url, volume_id, expand_size, json=True)
        free_space_after_expansion = self.get_devices_summary_free_space()
        self.assertGreater(
            free_space_before_expansion, free_space_after_expansion,
            "Free space not consumed after expansion of %s" % volume_id)
        num_of_bricks_after_expansion = self.get_num_of_bricks(volume_name)
        self.get_brick_and_volume_status(volume_name)
        volume_info_after_expansion = heketi_ops.heketi_volume_info(
            h_node, h_server_url, volume_id, json=True)
        self.assertGreater(
            volume_info_after_expansion["size"],
            volume_info_before_expansion["size"],
            "Size of %s not increased" % volume_id)
        self.assertGreater(
            num_of_bricks_after_expansion, num_of_bricks_before_expansion)
        self.assertEqual(
            num_of_bricks_after_expansion % num_of_bricks_before_expansion, 0)

        # Delete volume and validate release of the used space
        heketi_ops.heketi_volume_delete(h_node, h_server_url, volume_id)
        free_space_after_deletion = self.get_devices_summary_free_space()
        self.assertGreater(
            free_space_after_deletion, free_space_after_expansion,
            "Free space not reclaimed after deletion of volume %s" % volume_id)
    def test_heketi_with_device_removal_insuff_space(self):
        """Validate heketi with device removal insufficient space"""

        # Disable 4+ nodes and 3+ devices on the first 3 nodes
        min_free_space_gb = 5
        min_free_space = min_free_space_gb * 1024**2
        heketi_url = self.heketi_server_url
        heketi_node = self.heketi_client_node
        nodes = {}

        node_ids = heketi_node_list(heketi_node, heketi_url)
        self.assertTrue(node_ids)
        for node_id in node_ids:
            node_info = heketi_node_info(heketi_node,
                                         heketi_url,
                                         node_id,
                                         json=True)
            if (node_info["state"].lower() != "online"
                    or not node_info["devices"]):
                continue
            if len(nodes) > 2:
                heketi_node_disable(heketi_node, heketi_url, node_id)
                self.addCleanup(heketi_node_enable, heketi_node, heketi_url,
                                node_id)
                continue
            for device in node_info["devices"]:
                if device["state"].lower() != "online":
                    continue
                free_space = device["storage"]["free"]
                if node_id not in nodes:
                    nodes[node_id] = []
                if (free_space < min_free_space or len(nodes[node_id]) > 1):
                    heketi_device_disable(heketi_node, heketi_url,
                                          device["id"])
                    self.addCleanup(heketi_device_enable, heketi_node,
                                    heketi_url, device["id"])
                    continue
                nodes[node_id].append({
                    "device_id": device["id"],
                    "free": free_space
                })

        # Skip test if nodes requirements are not met
        if (len(nodes) < 3
                or not all(map(
                    (lambda _list: len(_list) > 1), nodes.values()))):
            raise self.skipTest(
                "Could not find 3 online nodes with 2 online devices "
                "having free space bigger than %dGb." % min_free_space_gb)

        # Calculate size of a potential distributed vol
        if nodes[node_ids[0]][0]["free"] > nodes[node_ids[0]][1]["free"]:
            index = 0
        else:
            index = 1
        vol_size_gb = int(nodes[node_ids[0]][index]["free"] / (1024**2)) + 1
        device_id = nodes[node_ids[0]][index]["device_id"]

        # Create volume with such size that we consume space more than
        # size of smaller disks
        try:
            heketi_vol = heketi_volume_create(heketi_node,
                                              heketi_url,
                                              vol_size_gb,
                                              json=True)
        except Exception as e:
            g.log.warning(
                "Got following error trying to create '%s'Gb vol: %s" %
                (vol_size_gb, e))
            vol_size_gb -= 1
            heketi_vol = heketi_volume_create(heketi_node,
                                              heketi_url,
                                              vol_size_gb,
                                              json=True)
        self.addCleanup(heketi_volume_delete, self.heketi_client_node,
                        self.heketi_server_url,
                        heketi_vol["bricks"][0]["volume"])

        # Try to 'remove' bigger Heketi disk expecting error,
        # because there is no space on smaller disk to relocate bricks to
        heketi_device_disable(heketi_node, heketi_url, device_id)
        self.addCleanup(heketi_device_enable, heketi_node, heketi_url,
                        device_id)
        try:
            self.assertRaises(ExecutionError, heketi_device_remove,
                              heketi_node, heketi_url, device_id)
        except Exception:
            self.addCleanup(heketi_device_disable, heketi_node, heketi_url,
                            device_id)
            raise
Beispiel #25
0
    def test_dev_path_mapping_heketi_device_delete(self):
        """Validate dev path mapping for heketi device delete lifecycle"""
        h_client, h_url = self.heketi_client_node, self.heketi_server_url

        node_ids = heketi_ops.heketi_node_list(h_client, h_url)
        self.assertTrue(node_ids, "Failed to get heketi node list")

        # Fetch #4th node for the operations
        h_disable_node = node_ids[3]

        # Fetch bricks on the devices before volume create
        h_node_details_before, h_node = self._get_bricks_and_device_details()

        # Bricks count on the node before pvc creation
        brick_count_before = [count[1] for count in h_node_details_before]

        # Create file volume with app pod and verify IO's
        # and compare path, UUID, vg_name
        pod_name, dc_name, use_percent = self._create_app_pod_and_verify_pvs()

        # Check if IO's are running
        use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
        self.assertNotEqual(
            use_percent, use_percent_after,
            "Failed to execute IO's in the app pod {} after respin".format(
                pod_name))

        # Fetch bricks on the devices after volume create
        h_node_details_after, h_node = self._get_bricks_and_device_details()

        # Bricks count on the node after pvc creation
        brick_count_after = [count[1] for count in h_node_details_after]

        self.assertGreater(
            sum(brick_count_after), sum(brick_count_before),
            "Failed to add bricks on the node {}".format(h_node))

        # Enable the #4th node
        heketi_ops.heketi_node_enable(h_client, h_url, h_disable_node)
        node_info = heketi_ops.heketi_node_info(h_client,
                                                h_url,
                                                h_disable_node,
                                                json=True)
        h_node_id = node_info['id']
        self.assertEqual(node_info['state'], "online",
                         "Failed to enable node {}".format(h_disable_node))

        # Fetch device list i.e to be deleted
        h_node_info = heketi_ops.heketi_node_info(h_client,
                                                  h_url,
                                                  h_node,
                                                  json=True)
        devices_list = [[device['id'], device['name']]
                        for device in h_node_info['devices']]

        # Device deletion operation
        for device in devices_list:
            device_id, device_name = device[0], device[1]
            self.addCleanup(heketi_ops.heketi_device_enable,
                            h_client,
                            h_url,
                            device_id,
                            raise_on_error=False)

            # Disable device from heketi
            device_disable = heketi_ops.heketi_device_disable(
                h_client, h_url, device_id)
            self.assertTrue(
                device_disable,
                "Device {} could not be disabled".format(device_id))

            device_info = heketi_ops.heketi_device_info(h_client,
                                                        h_url,
                                                        device_id,
                                                        json=True)
            self.assertEqual(device_info['state'], "offline",
                             "Failed to disable device {}".format(device_id))

            # Remove device from heketi
            device_remove = heketi_ops.heketi_device_remove(
                h_client, h_url, device_id)
            self.assertTrue(device_remove,
                            "Device {} could not be removed".format(device_id))

            # Bricks after device removal
            device_info = heketi_ops.heketi_device_info(h_client,
                                                        h_url,
                                                        device_id,
                                                        json=True)
            bricks_count_after = len(device_info['bricks'])
            self.assertFalse(
                bricks_count_after,
                "Failed to remove the bricks from the device {}".format(
                    device_id))

            # Delete device from heketi
            self.addCleanup(heketi_ops.heketi_device_add,
                            h_client,
                            h_url,
                            device_name,
                            h_node,
                            raise_on_error=False)
            device_delete = heketi_ops.heketi_device_delete(
                h_client, h_url, device_id)
            self.assertTrue(device_delete,
                            "Device {} could not be deleted".format(device_id))

        # Check if IO's are running after device is deleted
        use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
        self.assertNotEqual(
            use_percent, use_percent_after,
            "Failed to execute IO's in the app pod {} after respin".format(
                pod_name))

        # Add device operations
        for device in devices_list:
            device_name = device[1]

            # Add device back to the node
            heketi_ops.heketi_device_add(h_client, h_url, device_name, h_node)

            # Fetch device info after device add
            node_info = heketi_ops.heketi_node_info(h_client,
                                                    h_url,
                                                    h_node,
                                                    json=True)
            device_id = None
            for device in node_info["devices"]:
                if device["name"] == device_name:
                    device_id = device["id"]
                    break
            self.assertTrue(
                device_id, "Failed to add device {} on node"
                " {}".format(device_name, h_node))

        # Disable the #4th node
        heketi_ops.heketi_node_disable(h_client, h_url, h_node_id)
        node_info = heketi_ops.heketi_node_info(h_client,
                                                h_url,
                                                h_node_id,
                                                json=True)
        self.assertEqual(node_info['state'], "offline",
                         "Failed to disable node {}".format(h_node_id))
        pvc_amount, pvc_size = 5, 1

        # Fetch bricks on the devices before volume create
        h_node_details_before, h_node = self._get_bricks_and_device_details()

        # Bricks count on the node before pvc creation
        brick_count_before = [count[1] for count in h_node_details_before]

        # Create file volumes
        pvc_name = self.create_and_wait_for_pvcs(pvc_size=pvc_size,
                                                 pvc_amount=pvc_amount)
        self.assertEqual(len(pvc_name), pvc_amount,
                         "Failed to create {} pvc".format(pvc_amount))

        # Fetch bricks on the devices after volume create
        h_node_details_after, h_node = self._get_bricks_and_device_details()

        # Bricks count on the node after pvc creation
        brick_count_after = [count[1] for count in h_node_details_after]

        self.assertGreater(
            sum(brick_count_after), sum(brick_count_before),
            "Failed to add bricks on the node {}".format(h_node))

        # Check if IO's are running after new device is added
        use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
        self.assertNotEqual(
            use_percent, use_percent_after,
            "Failed to execute IO's in the app pod {} after respin".format(
                pod_name))
Beispiel #26
0
    def test_dev_path_mapping_heketi_node_delete(self):
        """Validate dev path mapping for heketi node deletion lifecycle"""
        h_client, h_url = self.heketi_client_node, self.heketi_server_url

        node_ids = heketi_ops.heketi_node_list(h_client, h_url)
        self.assertTrue(node_ids, "Failed to get heketi node list")

        # Fetch #4th node for the operations
        h_disable_node = node_ids[3]

        # Fetch bricks on the devices before volume create
        h_node_details_before, h_node = self._get_bricks_and_device_details()

        # Bricks count on the node before pvc creation
        brick_count_before = [count[1] for count in h_node_details_before]

        # Create file volume with app pod and verify IO's
        # and compare path, UUID, vg_name
        pod_name, dc_name, use_percent = self._create_app_pod_and_verify_pvs()

        # Check if IO's are running
        use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
        self.assertNotEqual(
            use_percent, use_percent_after,
            "Failed to execute IO's in the app pod {} after respin".format(
                pod_name))

        # Fetch bricks on the devices after volume create
        h_node_details_after, h_node = self._get_bricks_and_device_details()

        # Bricks count on the node after pvc creation
        brick_count_after = [count[1] for count in h_node_details_after]

        self.assertGreater(
            sum(brick_count_after), sum(brick_count_before),
            "Failed to add bricks on the node {}".format(h_node))
        self.addCleanup(heketi_ops.heketi_node_disable, h_client, h_url,
                        h_disable_node)

        # Enable the #4th node
        heketi_ops.heketi_node_enable(h_client, h_url, h_disable_node)
        node_info = heketi_ops.heketi_node_info(h_client,
                                                h_url,
                                                h_disable_node,
                                                json=True)
        h_node_id = node_info['id']
        self.assertEqual(node_info['state'], "online",
                         "Failed to enable node {}".format(h_disable_node))

        # Disable the node and check for brick migrations
        self.addCleanup(heketi_ops.heketi_node_enable,
                        h_client,
                        h_url,
                        h_node,
                        raise_on_error=False)
        heketi_ops.heketi_node_disable(h_client, h_url, h_node)
        node_info = heketi_ops.heketi_node_info(h_client,
                                                h_url,
                                                h_node,
                                                json=True)
        self.assertEqual(node_info['state'], "offline",
                         "Failed to disable node {}".format(h_node))

        # Before bricks migration
        h_node_info = heketi_ops.heketi_node_info(h_client,
                                                  h_url,
                                                  h_node,
                                                  json=True)

        # Bricks before migration on the node i.e to be deleted
        bricks_counts_before = 0
        for device in h_node_info['devices']:
            bricks_counts_before += (len(device['bricks']))

        # Remove the node
        heketi_ops.heketi_node_remove(h_client, h_url, h_node)

        # After bricks migration
        h_node_info_after = heketi_ops.heketi_node_info(h_client,
                                                        h_url,
                                                        h_node,
                                                        json=True)

        # Bricks after migration on the node i.e to be delete
        bricks_counts = 0
        for device in h_node_info_after['devices']:
            bricks_counts += (len(device['bricks']))

        self.assertFalse(
            bricks_counts,
            "Failed to remove all the bricks from node {}".format(h_node))

        # Old node which is to deleted, new node were bricks resides
        old_node, new_node = h_node, h_node_id

        # Node info for the new node were brick reside after migration
        h_node_info_new = heketi_ops.heketi_node_info(h_client,
                                                      h_url,
                                                      new_node,
                                                      json=True)

        bricks_counts_after = 0
        for device in h_node_info_new['devices']:
            bricks_counts_after += (len(device['bricks']))

        self.assertEqual(
            bricks_counts_before, bricks_counts_after,
            "Failed to migrated bricks from {} node to  {}".format(
                old_node, new_node))

        # Fetch device list i.e to be deleted
        h_node_info = heketi_ops.heketi_node_info(h_client,
                                                  h_url,
                                                  h_node,
                                                  json=True)
        devices_list = [[device['id'], device['name']]
                        for device in h_node_info['devices']]

        for device in devices_list:
            device_id = device[0]
            device_name = device[1]
            self.addCleanup(heketi_ops.heketi_device_add,
                            h_client,
                            h_url,
                            device_name,
                            h_node,
                            raise_on_error=False)

            # Device deletion from heketi node
            device_delete = heketi_ops.heketi_device_delete(
                h_client, h_url, device_id)
            self.assertTrue(device_delete,
                            "Failed to delete the device {}".format(device_id))

        node_info = heketi_ops.heketi_node_info(h_client,
                                                h_url,
                                                h_node,
                                                json=True)
        cluster_id = node_info['cluster']
        zone = node_info['zone']
        storage_hostname = node_info['hostnames']['manage'][0]
        storage_ip = node_info['hostnames']['storage'][0]

        # Delete the node
        self.addCleanup(heketi_ops.heketi_node_add,
                        h_client,
                        h_url,
                        zone,
                        cluster_id,
                        storage_hostname,
                        storage_ip,
                        raise_on_error=False)
        heketi_ops.heketi_node_delete(h_client, h_url, h_node)

        # Verify if the node is deleted
        node_ids = heketi_ops.heketi_node_list(h_client, h_url)
        self.assertNotIn(old_node, node_ids,
                         "Failed to delete the node {}".format(old_node))

        # Check if IO's are running
        use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
        self.assertNotEqual(
            use_percent, use_percent_after,
            "Failed to execute IO's in the app pod {} after respin".format(
                pod_name))

        # Adding node back
        h_node_info = heketi_ops.heketi_node_add(h_client,
                                                 h_url,
                                                 zone,
                                                 cluster_id,
                                                 storage_hostname,
                                                 storage_ip,
                                                 json=True)
        self.assertTrue(
            h_node_info,
            "Failed to add the node in the cluster {}".format(cluster_id))
        h_node_id = h_node_info["id"]

        # Adding devices to the new node
        for device in devices_list:
            storage_device = device[1]

            # Add device to the new heketi node
            heketi_ops.heketi_device_add(h_client, h_url, storage_device,
                                         h_node_id)
            heketi_node_info = heketi_ops.heketi_node_info(h_client,
                                                           h_url,
                                                           h_node_id,
                                                           json=True)
            device_id = None
            for device in heketi_node_info["devices"]:
                if device["name"] == storage_device:
                    device_id = device["id"]
                    break

            self.assertTrue(
                device_id, "Failed to add device {} on node {}".format(
                    storage_device, h_node_id))

        # Create n pvc in order to verfiy if the bricks reside on the new node
        pvc_amount, pvc_size = 5, 1

        # Fetch bricks on the devices before volume create
        h_node_details_before, h_node = self._get_bricks_and_device_details()

        # Bricks count on the node before pvc creation
        brick_count_before = [count[1] for count in h_node_details_before]

        # Create file volumes
        pvc_name = self.create_and_wait_for_pvcs(pvc_size=pvc_size,
                                                 pvc_amount=pvc_amount)
        self.assertEqual(len(pvc_name), pvc_amount,
                         "Failed to create {} pvc".format(pvc_amount))

        # Fetch bricks on the devices before volume create
        h_node_details_after, h_node = self._get_bricks_and_device_details()

        # Bricks count on the node after pvc creation
        brick_count_after = [count[1] for count in h_node_details_after]

        self.assertGreater(
            sum(brick_count_after), sum(brick_count_before),
            "Failed to add bricks on the new node {}".format(new_node))

        # Check if IO's are running after new node is added
        use_percent_after = self._get_space_use_percent_in_app_pod(pod_name)
        self.assertNotEqual(
            use_percent, use_percent_after,
            "Failed to execute IO's in the app pod {} after respin".format(
                pod_name))
Beispiel #27
0
    def test_arbiter_required_tag_on_node_or_devices_other_disabled(
            self, r_node_tag, d_node_tag, r_device_tag, d_device_tag):
        """Validate arbiter vol creation with node or device tag"""

        pvc_amount = 3

        # Get Heketi nodes info
        node_id_list = heketi_ops.heketi_node_list(self.heketi_client_node,
                                                   self.heketi_server_url)

        # Disable n-3 nodes
        for node_id in node_id_list[3:]:
            heketi_ops.heketi_node_disable(self.heketi_client_node,
                                           self.heketi_server_url, node_id)
            self.addCleanup(heketi_ops.heketi_node_enable,
                            self.heketi_client_node, self.heketi_server_url,
                            node_id)

        # Set arbiter:required tags
        arbiter_node = heketi_ops.heketi_node_info(self.heketi_client_node,
                                                   self.heketi_server_url,
                                                   node_id_list[0],
                                                   json=True)
        arbiter_nodes_ip_addresses = arbiter_node['hostnames']['storage']
        self._set_arbiter_tag_with_further_revert(
            self.heketi_client_node,
            self.heketi_server_url,
            'node',
            node_id_list[0], ('required' if r_node_tag else None),
            revert_to=arbiter_node.get('tags', {}).get('arbiter'))
        for device in arbiter_node['devices']:
            self._set_arbiter_tag_with_further_revert(
                self.heketi_client_node,
                self.heketi_server_url,
                'device',
                device['id'], ('required' if r_device_tag else None),
                revert_to=device.get('tags', {}).get('arbiter'))

        # Set arbiter:disabled tags
        data_nodes_ip_addresses = []
        for node_id in node_id_list[1:]:
            node_info = heketi_ops.heketi_node_info(self.heketi_client_node,
                                                    self.heketi_server_url,
                                                    node_id,
                                                    json=True)
            if not any([
                    int(d['storage']['free']) > (pvc_amount * 1024**2)
                    for d in node_info['devices']
            ]):
                self.skipTest("Devices are expected to have more than "
                              "%sGb of free space" % pvc_amount)
            data_nodes_ip_addresses.extend(node_info['hostnames']['storage'])
            for device in node_info['devices']:
                self._set_arbiter_tag_with_further_revert(
                    self.heketi_client_node,
                    self.heketi_server_url,
                    'device',
                    device['id'], ('disabled' if d_device_tag else None),
                    revert_to=device.get('tags', {}).get('arbiter'))
            self._set_arbiter_tag_with_further_revert(
                self.heketi_client_node,
                self.heketi_server_url,
                'node',
                node_id, ('disabled' if d_node_tag else None),
                revert_to=node_info.get('tags', {}).get('arbiter'))

        # Create PVCs and check that their bricks are correctly located
        self.create_storage_class(is_arbiter_vol=True)
        for i in range(pvc_amount):
            self.create_and_wait_for_pvc(1)

            # Get gluster volume info
            vol_info = openshift_ops.get_gluster_vol_info_by_pvc_name(
                self.node, self.pvc_name)
            arbiter_bricks, data_bricks = [], []
            for brick in vol_info['bricks']['brick']:
                if int(brick["isArbiter"]) == 1:
                    arbiter_bricks.append(brick["name"])
                else:
                    data_bricks.append(brick["name"])

            # Verify that all the arbiter bricks are located on
            # arbiter:required node and data bricks on all other nodes only.
            for arbiter_brick in arbiter_bricks:
                self.assertIn(
                    arbiter_brick.split(':')[0], arbiter_nodes_ip_addresses)
            for data_brick in data_bricks:
                self.assertIn(
                    data_brick.split(':')[0], data_nodes_ip_addresses)
Beispiel #28
0
    def test_volume_creation_of_size_greater_than_the_device_size(self):
        """Validate creation of a volume of size greater than the size of a
        device.
        """
        h_node, h_url = self.heketi_client_node, self.heketi_server_url

        # Remove existing BHV to calculate freespace
        bhv_list = heketi_ops.get_block_hosting_volume_list(h_node, h_url)
        if bhv_list:
            for bhv in bhv_list:
                bhv_info = heketi_ops.heketi_volume_info(h_node,
                                                         h_url,
                                                         bhv,
                                                         json=True)
                if bhv_info['blockinfo'].get('blockvolume') is None:
                    heketi_ops.heketi_volume_delete(h_node, h_url, bhv)

        topology = heketi_ops.heketi_topology_info(h_node, h_url, json=True)
        nodes_free_space, nodes_ips = [], []
        selected_nodes, selected_devices = [], []
        cluster = topology['clusters'][0]
        node_count = len(cluster['nodes'])
        msg = ("At least 3 Nodes are required in cluster. "
               "But only %s Nodes are present." % node_count)
        if node_count < 3:
            self.skipTest(msg)

        online_nodes_count = 0
        for node in cluster['nodes']:
            nodes_ips.append(node['hostnames']['storage'][0])

            if node['state'] != 'online':
                continue

            online_nodes_count += 1

            # Disable nodes after 3rd online nodes
            if online_nodes_count > 3:
                heketi_ops.heketi_node_disable(h_node, h_url, node['id'])
                self.addCleanup(heketi_ops.heketi_node_enable, h_node, h_url,
                                node['id'])
                continue

            selected_nodes.append(node['id'])

            device_count = len(node['devices'])
            msg = ("At least 2 Devices are required on each Node."
                   "But only %s Devices are present." % device_count)
            if device_count < 2:
                self.skipTest(msg)

            sel_devices, online_devices_count, free_space = [], 0, 0
            for device in node['devices']:
                if device['state'] != 'online':
                    continue

                online_devices_count += 1

                # Disable devices after 2nd online devices
                if online_devices_count > 2:
                    heketi_ops.heketi_device_disable(h_node, h_url,
                                                     device['id'])
                    self.addCleanup(heketi_ops.heketi_device_enable, h_node,
                                    h_url, device['id'])
                    continue

                sel_devices.append(device['id'])
                free_space += int(device['storage']['free'] / (1024**2))

            selected_devices.append(sel_devices)
            nodes_free_space.append(free_space)

            msg = ("At least 2 online Devices are required on each Node. "
                   "But only %s Devices are online on Node: %s." %
                   (online_devices_count, node['id']))
            if online_devices_count < 2:
                self.skipTest(msg)

        msg = ("At least 3 online Nodes are required in cluster. "
               "But only %s Nodes are online in Cluster: %s." %
               (online_nodes_count, cluster['id']))
        if online_nodes_count < 3:
            self.skipTest(msg)

        # Select node with minimum free space
        min_free_size = min(nodes_free_space)
        index = nodes_free_space.index(min_free_size)

        # Get max device size from selected node
        device_size = 0
        for device in selected_devices[index]:
            device_info = heketi_ops.heketi_device_info(h_node,
                                                        h_url,
                                                        device,
                                                        json=True)
            device_size = max(device_size,
                              (int(device_info['storage']['total'] /
                                   (1024**2))))

        vol_size = device_size + 1

        if vol_size >= min_free_size:
            self.skipTest('Required free space %s is not available' % vol_size)

        # Create heketi volume with device size + 1
        vol_info = self.create_heketi_volume_with_name_and_wait(
            name="volume_size_greater_than_device_size",
            size=vol_size,
            json=True)

        # Get gluster server IP's from heketi volume info
        glusterfs_servers = heketi_ops.get_vol_file_servers_and_hosts(
            h_node, h_url, vol_info['id'])

        # Verify gluster server IP's in heketi volume info
        msg = ("gluster IP's '%s' does not match with IP's '%s' found in "
               "heketi volume info" %
               (nodes_ips, glusterfs_servers['vol_servers']))
        self.assertEqual(set(glusterfs_servers['vol_servers']), set(nodes_ips),
                         msg)

        vol_name = vol_info['name']
        gluster_v_info = self.get_gluster_vol_info(vol_name)

        # Verify replica count in gluster v info
        msg = "Volume %s is replica %s instead of replica 3" % (
            vol_name, gluster_v_info['replicaCount'])
        self.assertEqual('3', gluster_v_info['replicaCount'])

        # Verify distCount in gluster v info
        msg = "Volume %s distCount is %s instead of distCount as 3" % (
            vol_name, int(gluster_v_info['distCount']))
        self.assertEqual(
            int(gluster_v_info['brickCount']) // 3,
            int(gluster_v_info['distCount']), msg)

        # Verify bricks count in gluster v info
        msg = (
            "Volume %s does not have bricks count multiple of 3. It has %s" %
            (vol_name, gluster_v_info['brickCount']))
        self.assertFalse(int(gluster_v_info['brickCount']) % 3, msg)
Beispiel #29
0
    def test_heketi_device_removal_with_insuff_space(self):
        """Validate heketi with device removal insufficient space"""

        # Disable 4+ nodes and 3+ devices on the first 3 nodes
        min_free_space_gb = 5
        min_free_space = min_free_space_gb * 1024**2
        heketi_url = self.heketi_server_url
        heketi_node = self.heketi_client_node
        nodes = {}

        node_ids = heketi_node_list(heketi_node, heketi_url)
        self.assertTrue(node_ids)
        for node_id in node_ids:
            node_info = heketi_node_info(heketi_node,
                                         heketi_url,
                                         node_id,
                                         json=True)
            if (node_info["state"].lower() != "online"
                    or not node_info["devices"]):
                continue
            if len(nodes) > 2:
                heketi_node_disable(heketi_node, heketi_url, node_id)
                self.addCleanup(heketi_node_enable, heketi_node, heketi_url,
                                node_id)
                continue
            for device in node_info["devices"]:
                if device["state"].lower() != "online":
                    continue
                free_space = device["storage"]["free"]
                if node_id not in nodes:
                    nodes[node_id] = []
                if (free_space < min_free_space or len(nodes[node_id]) > 1):
                    heketi_device_disable(heketi_node, heketi_url,
                                          device["id"])
                    self.addCleanup(heketi_device_enable, heketi_node,
                                    heketi_url, device["id"])
                    continue
                nodes[node_id].append({
                    "device_id": device["id"],
                    "free": free_space
                })

        # Skip test if nodes requirements are not met
        if (len(nodes) < 3
                or not all(map((lambda _l: len(_l) > 1), nodes.values()))):
            raise self.skipTest(
                "Could not find 3 online nodes with 2 online devices "
                "having free space bigger than %dGb." % min_free_space_gb)

        # Calculate size of a potential distributed vol
        if nodes[node_ids[0]][0]["free"] > nodes[node_ids[0]][1]["free"]:
            index = 0
        else:
            index = 1
        vol_size_gb = int(nodes[node_ids[0]][index]["free"] / (1024**2)) + 1
        device_id = nodes[node_ids[0]][index]["device_id"]

        # Create volume with such size that we consume space more than
        # size of smaller disks
        h_volume_name = "autotests-heketi-volume-%s" % utils.get_random_str()
        try:
            self.create_heketi_volume_with_name_and_wait(h_volume_name,
                                                         vol_size_gb,
                                                         json=True)
        except Exception as e:
            # NOTE: rare situation when we need to decrease size of a volume.
            g.log.info("Failed to create '%s'Gb volume. "
                       "Trying to create another one, smaller for 1Gb.")

            if not ('more required' in str(e) and
                    ('Insufficient suitable allocatable extents for '
                     'logical volume' in str(e))):
                raise

            vol_size_gb -= 1
            self.create_heketi_volume_with_name_and_wait(h_volume_name,
                                                         vol_size_gb,
                                                         json=True)

        # Try to 'remove' bigger Heketi disk expecting error,
        # because there is no space on smaller disk to relocate bricks to
        heketi_device_disable(heketi_node, heketi_url, device_id)
        self.addCleanup(heketi_device_enable, heketi_node, heketi_url,
                        device_id)
        try:
            self.assertRaises(AssertionError, heketi_device_remove,
                              heketi_node, heketi_url, device_id)
        except Exception:
            self.addCleanup(heketi_device_disable, heketi_node, heketi_url,
                            device_id)
            raise
    def test_create_volumes_enabling_and_disabling_heketi_devices(self):
        """Validate enable/disable of heketi device"""

        # Get nodes info
        node_id_list = heketi_ops.heketi_node_list(
            self.heketi_client_node, self.heketi_server_url)
        node_info_list = []
        for node_id in node_id_list[0:3]:
            node_info = heketi_ops.heketi_node_info(
                self.heketi_client_node, self.heketi_server_url,
                node_id, json=True)
            node_info_list.append(node_info)

        # Disable 4th and other nodes
        if len(node_id_list) > 3:
            for node in node_id_list[3:]:
                heketi_ops.heketi_node_disable(
                    self.heketi_client_node, self.heketi_server_url, node_id)
                self.addCleanup(
                    heketi_ops.heketi_node_enable, self.heketi_client_node,
                    self.heketi_server_url, node_id)

        # Disable second and other devices on the first 3 nodes
        for node_info in node_info_list[0:3]:
            devices = node_info["devices"]
            self.assertTrue(
                devices, "Node '%s' does not have devices." % node_info["id"])
            if devices[0]["state"].strip().lower() != "online":
                self.skipTest("Test expects first device to be enabled.")
            if len(devices) < 2:
                continue
            for device in node_info["devices"][1:]:
                out = heketi_ops.heketi_device_disable(
                    self.heketi_client_node, self.heketi_server_url,
                    device["id"])
                self.assertTrue(
                    out, "Failed to disable the device %s" % device["id"])
                self.addCleanup(
                    heketi_ops.heketi_device_enable,
                    self.heketi_client_node, self.heketi_server_url,
                    device["id"])

        # Create heketi volume
        out = heketi_ops.heketi_volume_create(
            self.heketi_client_node, self.heketi_server_url, 1, json=True)
        self.assertTrue(out, "Failed to create heketi volume of size 1")
        g.log.info("Successfully created heketi volume of size 1")
        device_id = out["bricks"][0]["device"]
        self.addCleanup(
            heketi_ops.heketi_volume_delete, self.heketi_client_node,
            self.heketi_server_url, out["bricks"][0]["volume"])

        # Disable device
        g.log.info("Disabling '%s' device" % device_id)
        out = heketi_ops.heketi_device_disable(
            self.heketi_client_node, self.heketi_server_url, device_id)
        self.assertTrue(out, "Failed to disable the device %s" % device_id)
        g.log.info("Successfully disabled device %s" % device_id)

        try:
            # Get device info
            g.log.info("Retrieving '%s' device info" % device_id)
            out = heketi_ops.heketi_device_info(
                self.heketi_client_node, self.heketi_server_url,
                device_id, json=True)
            self.assertTrue(out, "Failed to get device info %s" % device_id)
            g.log.info("Successfully retrieved device info %s" % device_id)
            name = out["name"]
            if out["state"].lower().strip() != "offline":
                raise exceptions.ExecutionError(
                    "Device %s is not in offline state." % name)
            g.log.info("Device %s is now offine" % name)

            # Try to create heketi volume
            g.log.info("Creating heketi volume: Expected to fail.")
            try:
                out = heketi_ops.heketi_volume_create(
                    self.heketi_client_node, self.heketi_server_url, 1,
                    json=True)
            except exceptions.ExecutionError:
                g.log.info("Volume was not created as expected.")
            else:
                self.addCleanup(
                    heketi_ops.heketi_volume_delete, self.heketi_client_node,
                    self.heketi_server_url, out["bricks"][0]["volume"])
                msg = "Volume unexpectedly created. Out: %s" % out
                assert False, msg
        finally:
            # Enable the device back
            g.log.info("Enable '%s' device back." % device_id)
            out = heketi_ops.heketi_device_enable(
                self.heketi_client_node, self.heketi_server_url, device_id)
            self.assertTrue(out, "Failed to enable the device %s" % device_id)
            g.log.info("Successfully enabled device %s" % device_id)

        # Get device info
        out = heketi_ops.heketi_device_info(
            self.heketi_client_node, self.heketi_server_url, device_id,
            json=True)
        self.assertTrue(out, ("Failed to get device info %s" % device_id))
        g.log.info("Successfully retrieved device info %s" % device_id)
        name = out["name"]
        if out["state"] != "online":
            raise exceptions.ExecutionError(
                "Device %s is not in online state." % name)

        # Create heketi volume of size
        out = heketi_ops.heketi_volume_create(
            self.heketi_client_node, self.heketi_server_url, 1, json=True)
        self.assertTrue(out, "Failed to create volume of size 1")
        self.addCleanup(
            heketi_ops.heketi_volume_delete, self.heketi_client_node,
            self.heketi_server_url, out["bricks"][0]["volume"])
        g.log.info("Successfully created volume of size 1")
        name = out["name"]

        # Get gluster volume info
        vol_info = get_volume_info('auto_get_gluster_endpoint', volname=name)
        self.assertTrue(vol_info, "Failed to get '%s' volume info." % name)
        g.log.info("Successfully got the '%s' volume info." % name)