Example #1
    def reboot_gluster_node_and_wait_for_services(self):
        gluster_node_ip = (
            g.config["gluster_servers"][self.gluster_servers[0]]["storage"])
        gluster_pod = list(
            filter(lambda pod: (pod["pod_host_ip"] == gluster_node_ip),
                   get_ocp_gluster_pod_details(self.oc_node)))
        if not gluster_pod:
            raise ExecutionError(
                "No gluster pod found with host IP '%s'." % gluster_node_ip)
        gluster_pod = gluster_pod[0]["pod_name"]
        self.addCleanup(wait_for_pod_be_ready, self.oc_node, gluster_pod)
        node_reboot_by_command(gluster_node_ip, timeout=600, wait_step=10)

        # wait for the gluster pod to be in 'Running' state
        wait_for_pod_be_ready(self.oc_node, gluster_pod)

        # glusterd, gluster-blockd and tcmu-runner should be running and
        # gluster-block-target should have exited
        services = (("glusterd", "running"), ("gluster-blockd", "running"),
                    ("tcmu-runner", "running"), ("gluster-block-target",
                                                 "exited"))
        for service, state in services:
            check_service_status_on_pod(self.oc_node, gluster_pod, service,
                                        "active", state)
    def test_dynamic_provisioning_glusterfile_gluster_pod_or_node_failure(
            self):
        """Create glusterblock PVC when gluster pod or node is down."""
        mount_path = "/mnt"
        datafile_path = '%s/fake_file_for_%s' % (mount_path, self.id())

        # Create secret and storage class
        self.create_storage_class()

        # Create PVC
        pvc_name = self.create_and_wait_for_pvc()

        # Create app POD with attached volume
        pod_name = oc_create_tiny_pod_with_volume(
            self.node,
            pvc_name,
            "test-pvc-mount-on-app-pod",
            mount_path=mount_path,
            image=self.io_container_image_cirros)
        self.addCleanup(wait_for_resource_absence, self.node, 'pod', pod_name)
        self.addCleanup(oc_delete, self.node, 'pod', pod_name)

        # Wait for the app POD to be up and running
        wait_for_pod_be_ready(self.node, pod_name, timeout=60, wait_step=2)

        # Run IO in background
        io_cmd = "oc rsh %s dd if=/dev/urandom of=%s bs=1000K count=900" % (
            pod_name, datafile_path)
        async_io = g.run_async(self.node, io_cmd, "root")

        # Check for containerized Gluster
        if self.is_containerized_gluster():
            # Pick up one of the hosts which stores PV brick (4+ nodes case)
            gluster_pod_data = get_gluster_pod_names_by_pvc_name(
                self.node, pvc_name)[0]

            # Delete glusterfs POD from the chosen host and wait for
            # a new one to spawn
            oc_delete(self.node, 'pod', gluster_pod_data["pod_name"])
            cmd = ("oc get pods -o wide | grep glusterfs | grep %s | "
                   "grep -v Terminating | awk '{print $1}'") % (
                       gluster_pod_data["pod_hostname"])
            for w in Waiter(600, 15):
                new_gluster_pod_name = self.cmd_run(cmd)
                if new_gluster_pod_name:
                    break
            if w.expired:
                error_msg = "exceeded timeout, new gluster pod not created"
                g.log.error(error_msg)
                raise AssertionError(error_msg)
            g.log.info("new gluster pod name is %s" % new_gluster_pod_name)
            wait_for_pod_be_ready(self.node, new_gluster_pod_name)
        else:
            pvc_hosting_node_ip = get_gluster_host_ips_by_pvc_name(
                self.node, pvc_name)[0]
            heketi_nodes = heketi_node_list(self.heketi_client_node,
                                            self.heketi_server_url)
            node_ip_for_reboot = None
            for heketi_node in heketi_nodes:
                heketi_node_ip = heketi_node_info(
                    self.heketi_client_node,
                    self.heketi_server_url,
                    heketi_node,
                    json=True)["hostnames"]["storage"][0]
                if heketi_node_ip == pvc_hosting_node_ip:
                    node_ip_for_reboot = heketi_node_ip
                    break

            if not node_ip_for_reboot:
                raise AssertionError(
                    "Gluster node IP %s did not match any heketi node "
                    "storage IP" % pvc_hosting_node_ip)

            node_reboot_by_command(node_ip_for_reboot)

        # Check that async IO was not interrupted
        ret, out, err = async_io.async_communicate()
        self.assertEqual(ret, 0, "IO %s failed on %s" % (io_cmd, self.node))
    def test_heketi_metrics_validation_with_node_reboot(self):
        """Validate heketi metrics after node reboot using prometheus"""

        initial_metrics, final_metrics = {}, {}

        # Use storage project
        openshift_ops.switch_oc_project(
            self._master, self.storage_project_name)

        # Get initial metrics result
        h_node, h_server = self.heketi_client_node, self.heketi_server_url
        initial_metrics = tuple(
            heketi_ops.get_heketi_metrics(h_node, h_server).get(metric)[0]
            for metric in self.metrics)

        # Use prometheus project
        openshift_ops.switch_oc_project(
            self._master, self._prometheus_project_name)

        # Get initial prometheus result
        initial_prometheus = self._get_and_manipulate_metric_data(
            self.metrics)

        # Get hosted node IP of heketi pod
        openshift_ops.switch_oc_project(
            self._master, self.storage_project_name)
        heketi_pod = openshift_ops.get_pod_name_from_dc(
            self._master, self.heketi_dc_name)
        heketi_node = openshift_ops.oc_get_custom_resource(
            self._master, 'pod', '.:spec.nodeName', heketi_pod)[0]

        # Reboot the node on which heketi pod is scheduled
        self.addCleanup(
            self._check_heketi_and_gluster_pod_after_node_reboot, heketi_node)
        node_ops.node_reboot_by_command(heketi_node)

        # Wait for the node to become NotReady
        custom = r'":.status.conditions[?(@.type==\"Ready\")]".status'
        for w in waiter.Waiter(300, 10):
            status = openshift_ops.oc_get_custom_resource(
                self._master, 'node', custom, heketi_node)
            if status[0] == 'False':
                break
        if w.expired:
            raise exceptions.ExecutionError(
                "Failed to bring down node {}".format(heketi_node))

        # Wait for node to become ready
        openshift_ops.wait_for_ocp_node_be_ready(self._master, heketi_node)

        # Wait for heketi and glusterfs pod to become ready
        self._check_heketi_and_gluster_pod_after_node_reboot(heketi_node)

        # Use prometheus project
        openshift_ops.switch_oc_project(
            self._master, self._prometheus_project_name)

        # Get final metrics result
        final_metrics = tuple(
            heketi_ops.get_heketi_metrics(h_node, h_server).get(metric)[0]
            for metric in self.metrics)

        # Get final prometheus result
        final_prometheus = self._get_and_manipulate_metric_data(
            self.metrics)

        err_msg = "Initial value {} is not same as final value {}"
        self.assertEqual(
            initial_metrics, final_metrics, err_msg.format(
                initial_metrics, final_metrics))
        self.assertEqual(
            initial_prometheus, final_prometheus, err_msg.format(
                initial_prometheus, final_prometheus))
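
Several of the examples here wait for a condition with the `for w in waiter.Waiter(timeout, interval): ... if w.expired:` idiom: the loop body polls, a `break` signals success, and `expired` tells the caller whether time ran out. A minimal, self-contained sketch of such an iterator is shown below; `SimpleWaiter` and the fake check are illustrative stand-ins, not the library's `Waiter` class.

import time


class SimpleWaiter(object):
    """Sketch of a Waiter-style timeout loop.

    The first iteration runs immediately; subsequent iterations sleep for
    `interval` seconds. If the caller never breaks out of the loop,
    `expired` is set to True once `timeout` seconds have passed.
    """

    def __init__(self, timeout, interval):
        self.timeout = timeout
        self.interval = interval
        self.expired = False
        self._end = None

    def __iter__(self):
        return self

    def __next__(self):
        if self._end is None:
            self._end = time.time() + self.timeout
        elif time.time() >= self._end:
            self.expired = True
            raise StopIteration
        else:
            time.sleep(self.interval)
        return self


if __name__ == "__main__":
    polls = []

    def node_reached_expected_state():
        # Placeholder for the oc_get_custom_resource() status check above.
        polls.append(1)
        return len(polls) >= 2

    for w in SimpleWaiter(timeout=5, interval=1):
        if node_reached_expected_state():
            break
    if w.expired:
        raise RuntimeError("node did not reach the expected state in time")
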
Example #4
    def test_udev_usage_in_container(self):
        """Validate LVM inside container does not use udev"""

        # Skip the TC if independent mode deployment
        if not self.is_containerized_gluster():
            self.skipTest("Skipping this test case as it needs to run on "
                          "converged mode deployment")

        h_client, h_url = self.heketi_client_node, self.heketi_server_url
        server_info = list(g.config.get('gluster_servers').values())[0]
        server_node = server_info.get('manage')
        additional_device = server_info.get('additional_devices')[0]

        # command to run pvscan
        cmd_pvscan = "timeout 300 pvscan"

        # Get the gluster pod name running on the chosen host
        for pod_info in self.pod_name:
            if pod_info.get('pod_hostname') == server_node:
                pod_name = pod_info.get('pod_name')
                break

        # Create file volume
        vol_info = heketi_ops.heketi_volume_create(h_client,
                                                   h_url,
                                                   self.volume_size,
                                                   json=True)
        self.addCleanup(heketi_ops.heketi_volume_delete, h_client, h_url,
                        vol_info.get("id"))

        # Create block volume
        block_vol_info = heketi_ops.heketi_blockvolume_create(h_client,
                                                              h_url,
                                                              self.volume_size,
                                                              json=True)
        self.addCleanup(heketi_ops.heketi_blockvolume_delete, h_client, h_url,
                        block_vol_info.get("id"))

        # Check dmeventd service in container
        err_msg = "dmeventd.service is running on setup"
        with self.assertRaises(AssertionError, msg=err_msg):
            openshift_ops.oc_rsh(self.oc_node, pod_name,
                                 "systemctl is-active dmeventd.service")

        # Service dmeventd should not be running in the background
        with self.assertRaises(AssertionError, msg=err_msg):
            openshift_ops.oc_rsh(self.oc_node, pod_name,
                                 "ps aux | grep dmeventd.service")

        # Perform a pvscan in the container
        openshift_ops.oc_rsh(self.oc_node, pod_name, cmd_pvscan)

        # Get heketi node to add new device
        heketi_node_list = heketi_ops.heketi_node_list(h_client, h_url)
        for h_node_id in heketi_node_list:
            h_node_info = heketi_ops.heketi_node_info(h_client,
                                                      h_url,
                                                      h_node_id,
                                                      json=True)
            h_node_host = h_node_info.get('hostnames', {}).get('manage')[0]
            if h_node_host == server_node:
                break

        # Add new device to the node
        heketi_ops.heketi_device_add(h_client, h_url, additional_device,
                                     h_node_id)
        h_node_info = heketi_ops.heketi_node_info(h_client,
                                                  h_url,
                                                  h_node_id,
                                                  json=True)
        h_device_id = [
            device.get('id') for device in h_node_info.get('devices')
            if device.get('name') == additional_device
        ]
        self.addCleanup(heketi_ops.heketi_device_delete, h_client, h_url,
                        h_device_id[0])
        self.addCleanup(heketi_ops.heketi_device_remove, h_client, h_url,
                        h_device_id[0])
        self.addCleanup(heketi_ops.heketi_device_disable, h_client, h_url,
                        h_device_id[0])

        # Reboot the node on which device is added
        self.addCleanup(self._check_heketi_and_gluster_pod_after_node_reboot,
                        server_node)
        node_ops.node_reboot_by_command(server_node)

        # Wait for the node to become NotReady
        custom = r'":.status.conditions[?(@.type==\"Ready\")]".status'
        for w in waiter.Waiter(300, 10):
            status = openshift_ops.oc_get_custom_resource(
                self.oc_node, 'node', custom, server_node)
            if status[0] == 'False':
                break
        if w.expired:
            raise exceptions.ExecutionError(
                "Failed to bring node down {}".format(server_node))

        # Wait for node to become ready
        openshift_ops.wait_for_ocp_node_be_ready(self.oc_node, server_node)

        # Wait for heketi and glusterfs pod to become ready
        self._check_heketi_and_gluster_pod_after_node_reboot(server_node)

        # Perform a pvscan in the container
        openshift_ops.oc_rsh(self.oc_node, pod_name, cmd_pvscan)
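
The dmeventd checks in this example expect the `oc_rsh` call to raise `AssertionError` when the remote command exits non-zero, which is why `assertRaises` is used to assert that the service is *not* active. A minimal, self-contained sketch of that inverted-assertion pattern, using a fake command runner instead of `oc_rsh`, could look like the following (the runner, the service set and the test class are all hypothetical):

import unittest


def fake_rsh(command, active_services=("glusterd",)):
    """Stand-in for a remote-shell helper that raises on non-zero exit."""
    service = command.split()[-1]
    if service not in active_services:
        raise AssertionError(
            "command '%s' failed: service is not active" % command)
    return 0, service, ""


class TestServiceNotRunning(unittest.TestCase):
    def test_dmeventd_is_not_active(self):
        # dmeventd is not in the fake "active" set, so the call must fail,
        # which is the same condition the real test asserts on the pod.
        with self.assertRaises(
                AssertionError,
                msg="dmeventd.service is running on the setup"):
            fake_rsh("systemctl is-active dmeventd.service")


if __name__ == "__main__":
    unittest.main()
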