    def test_prometheus_pods_and_pvcs(self):
        """Validate Prometheus pods and PVCs"""
        # Wait for PVCs to be bound
        pod_names, pvc_names = self._get_pod_names_and_pvc_names()
        openshift_ops.wait_for_pvcs_be_bound(self._master, pvc_names)

        # Validate that there are no pods in a non-running state
        field_selector, pod_count = "status.phase!=Running", 0
        openshift_ops.wait_for_pods_be_ready(
            self._master, pod_count, field_selector=field_selector)

        # Validate iscsi and multipath
        for (pvc_name, pod_name) in zip(pvc_names, pod_names):
            self.verify_iscsi_sessions_and_multipath(
                pvc_name, pod_name[0], rtype='pod',
                heketi_server_url=self._registry_heketi_server_url,
                is_registry_gluster=True)

        # Try to fetch a metric from the Prometheus pod
        self._fetch_metric_from_promtheus_pod(metric='kube_node_info')
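
    # Illustrative sketch (an assumption, not part of the original suite) of
    # how a metric could be pulled from the Prometheus pod over its HTTP API,
    # roughly what the "_fetch_metric_from_promtheus_pod" helper is expected
    # to do. The pod label selector and query endpoint are assumptions.
    def _fetch_metric_sketch(self, metric='kube_node_info'):
        import json

        from openshiftstoragelibs import openshift_ops

        # Pick one Prometheus pod by label (the label value is an assumption)
        pods = openshift_ops.oc_get_custom_resource(
            self._master, 'pod', ':.metadata.name', selector='app=prometheus')
        self.assertTrue(pods and pods[0], "No Prometheus pod was found")
        prometheus_pod = pods[0][0]

        # Query the Prometheus HTTP API from inside the pod
        cmd = "curl -s 'http://localhost:9090/api/v1/query?query=%s'" % metric
        ret, out, err = openshift_ops.oc_rsh(self._master, prometheus_pod, cmd)
        self.assertEqual(ret, 0, "Failed to query Prometheus: %s" % err)

        # Return the parsed metric samples
        return json.loads(out).get('data', {}).get('result', [])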
    def create_and_wait_for_pvcs(
            self, pvc_size=1, pvc_name_prefix="autotests-pvc", pvc_amount=1,
            sc_name=None, timeout=600, wait_step=10, skip_waiting=False,
            skip_cleanup=False):
        """Create multiple PVC's not waiting for it

        Args:
            pvc_size (int): size of each PVC, default value is 1
            pvc_name_prefix (str): name prefix for each PVC, default value is
                                   'autotests-pvc'
            pvc_amount (int): number of PVCs, default value is 1
            sc_name (str): storage class to create PVCs from, default value is
                           None, which causes automatic creation of a storage
                           class
            timeout (int): timeout for waiting for the PVCs to get bound
            wait_step (int): wait time between each PVC status check
            skip_waiting (bool): whether to skip waiting for the PVCs to get
                                 bound, default value is False
            skip_cleanup (bool): whether to skip registering cleanup of the
                                 created resources, default value is False
        Returns:
            List: list of PVC names
        """
        node = self.ocp_client[0]

        # Create storage class if not specified
        if not sc_name:
            if getattr(self, "sc_name", ""):
                sc_name = self.sc_name
            else:
                sc_name = self.create_storage_class(skip_cleanup=skip_cleanup)

        # Create PVCs
        pvc_names = []
        for i in range(pvc_amount):
            pvc_name = oc_create_pvc(
                node, sc_name, pvc_name_prefix=pvc_name_prefix,
                pvc_size=pvc_size)
            pvc_names.append(pvc_name)
        if not skip_cleanup:
            self.addCleanup(
                wait_for_resources_absence, node, 'pvc', pvc_names)

        # Wait for PVCs to be in bound state
        try:
            if not skip_waiting:
                wait_for_pvcs_be_bound(node, pvc_names, timeout, wait_step)
        finally:
            if skip_cleanup:
                return pvc_names

            if get_openshift_version() < "3.9":
                reclaim_policy = "Delete"
            else:
                reclaim_policy = oc_get_custom_resource(
                    node, 'sc', ':.reclaimPolicy', sc_name)[0]

            for pvc_name in pvc_names:
                if reclaim_policy == 'Retain':
                    pv_name = get_pv_name_from_pvc(node, pvc_name)
                    if not pv_name and skip_waiting:
                        continue
                    self.addCleanup(oc_delete, node, 'pv', pv_name,
                                    raise_on_absence=False)
                    custom = (r':.metadata.annotations."gluster\.kubernetes'
                              r'\.io\/heketi\-volume\-id"')
                    vol_id = oc_get_custom_resource(
                        node, 'pv', custom, pv_name)[0]
                    if self.sc.get('provisioner') == "kubernetes.io/glusterfs":
                        self.addCleanup(heketi_volume_delete,
                                        self.heketi_client_node,
                                        self.heketi_server_url, vol_id,
                                        raise_on_error=False)
                    else:
                        self.addCleanup(heketi_blockvolume_delete,
                                        self.heketi_client_node,
                                        self.heketi_server_url, vol_id,
                                        raise_on_error=False)
                self.addCleanup(oc_delete, node, 'pvc', pvc_name,
                                raise_on_absence=False)
        return pvc_names
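
    # Illustrative usage sketch (an assumption, not from the original suite)
    # showing how a test could consume create_and_wait_for_pvcs; the test name
    # and PVC parameters below are arbitrary examples.
    def test_create_pvcs_usage_sketch(self):
        # Create three 2Gi PVCs on an auto-created storage class and wait
        # until all of them get bound
        pvc_names = self.create_and_wait_for_pvcs(
            pvc_size=2, pvc_name_prefix="autotests-pvc", pvc_amount=3)
        self.assertEqual(len(pvc_names), 3)

        # Create one more PVC without waiting for it and without registering
        # automatic cleanup, then register cleanup manually
        extra_pvc = self.create_and_wait_for_pvcs(
            skip_waiting=True, skip_cleanup=True)[0]
        self.addCleanup(oc_delete, self.ocp_client[0], 'pvc', extra_pvc,
                        raise_on_absence=False)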
    def test_targetcli_failure_during_block_pvc_creation(self):
        """Validate block PVC provisioning behavior while targetcli processes
           are being killed on one of the Gluster nodes
        """
        h_node, h_server = self.heketi_client_node, self.heketi_server_url

        # Disable redundant nodes and leave just 3 nodes online
        h_node_id_list = heketi_node_list(h_node, h_server)
        self.assertGreater(len(h_node_id_list), 2)
        for node_id in h_node_id_list[3:]:
            heketi_node_disable(h_node, h_server, node_id)
            self.addCleanup(heketi_node_enable, h_node, h_server, node_id)

        # Gather info about the Gluster node we are going to use for killing
        # targetcli processes.
        chosen_g_node_id = h_node_id_list[0]
        chosen_g_node_info = heketi_node_info(h_node,
                                              h_server,
                                              chosen_g_node_id,
                                              json=True)
        chosen_g_node_ip = chosen_g_node_info['hostnames']['storage'][0]
        chosen_g_node_hostname = chosen_g_node_info['hostnames']['manage'][0]
        chosen_g_node_ip_and_hostname = set(
            (chosen_g_node_ip, chosen_g_node_hostname))

        g_pods = oc_get_custom_resource(
            self.node,
            'pod', [
                ':.metadata.name', ':.status.hostIP', ':.status.podIP',
                ':.spec.nodeName'
            ],
            selector='glusterfs-node=pod')
        if g_pods and g_pods[0]:
            for g_pod in g_pods:
                if chosen_g_node_ip_and_hostname.intersection(set(g_pod[1:])):
                    host_to_run_cmds = self.node
                    g_pod_prefix, g_pod = 'oc exec %s -- ' % g_pod[0], g_pod[0]
                    break
            else:
                err_msg = (
                    'Failed to find Gluster pod filtering it by following IPs '
                    'and hostnames: %s\nFound following Gluster pods: %s') % (
                        chosen_g_node_ip_and_hostname, g_pods)
                g.log.error(err_msg)
                raise AssertionError(err_msg)
        else:
            host_to_run_cmds, g_pod_prefix, g_pod = chosen_g_node_ip, '', ''

        # Back up the targetcli prefs file and schedule repeated killing of
        # the targetcli process
        file_for_bkp, pvc_number = "~/.targetcli/prefs.bin", 10
        self.cmd_run("%scp %s %s_backup" %
                     (g_pod_prefix, file_for_bkp, file_for_bkp),
                     hostname=host_to_run_cmds)
        self.addCleanup(self.cmd_run,
                        "%srm -f %s_backup" % (g_pod_prefix, file_for_bkp),
                        hostname=host_to_run_cmds)
        kill_targetcli_services_cmd = (
            "while true; do "
            "  %spkill targetcli || echo 'failed to kill targetcli process'; "
            "done" % g_pod_prefix)
        loop_for_killing_targetcli_process = g.run_async(
            host_to_run_cmds, kill_targetcli_services_cmd, "root")
        try:
            # Create a bunch of PVCs
            sc_name, pvc_names = self.create_storage_class(), []
            for i in range(pvc_number):
                pvc_names.append(oc_create_pvc(self.node, sc_name, pvc_size=1))
            self.addCleanup(wait_for_resources_absence, self.node, 'pvc',
                            pvc_names)
            self.addCleanup(oc_delete, self.node, 'pvc', ' '.join(pvc_names))

            # Check that we get the expected number of provisioning errors
            timeout, wait_step, succeeded_pvcs, failed_pvcs = 120, 1, [], []
            _waiter, err_msg = Waiter(timeout=timeout, interval=wait_step), ""
            for pvc_name in pvc_names:
                _waiter._attempt = 0
                for w in _waiter:
                    events = get_events(self.node,
                                        pvc_name,
                                        obj_type="PersistentVolumeClaim")
                    for event in events:
                        if event['reason'] == 'ProvisioningSucceeded':
                            succeeded_pvcs.append(pvc_name)
                            break
                        elif event['reason'] == 'ProvisioningFailed':
                            failed_pvcs.append(pvc_name)
                            break
                    else:
                        continue
                    break
                if w.expired:
                    err_msg = (
                        "Got neither 'ProvisioningSucceeded' nor "
                        "'ProvisioningFailed' events for all the PVCs in "
                        "time. Timeout was %ss, interval was %ss." %
                        (timeout, wait_step))
                    g.log.error(err_msg)
                    raise AssertionError(err_msg)
            self.assertGreater(len(failed_pvcs), len(succeeded_pvcs))
        finally:
            # Restore targetcli workability
            loop_for_killing_targetcli_process._proc.terminate()

            # Revert the breakage that can be caused by BZ-1769426
            check_bkp_file_size_cmd = ("%sls -lah %s | awk '{print $5}'" %
                                       (g_pod_prefix, file_for_bkp))
            bkp_file_size = self.cmd_run(check_bkp_file_size_cmd,
                                         hostname=host_to_run_cmds).strip()
            if bkp_file_size == "0":
                self.cmd_run("%smv %s_backup %s" %
                             (g_pod_prefix, file_for_bkp, file_for_bkp),
                             hostname=host_to_run_cmds)
                breakage_err_msg = (
                    "File located at '%s' was corrupted (zero size) on the "
                    "%s. Looks like BZ-1769426 took effect. \n"
                    "Don't worry, it has been restored after test failure." %
                    (file_for_bkp, "'%s' Gluster pod" % g_pod
                     if g_pod else "'%s' Gluster node" % chosen_g_node_ip))
                g.log.error(breakage_err_msg)
                if err_msg:
                    breakage_err_msg = "%s\n%s" % (err_msg, breakage_err_msg)
                raise AssertionError(breakage_err_msg)

        # Wait for all the PVCs to be in bound state
        wait_for_pvcs_be_bound(self.node, pvc_names, timeout=300, wait_step=5)
    def test_dev_path_file_volume_delete(self):
        """Validate device path name changes the deletion of
           already existing file volumes
        """

        pvc_size, pvc_amount = 2, 5
        vol_details, pvc_names = [], []

        # Create PVCs
        sc_name = self.create_storage_class()
        for i in range(0, pvc_amount):
            pvc_name = openshift_ops.oc_create_pvc(self.node,
                                                   sc_name,
                                                   pvc_size=pvc_size)
            pvc_names.append(pvc_name)
            self.addCleanup(openshift_ops.wait_for_resource_absence, self.node,
                            'pvc', pvc_name)
            self.addCleanup(openshift_ops.oc_delete,
                            self.node,
                            'pvc',
                            pvc_name,
                            raise_on_absence=False)

        # Wait for PVCs to be bound
        openshift_ops.wait_for_pvcs_be_bound(self.node, pvc_names)

        # Get volume names for the created PVCs
        for pvc_name in pvc_names:
            pv_name = openshift_ops.get_pv_name_from_pvc(self.node, pvc_name)
            volume_name = openshift_ops.get_vol_names_from_pv(
                self.node, pv_name)
            vol_details.append(volume_name)

        # Verify file volumes count
        self.validate_file_volumes_count(self.h_node, self.h_server,
                                         self.node_ip)

        # Collect pvs info, detach and re-attach the disks, then collect pvs
        # info again
        pvs_info_before = openshift_storage_libs.get_pvs_info(
            self.node, self.node_ip, self.devices_list, raise_on_error=False)
        self.detach_and_attach_vmdk(self.vm_name, self.node_hostname,
                                    self.devices_list)
        pvs_info_after = openshift_storage_libs.get_pvs_info(
            self.node, self.node_ip, self.devices_list, raise_on_error=False)

        # Compare pvs info collected before and after the disk re-attach
        # (entries are compared with a one-position offset as the device
        # paths are expected to shift)
        for (path, uuid, vg_name), (_path, _uuid,
                                    _vg_name) in zip(pvs_info_before[:-1],
                                                     pvs_info_after[1:]):
            self.assertEqual(
                uuid, _uuid, "pv_uuid check failed. Expected:{},"
                "Actual: {}".format(uuid, _uuid))
            self.assertEqual(
                vg_name, _vg_name, "vg_name check failed. Expected:"
                "{}, Actual:{}".format(vg_name, _vg_name))

        # Delete the created PVCs
        for pvc_name in pvc_names:
            openshift_ops.oc_delete(self.node, 'pvc', pvc_name)

        # Wait for resource absence and get volume list
        openshift_ops.wait_for_resources_absence(self.node, 'pvc', pvc_names)
        vol_list = volume_ops.get_volume_list(self.node_ip)
        self.assertIsNotNone(vol_list, "Failed to get volumes list")

        # Validate that the deleted volumes are no longer present
        for vol in vol_details:
            self.assertNotIn(vol, vol_list,
                             "Failed to delete volume {}".format(vol))