def test_brick_multiplex_pids_with_diff_vol_option_values(self):
        """Test Brick Pid's should be same when values of vol options are diff
        """
        h_client, h_url = self.heketi_client_node, self.heketi_server_url
        # Disable heketi nodes except first three nodes
        h_nodes_list = heketi_node_list(h_client, h_url)
        for node_id in h_nodes_list[3:]:
            heketi_node_disable(h_client, h_url, node_id)
            self.addCleanup(heketi_node_enable, h_client, h_url, node_id)

        # Create storage classes with different volume options
        sc1 = self.create_storage_class(volumeoptions='user.heketi.abc 1')
        sc2 = self.create_storage_class(volumeoptions='user.heketi.abc 2')
        # Create PVCs using the above storage classes
        pvc1 = self.create_and_wait_for_pvcs(sc_name=sc1)
        pvc2 = self.create_and_wait_for_pvcs(sc_name=sc2)

        # Get vol info and status
        vol_info1 = get_gluster_vol_info_by_pvc_name(self.node, pvc1[0])
        vol_info2 = get_gluster_vol_info_by_pvc_name(self.node, pvc2[0])
        vol_status1 = get_gluster_vol_status(vol_info1['gluster_vol_id'])
        vol_status2 = get_gluster_vol_status(vol_info2['gluster_vol_id'])

        # Verify vol options
        err_msg = ('Volume option "user.heketi.abc %s" did not match for '
                   'volume %s in gluster vol info')
        self.assertEqual(
            vol_info1['options']['user.heketi.abc'], '1',
            err_msg % (1, vol_info1['gluster_vol_id']))
        self.assertEqual(
            vol_info2['options']['user.heketi.abc'], '2',
            err_msg % (2, vol_info2['gluster_vol_id']))

        # Get the PIDs and match them
        pids1 = set()
        for brick in vol_info1['bricks']['brick']:
            host, bname = brick['name'].split(":")
            pids1.add(vol_status1[host][bname]['pid'])

        pids2 = set()
        for brick in vol_info2['bricks']['brick']:
            host, bname = brick['name'].split(":")
            pids2.add(vol_status2[host][bname]['pid'])

        err_msg = ('PIDs of both volumes %s and %s are expected to be the '
                   'same, but got different PIDs "%s" and "%s".' %
                   (vol_info1['gluster_vol_id'], vol_info2['gluster_vol_id'],
                    pids1, pids2))
        self.assertEqual(pids1, pids2, err_msg)
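
The test above indexes brick PIDs as vol_status[host][brick_path]['pid']. As a hedged illustration only (hosts, brick paths, and PIDs below are hypothetical sample values), get_gluster_vol_status(<vol_id>) is assumed to return a nested dict of roughly this shape:

# Hypothetical sample of the per-volume status structure assumed above
sample_vol_status = {
    "10.70.46.11": {                                    # gluster host
        "/var/lib/heketi/mounts/vg_x/brick_y/brick": {  # brick path
            "status": "1",    # "1" means the brick is online
            "pid": "12345",   # glusterfsd PID serving this brick
        },
    },
}
# With brick multiplexing enabled, bricks of different volumes are served
# by the same glusterfsd process, which is why the two PID sets collected
# above are expected to be equal.
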
Code example #2
    def check_vol_status(self):
        # Check status of all vols
        status = get_gluster_vol_status('all')

        # Requires `from collections import defaultdict` at module level
        pids = defaultdict(int)
        down_bricks = 0
        for vol in status.keys():
            for host in status[vol].keys():
                for brick_or_shd in status[vol][host].keys():
                    if status[vol][host][brick_or_shd]['status'] != "1":
                        down_bricks += 1
                    pid = status[vol][host][brick_or_shd]['pid']
                    pids[pid] += 1

        # Collect PIDs that have more than 250 bricks attached to them
        exhausted_pids = [pd for pd in pids.keys() if pids[pd] > 250]

        self.assertFalse(
            (exhausted_pids and down_bricks),
            'PIDs {} have more than 250 bricks attached to them. {} bricks '
            'or shd are down.'.format(exhausted_pids, down_bricks))
        self.assertFalse(
            exhausted_pids, 'PIDs {} have more than 250 bricks attached to'
            ' them.'.format(exhausted_pids))
        self.assertFalse(
            down_bricks, '{} bricks or shd are down.'.format(down_bricks))
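
check_vol_status above relies on the aggregated form of the same helper: get_gluster_vol_status('all') is assumed to add one more nesting level keyed by volume name, with brick and self-heal daemon entries under each host. The exact key used for the self-heal daemon entry is an assumption here, and all sample values are hypothetical:

# Hypothetical sample of the aggregated status structure assumed above
sample_all_status = {
    "vol_0123abcd": {
        "10.70.46.11": {
            "/var/lib/heketi/mounts/vg_x/brick_y/brick": {
                "status": "1",   # anything other than "1" counts as down
                "pid": "12345",
            },
            "Self-heal Daemon": {"status": "1", "pid": "6789"},
        },
    },
}
# check_vol_status then counts how many brick/shd entries share each PID
# and flags PIDs serving more than 250 bricks as exhausted.
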
Code example #3
    def test_kill_bhv_fsd_while_es_pod_running(self):
        """Validate killing of bhv fsd won't effect es pod io's"""

        # Fetch pod and PVC names and validate iscsi and multipath
        es_pod, pvc_name = self._get_es_pod_and_verify_iscsi_sessions()

        # Get the bhv name
        gluster_node = list(self._registry_servers_info.keys())[0]
        openshift_ops.switch_oc_project(self._master,
                                        self._registry_project_name)
        bhv_name = self.get_block_hosting_volume_by_pvc_name(
            pvc_name,
            heketi_server_url=self._registry_heketi_server_url,
            gluster_node=gluster_node)

        # Get the PID of one of the BHV's bricks
        gluster_volume_status = gluster_ops.get_gluster_vol_status(bhv_name)
        pid = None
        for g_node, g_node_data in gluster_volume_status.items():
            if g_node != gluster_node:
                continue
            for process_name, process_data in g_node_data.items():
                if not process_name.startswith("/var"):
                    continue
                pid = process_data["pid"]
                # When a brick is down, its PID is returned as -1, which is
                # unexpected here, so assert against it explicitly.
                self.assertNotEqual(
                    pid, "-1", "Got unexpected PID (-1) for '{}' gluster vol "
                    "on '{}' node.".format(bhv_name, gluster_node))
                break
            self.assertTrue(
                pid, "Could not find 'pid' in Gluster vol data for '{}' "
                "Gluster node. Data: {}".format(gluster_node,
                                                gluster_volume_status))
            break

        # Kill gluster vol brick process using found pid
        cmd_kill = "kill -9 {}".format(pid)
        cmd_start_vol = "gluster v start {} force".format(bhv_name)
        openshift_ops.cmd_run_on_gluster_pod_or_node(self._master, cmd_kill,
                                                     gluster_node)
        self.addCleanup(openshift_ops.cmd_run_on_gluster_pod_or_node,
                        self._master, cmd_start_vol, gluster_node)
        self.addCleanup(openshift_ops.switch_oc_project, self._master,
                        self._registry_project_name)

        # Run I/O on ES pod
        openshift_ops.switch_oc_project(self._master,
                                        self._logging_project_name)
        file_name = '/elasticsearch/persistent/file1'
        cmd_run_io = 'dd if=/dev/urandom of={} bs=4k count=10000'.format(
            file_name)
        cmd_remove_file = 'rm {}'.format(file_name)
        openshift_ops.oc_rsh(self._master, es_pod, cmd_run_io)
        self.addCleanup(openshift_ops.oc_rsh, self._master, es_pod,
                        cmd_remove_file)
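
The cleanups above depend on unittest running addCleanup callbacks in reverse registration order: the switch back to the registry project is registered after the volume force-start command, so it executes before it, and the force-start therefore runs with the registry project active (relevant when the command is routed through a gluster pod). A minimal, self-contained sketch of that ordering; the class name and messages are illustrative only:

import unittest

class CleanupOrderDemo(unittest.TestCase):
    # Illustration only: addCleanup callbacks run LIFO after the test body
    def test_cleanup_order(self):
        calls = []
        self.addCleanup(calls.append, "force-start volume (runs second)")
        self.addCleanup(calls.append, "switch to registry project (runs first)")
        # Once this test body returns, unittest invokes the cleanups in
        # reverse order, so 'calls' ends up as:
        # ["switch to registry project (runs first)",
        #  "force-start volume (runs second)"]
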
Code example #4
    def _get_bricks_pids(self, vol_name):
        """Return list having bricks pids with gluster pod ip"""
        pids = []

        g_volume_status = get_gluster_vol_status(vol_name)
        self.assertTrue(
            g_volume_status, "Failed to get the gluster volume status for the "
            "volume {}".format(vol_name))
        for g_node, g_node_data in g_volume_status.items():
            for process_name, process_data in g_node_data.items():
                if process_name.startswith("/var"):
                    pid = process_data["pid"]
                    pids.append([g_node, pid])
        return pids

    def test_prometheous_kill_bhv_brick_process(self):
        """Validate kill brick process of block hosting
        volume with prometheus workload running"""

        # Add check for CRS version
        openshift_ops.switch_oc_project(
            self._master, self._registry_project_name)
        if not self.is_containerized_gluster():
            self.skipTest("Skipping this test case as CRS"
                          " version check can not be implemented")

        # Get one of the prometheus pod names and its respective PVC name
        openshift_ops.switch_oc_project(
            self._master, self._prometheus_project_name)
        prometheus_pods = openshift_ops.oc_get_pods(
            self._master, selector=self._prometheus_resources_selector)
        if not prometheus_pods:
            self.skipTest(
                "Skipping test as prometheus pod is not present")

        # Validate iscsi and multipath
        prometheus_pod = list(prometheus_pods.keys())[0]
        pvc_name = openshift_ops.oc_get_custom_resource(
            self._master, "pod",
            ":.spec.volumes[*].persistentVolumeClaim.claimName",
            prometheus_pod)
        self.assertTrue(pvc_name, "Failed to get PVC name")
        pvc_name = pvc_name[0]
        self.verify_iscsi_sessions_and_multipath(
            pvc_name, prometheus_pod, rtype='pod',
            heketi_server_url=self._registry_heketi_server_url,
            is_registry_gluster=True)

        # Try to fetch metric from prometheus pod
        self._fetch_metric_from_promtheus_pod(
            metric='heketi_device_brick_count')

        # Kill the brick process of a BHV
        gluster_node = list(self._registry_servers_info.keys())[0]
        openshift_ops.switch_oc_project(
            self._master, self._registry_project_name)
        bhv_name = self.get_block_hosting_volume_by_pvc_name(
            pvc_name, heketi_server_url=self._registry_heketi_server_url,
            gluster_node=gluster_node, ocp_client_node=self._master)
        vol_status = gluster_ops.get_gluster_vol_status(bhv_name)
        gluster_node_ip, brick_pid = None, None
        for g_node, g_node_data in vol_status.items():
            for process_name, process_data in g_node_data.items():
                if process_name.startswith("/var"):
                    gluster_node_ip = g_node
                    brick_pid = process_data["pid"]
                    break
            if gluster_node_ip and brick_pid:
                break
        self.assertIsNotNone(brick_pid, "Could not find pid for brick")
        cmd = "kill -9 {}".format(brick_pid)
        openshift_ops.cmd_run_on_gluster_pod_or_node(
            self._master, cmd, gluster_node_ip)
        self.addCleanup(self._guster_volume_cleanup, bhv_name)

        # Check if the brick-process has been killed
        killed_pid_cmd = (
            "ps -p {} -o pid --no-headers".format(brick_pid))
        try:
            openshift_ops.cmd_run_on_gluster_pod_or_node(
                self._master, killed_pid_cmd, gluster_node_ip)
        except exceptions.ExecutionError:
            g.log.info("Brick process {} was killed"
                       "successfully".format(brick_pid))

        # Try to fetch metric from prometheus pod
        openshift_ops.switch_oc_project(
            self._master, self._prometheus_project_name)
        self._fetch_metric_from_promtheus_pod(
            metric='heketi_device_brick_count')

        # Start the bhv using force
        openshift_ops.switch_oc_project(
            self._master, self._registry_project_name)
        start_vol, _, _ = volume_ops.volume_start(
            gluster_node_ip, bhv_name, force=True)
        self.assertFalse(
            start_vol, "Failed to start volume {}"
            " using force".format(bhv_name))

        # Validate iscsi and multipath
        openshift_ops.switch_oc_project(
            self._master, self._prometheus_project_name)
        self.verify_iscsi_sessions_and_multipath(
            pvc_name, prometheus_pod, rtype='pod',
            heketi_server_url=self._registry_heketi_server_url,
            is_registry_gluster=True)

        # Try to fetch metric from prometheus pod
        self._fetch_metric_from_promtheus_pod(
            metric='heketi_device_brick_count')
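
The "ps -p" check in the test above only logs when the kill succeeded; if the brick process somehow survived, the test would continue silently. Below is a hedged helper sketch that turns the same check into a boolean. The function name is hypothetical, and it assumes, as the try/except above already does, that cmd_run_on_gluster_pod_or_node raises exceptions.ExecutionError when the remote command exits non-zero:

def brick_process_is_gone(master_node, gluster_node_ip, brick_pid):
    # Illustrative helper: True if `ps` no longer finds the given PID
    cmd = "ps -p {} -o pid --no-headers".format(brick_pid)
    try:
        openshift_ops.cmd_run_on_gluster_pod_or_node(
            master_node, cmd, gluster_node_ip)
    except exceptions.ExecutionError:
        # `ps -p` exits non-zero once the PID no longer exists, which
        # surfaces here as ExecutionError, i.e. the brick was killed.
        return True
    return False

# Possible stricter usage inside the test above:
# self.assertTrue(
#     brick_process_is_gone(self._master, gluster_node_ip, brick_pid),
#     "Brick process {} is still running".format(brick_pid))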