Example #1
0
    def setUpClass(cls):
        """Load the shared test configuration into class attributes.

        Copies the OCP, storage project, heketi, gluster and storage class
        settings from ``g.config`` onto ``cls``, base64-encodes the heketi
        CLI key by running a shell command on the first master node, checks
        that the heketi server responds, switches to the storage project and
        stores the glusto run id (creating one if the config has none).

        Raises:
            ExecutionError: if the base64 command or the project switch
                fails.
            ConfigError: if the heketi server is not alive.
        """
        super(BaseClass, cls).setUpClass()

        # OCP hosts: names as lists, details as the raw config mappings
        ocp_servers = g.config['ocp_servers']
        cls.ocp_servers_info = ocp_servers
        cls.ocp_master_node = list(ocp_servers['master'].keys())
        cls.ocp_master_node_info = ocp_servers['master']
        cls.ocp_client = list(ocp_servers['client'].keys())
        cls.ocp_client_info = ocp_servers['client']
        cls.ocp_nodes = list(ocp_servers['nodes'].keys())
        cls.ocp_nodes_info = ocp_servers['nodes']

        # Storage project: the "cns" section wins over "openshift", and
        # 'storage_project_name' wins over 'setup' -> 'cns_project_name'
        fallback_section = g.config.get("openshift")
        openshift_config = g.config.get("cns", fallback_section)
        fallback_project = openshift_config.get('setup', {}).get(
            'cns_project_name')
        cls.storage_project_name = openshift_config.get(
            'storage_project_name', fallback_project)

        # Heketi settings are copied one-to-one under the same names
        heketi_config = openshift_config['heketi_config']
        for attr in ('heketi_dc_name', 'heketi_service_name',
                     'heketi_client_node', 'heketi_server_url',
                     'heketi_cli_user', 'heketi_cli_key'):
            setattr(cls, attr, heketi_config[attr])

        gluster_servers = g.config['gluster_servers']
        cls.gluster_servers = list(gluster_servers.keys())
        cls.gluster_servers_info = gluster_servers

        dynamic_provisioning = openshift_config['dynamic_provisioning']
        cls.storage_classes = dynamic_provisioning['storage_classes']
        legacy_sc = cls.storage_classes.get('file_storage_class')
        cls.sc = cls.storage_classes.get('storage_class1', legacy_sc)

        # The base64 form of the heketi key is produced on the master node
        master = cls.ocp_master_node[0]
        cmd = "echo -n %s | base64" % cls.heketi_cli_key
        ret, out, err = g.run(master, cmd, "root")
        if ret != 0:
            raise ExecutionError(
                "failed to execute cmd %s on %s out: %s err: %s" % (
                    cmd, master, out, err))
        cls.secret_data_key = out.strip()

        # Nothing below makes sense without a responsive heketi server
        heketi_alive = hello_heketi(
            cls.heketi_client_node, cls.heketi_server_url)
        if not heketi_alive:
            raise ConfigError(
                "Heketi server %s is not alive" % cls.heketi_server_url)

        # Make the storage project the current oc project
        switched = switch_oc_project(master, cls.storage_project_name)
        if not switched:
            raise ExecutionError(
                "Failed to switch oc project on node %s" % master)

        # Reuse the run id from the config when there already is one
        if 'glustotest_run_id' not in g.config:
            now = datetime.datetime.now()
            g.config['glustotest_run_id'] = now.strftime('%H_%M_%d_%m_%Y')
        cls.glustotest_run_id = g.config['glustotest_run_id']
        g.log.info(
            "Setupclass: %s : %s" % (cls.__name__, cls.glustotest_run_id))
    def test_pvc_resize_while_ios_are_running(self):
        """Resize a PVC from 1 to 2 while IO runs against its app pod.

        Raises:
            ExecutionError: if the background IO loop did not end through
                the ``timeout`` command (exit code 124) or reported a failed
                command on stderr.
        """
        # Storage class with expansion enabled, a PVC of size 1, an app pod
        sc_name = self.create_storage_class(
            allow_volume_expansion=True, create_vol_name_prefix=True)
        pvc_name = self.create_and_wait_for_pvc(pvc_size=1, sc_name=sc_name)
        _, pod_name = self.create_dc_with_pvc(pvc_name)
        node = self.node

        # Keep writing to the pod in the background for 5 minutes
        io_loop = (
            'timeout 5m bash -c -- "while true; do oc exec  {} dd '
            'if=/dev/urandom of=/mnt/f1 bs=100K count=2000; '
            'done"').format(pod_name)
        io_proc = g.run_async(host=node, command=io_loop)

        # Resize while the IO is in flight, then verify both PVC and PV
        new_size = 2
        resize_pvc(node, pvc_name, new_size)
        verify_pvc_size(node, pvc_name, new_size)
        verify_pv_size(node, get_pv_name_from_pvc(node, pvc_name), new_size)

        # 124 is what `timeout` returns when it had to stop the command
        ret, _, err = io_proc.async_communicate()
        err_text = str(err)
        io_failed = "command terminated with exit code" in err_text
        if ret != 124 or io_failed:
            raise ExecutionError("Failed to run io, error {}".format(err_text))
Example #3
0
    def test_pv_resize_with_prefix_for_name_and_size(
            self, create_vol_name_prefix=False, valid_size=True):
        """Validate PV resize with and without a volume name prefix.

        Args:
            create_vol_name_prefix (bool): forwarded to the storage class
                creation; when true the volume name prefix is additionally
                verified through heketi.
            valid_size (bool): when true the PVC is resized to 2 and the new
                size is verified; otherwise a resize to the invalid size
                'ten' must raise AssertionError and the size must stay 1.
        """
        dir_path = "/mnt/"
        node = self.ocp_client[0]
        failure_template = "Failed to execute command %s on %s"

        def dd_cmd(filename, block_size, count):
            # Every IO step of this test writes random data below dir_path
            return "dd if=/dev/urandom of=%s%s bs=%s count=%s" % (
                dir_path, filename, block_size, count)

        # Storage class and PVC
        self.create_storage_class(
            create_vol_name_prefix=create_vol_name_prefix,
            allow_volume_expansion=True)
        pvc_name = self.create_and_wait_for_pvc()

        # App DC using the PVC; cleanups run in reverse order, so the DC is
        # scaled down to 0 before it gets deleted
        dc_name = oc_create_app_dc_with_io(node, pvc_name)
        self.addCleanup(oc_delete, node, 'dc', dc_name)
        self.addCleanup(scale_dc_pod_amount_and_wait, node, dc_name, 0)

        pod_name = get_pod_name_from_dc(node, dc_name)
        wait_for_pod_be_ready(node, pod_name)

        if create_vol_name_prefix:
            prefix_ok = heketi_ops.verify_volume_name_prefix(
                node, self.sc['volumenameprefix'], self.sc['secretnamespace'],
                pvc_name, self.heketi_server_url)
            self.assertTrue(prefix_ok, "verify volnameprefix failed")

        # Initial write that has to succeed
        cmd = dd_cmd("file", "100K", 1000)
        ret, out, err = oc_rsh(node, pod_name, cmd)
        self.assertEqual(ret, 0, failure_template % (cmd, node))
        pv_name = get_pv_name_from_pvc(node, pvc_name)

        if not valid_size:
            # An invalid size must be rejected and the size must not change
            with self.assertRaises(AssertionError):
                resize_pvc(node, pvc_name, 'ten')
            verify_pvc_size(node, pvc_name, 1)
            verify_pv_size(node, pv_name, 1)
        else:
            # Before the resize a larger write is expected to fail
            cmd = dd_cmd("file2", "100K", 10000)
            with self.assertRaises(AssertionError):
                ret, out, err = oc_rsh(node, pod_name, cmd)
                msg = ("Command '%s' was expected to fail on '%s' node. "
                       "But it returned following: ret is '%s', err is '%s' "
                       "and out is '%s'" % (cmd, node, ret, err, out))
                raise ExecutionError(msg)
            new_size = 2
            resize_pvc(node, pvc_name, new_size)
            verify_pvc_size(node, pvc_name, new_size)
            verify_pv_size(node, pv_name, new_size)

        # Recreate the pod and make sure the volume accepts new data
        oc_delete(node, 'pod', pod_name)
        wait_for_resource_absence(node, 'pod', pod_name)
        pod_name = get_pod_name_from_dc(node, dc_name)
        wait_for_pod_be_ready(node, pod_name)
        cmd = dd_cmd("file_new", "50K", 10000)
        ret, out, err = oc_rsh(node, pod_name, cmd)
        self.assertEqual(ret, 0, failure_template % (cmd, node))
Example #4
0
    def test_delete_heketidb_volume(self):
        """Check that deleting the heketidb volume via heketi-cli fails.

        Raises:
            ExecutionError: if no volume named 'heketidbstorage' is listed.
        """
        client = self.heketi_client_node
        server = self.heketi_server_url

        # Two additional volumes, removed again during cleanup
        for _ in range(2):
            created = heketi_ops.heketi_volume_create(
                client, server, 10, json=True)
            self.addCleanup(
                heketi_ops.heketi_volume_delete, client, server,
                created["id"])

        listing = heketi_ops.heketi_volume_list(client, server, json=True)
        self.assertTrue(listing["volumes"], "Heketi volume list empty.")

        for volume_id in listing["volumes"]:
            details = heketi_ops.heketi_volume_info(
                client, server, volume_id, json=True)
            if details["name"] != "heketidbstorage":
                continue

            # The delete attempt on the db volume has to be refused
            with self.assertRaises(AssertionError):
                heketi_ops.heketi_volume_delete(client, server, volume_id)
            return

        raise ExecutionError(
            "Warning: heketidbstorage doesn't exist in list of volumes")
    def test_dynamic_provisioning_glusterfile_glusterpod_failure(self):
        """Check that IO on a PVC survives the deletion of a gluster pod.

        Starts a background write from an app pod, deletes one gluster pod
        that serves the PVC, waits for its replacement on the same host to
        become ready and finally asserts that the write returned 0.

        Raises:
            ExecutionError: if no replacement gluster pod shows up in time.
        """
        # Deleting gluster pods only applies to containerized Gluster
        if not self.is_containerized_gluster():
            self.skipTest("Only containerized Gluster clusters are supported.")

        mount_path = "/mnt"
        datafile_path = '%s/fake_file_for_%s' % (mount_path, self.id())

        # Secret + storage class, then the PVC
        self.create_storage_class()
        pvc_name = self.create_and_wait_for_pvc()
        node = self.node

        # App pod with the volume mounted; cleanups run in reverse order,
        # so the pod is deleted first and its absence is awaited afterwards
        pod_name = oc_create_tiny_pod_with_volume(
            node, pvc_name, "test-pvc-mount-on-app-pod",
            mount_path=mount_path)
        self.addCleanup(wait_for_resource_absence, node, 'pod', pod_name)
        self.addCleanup(oc_delete, node, 'pod', pod_name)
        wait_for_pod_be_ready(node, pod_name, timeout=60, wait_step=2)

        # Background IO that must outlive the gluster pod restart
        io_cmd = "oc rsh %s dd if=/dev/urandom of=%s bs=1000K count=900" % (
            pod_name, datafile_path)
        async_io = g.run_async(node, io_cmd, "root")

        # First gluster pod reported for this PVC is the one to delete
        victim = get_gluster_pod_names_by_pvc_name(node, pvc_name)[0]
        oc_delete(node, 'pod', victim["pod_name"])

        # Poll for a non-terminating glusterfs pod on the same host
        find_pod_cmd = (
            "oc get pods -o wide | grep glusterfs | grep %s | "
            "grep -v Terminating | awk '{print $1}'") % victim["host_name"]
        for w in Waiter(600, 15):
            listing = self.cmd_run(find_pod_cmd)
            new_gluster_pod_name = listing.strip().split("\n")[0].strip()
            if new_gluster_pod_name:
                break
        if w.expired:
            error_msg = "exceeded timeout, new gluster pod not created"
            g.log.error(error_msg)
            raise ExecutionError(error_msg)
        g.log.info("new gluster pod name is %s" % new_gluster_pod_name)
        wait_for_pod_be_ready(node, new_gluster_pod_name)

        # The background IO must have finished successfully
        ret, out, err = async_io.async_communicate()
        self.assertEqual(ret, 0, "IO %s failed on %s" % (io_cmd, node))
    def test_delete_heketidb_volume(self):
        """Check that heketi-cli refuses to delete the heketidb volume.

        Creates two volumes (deleted again on cleanup), walks the volume
        list and, for every volume named 'heketidbstorage', attempts a
        deletion that must fail with a non-zero return code and the expected
        error text.

        Raises:
            ExecutionError: if the volume list is empty or contains no
                volume named 'heketidbstorage'.
        """
        heketidbexists = False
        msg = "Error: Cannot delete volume containing the Heketi database"

        for _ in range(2):
            volume_info = heketi_ops.heketi_volume_create(
                self.heketi_client_node, self.heketi_server_url,
                10, json=True)

            self.addCleanup(
                heketi_ops.heketi_volume_delete, self.heketi_client_node,
                self.heketi_server_url, volume_info["id"])

        volume_list_info = heketi_ops.heketi_volume_list(
            self.heketi_client_node,
            self.heketi_server_url, json=True)

        # Truthiness also covers a missing (None) list, not only []
        if not volume_list_info["volumes"]:
            raise ExecutionError("Heketi volume list empty")

        for volume_id in volume_list_info["volumes"]:
            volume_info = heketi_ops.heketi_volume_info(
                self.heketi_client_node, self.heketi_server_url,
                volume_id, json=True)

            if volume_info["name"] != "heketidbstorage":
                continue

            heketidbexists = True
            # raw_cli_output=True yields (ret, out, err) instead of raising
            delete_ret, delete_output, delete_error = (
                heketi_ops.heketi_volume_delete(
                    self.heketi_client_node,
                    self.heketi_server_url, volume_id,
                    raw_cli_output=True))

            # This assertion fails when the deletion succeeded, so the
            # message has to describe a return code of 0
            self.assertNotEqual(
                delete_ret, 0,
                "Deletion of heketidbstorage volume unexpectedly succeeded "
                "(return code is 0)")
            self.assertEqual(
                delete_error.strip(), msg,
                "Invalid reason for heketidb deletion failure")

        if not heketidbexists:
            raise ExecutionError(
                "Warning: heketidbstorage doesn't exist in list of volumes")
    def _node_reboot(self):
        """Reboot the first gluster server and wait until it is back.

        Triggers a delayed shutdown on the storage host, waits until an rpyc
        connection to it can be opened again, waits for the gluster pods and
        finally checks the gluster services on every pod.

        Raises:
            AssertionError: if the reboot command returns anything but 255.
            ExecutionError: if the host is not reachable within 600 seconds
                or a gluster pod does not become ready.
        """
        first_server = self.gluster_servers[0]
        host = g.config["gluster_servers"][first_server]["storage"]

        reboot_cmd = (
            "sleep 3; /sbin/shutdown -r now 'Reboot triggered by Glusto'")
        ret, out, err = g.run(host, reboot_cmd)

        # Registered before any check so the pods are awaited on failure too
        self.addCleanup(self._wait_for_gluster_pod_to_be_ready)

        # NOTE(review): 255 is presumably what the dropped ssh session
        # reports once the host goes down - confirm
        if ret != 255:
            err_msg = "failed to reboot host %s error: %s" % (host, err)
            g.log.error(err_msg)
            raise AssertionError(err_msg)

        try:
            g.ssh_close_connection(host)
        except Exception as close_err:
            g.log.error(
                "failed to close connection with host %s with error: %s" % (
                    host, close_err))
            raise

        # The shutdown itself is delayed by 3 seconds, so wait that long
        time.sleep(3)

        # Poll until an rpyc connection to the host can be established
        for w in Waiter(timeout=600, interval=10):
            try:
                if g.rpyc_get_connection(host, user="******"):
                    g.rpyc_close_connection(host, user="******")
                    break
            except Exception as conn_err:
                g.log.info(
                    "exception while getting connection: '%s'" % conn_err)

        if w.expired:
            error_msg = (
                "exceeded timeout 600 sec, node '%s' is not reachable" % host)
            g.log.error(error_msg)
            raise ExecutionError(error_msg)

        # Gluster pods have to be ready before services can be checked
        self._wait_for_gluster_pod_to_be_ready()

        # Each of these services has to be running on every gluster pod
        for pod in self.gluster_pod_list:
            for service_name in ("glusterd", "gluster-blockd", "tcmu-runner"):
                g.log.info(
                    "gluster_pod - '%s' : gluster_service '%s'" % (
                        pod, service_name))
                check_service_status_on_pod(
                    self.oc_node, pod, service_name, "running")
    def _wait_for_gluster_pod_to_be_ready(self):
        """Wait until every pod in ``self.gluster_pod_list`` is ready.

        Each pod is polled with a short readiness check inside a waiter of
        600 seconds; ExecutionError from a single check is logged and the
        polling continues.

        Raises:
            ExecutionError: if a pod is not ready before the waiter expires.
        """
        for pod in self.gluster_pod_list:
            for w in Waiter(timeout=600, interval=10):
                try:
                    is_ready = wait_for_pod_be_ready(
                        self.oc_node, pod, timeout=1, wait_step=1)
                except ExecutionError as exc:
                    g.log.info(
                        "exception %s while validating gluster pod %s" % (
                            exc, pod))
                else:
                    if is_ready:
                        break

            # Waiter still has time left: this pod is ready, go to the next
            if not w.expired:
                continue

            error_msg = (
                "exceeded timeout 600 sec, pod '%s' is not in 'running' state"
                % pod)
            g.log.error(error_msg)
            raise ExecutionError(error_msg)
Example #9
0
    def reboot_gluster_node_and_wait_for_services(self):
        """Reboot the first gluster node and verify its pod and services.

        Finds the gluster pod whose host IP equals the node's storage
        address, reboots the node, waits for that pod to be ready and checks
        the state of the gluster related services on it.

        Raises:
            ExecutionError: if no gluster pod matches the node's address.
        """
        first_server = self.gluster_servers[0]
        gluster_node_ip = g.config["gluster_servers"][first_server]["storage"]

        # Pods that run on the node which is about to be rebooted
        matching_pods = [
            pod for pod in get_ocp_gluster_pod_details(self.oc_node)
            if pod["pod_host_ip"] == gluster_node_ip]
        if not matching_pods:
            raise ExecutionError(
                "Gluster pod Host IP '%s' not matched." % gluster_node_ip)
        gluster_pod = matching_pods[0]["pod_name"]

        # Cleanup waits for the pod even when the test fails half-way
        self.addCleanup(wait_for_pod_be_ready, self.oc_node, gluster_pod)
        node_reboot_by_command(gluster_node_ip, timeout=600, wait_step=10)

        # The pod has to be ready again before services are inspected
        wait_for_pod_be_ready(self.oc_node, gluster_pod)

        # Expected state per service, checked together with "active"
        expected_states = (
            ("glusterd", "running"),
            ("gluster-blockd", "running"),
            ("tcmu-runner", "running"),
            ("gluster-block-target", "exited"),
        )
        for service_name, expected_state in expected_states:
            check_service_status_on_pod(
                self.oc_node, gluster_pod, service_name, "active",
                expected_state)
def validate_multipath_pod(hostname, podname, hacount, mpath):
    """Validate multipath for given app-pod.

    Looks up the node the pod is scheduled on and counts, from the output
    of ``multipath -ll`` on that node, the lines with 'status=active' and
    'status=enabled'. Exactly one active and ``hacount - 1`` enabled entries
    are expected.

     Args:
         hostname (str): ocp master node name
         podname (str): app-pod name for which we need to validate
                        multipath. ex : nginx1
         hacount (int): multipath count or HA count. ex: 3
         mpath (str): multipath value to check
     Returns:
         bool: True if successful, otherwise raises exception
     Raises:
         ExecutionError: if the node name of the pod cannot be obtained.
         AssertionError: if the active or enabled count is not as expected.
    """

    pod_nodename_list = oc_get_custom_resource(hostname,
                                               'pod',
                                               custom=':.spec.nodeName',
                                               name=podname)
    if not pod_nodename_list:
        raise ExecutionError(
            "Failed to get ip for pod from hostname {}".format(hostname))

    pod_nodename = pod_nodename_list[0]
    active_node_count, enable_node_count = (1, hacount - 1)

    # The checks raise AssertionError explicitly instead of using `assert`
    # statements, which would be skipped when Python runs with -O.
    cmd = "multipath -ll %s | grep 'status=active' | wc -l" % mpath
    active_count = int(cmd_run(cmd, pod_nodename))
    if active_node_count != active_count:
        raise AssertionError(
            "Active node count on %s for %s is %s and not 1" %
            (pod_nodename, podname, active_count))

    cmd = "multipath -ll %s | grep 'status=enabled' | wc -l" % mpath
    enable_count = int(cmd_run(cmd, pod_nodename))
    if enable_node_count != enable_count:
        raise AssertionError(
            "Passive node count on %s for %s is %s and not %s" %
            (pod_nodename, podname, enable_count, enable_node_count))

    g.log.info("Validation of multipath for %s is successfull" % podname)
    return True
def enable_pvc_resize(master_node):
    '''
     This function edits the /etc/origin/master/master-config.yaml
     file - to enable pv_resize feature
     and restarts atomic-openshift service on master node.

     The config is downloaded into a local temporary file, edited there and
     uploaded again; the temporary file is removed on every exit path.
     After the restart the function polls "oc get nodes" for up to 120
     seconds until the API answers.

     Args:
         master_node (str): hostname of masternode  on which
                           want to edit the
                           master-config.yaml file
     Returns:
         bool: True if successful (also when the config was already
               edited, in which case nothing is uploaded or restarted),
               otherwise raise Exception
     Raises:
         NotSupportedException: if the openshift version is below 3.9
         ExecutionError: if download, upload or the restart command fails,
                         or the API does not respond within the timeout
    '''
    # NOTE(review): assumes the object returned by get_openshift_version()
    # compares correctly against version strings - confirm
    version = get_openshift_version()
    if version < "3.9":
        msg = ("pv resize is not available in openshift "
               "version %s " % version)
        g.log.error(msg)
        raise NotSupportedException(msg)

    # Only the name is needed; the file is reopened for reading and writing
    with tempfile.NamedTemporaryFile(delete=False) as temp:
        temp_filename = temp.name

    try:
        try:
            g.download(master_node, MASTER_CONFIG_FILEPATH, temp_filename)
        except Exception as e:
            err_msg = (
                "Failed to download '{}' from master node '{}' due to "
                "exception\n{}".format(
                    MASTER_CONFIG_FILEPATH, master_node, six.text_type(e)))
            raise ExecutionError(err_msg)

        with open(temp_filename, 'r') as f:
            data = yaml.load(f, Loader=yaml.FullLoader)

        plugin_config = data['admissionConfig']['pluginConfig']
        if "PersistentVolumeClaimResize" in plugin_config:
            g.log.info("master-config.yaml file is already edited")
            return True

        plugin_config['PersistentVolumeClaimResize'] = {
            'configuration': {
                'apiVersion': 'v1',
                'disable': 'false',
                'kind': 'DefaultAdmissionConfig'}}

        # Both argument sections get the feature gate; a section that is
        # missing or not a mapping is replaced by a fresh one
        kube_config = data['kubernetesMasterConfig']
        for key in ('apiServerArguments', 'controllerArguments'):
            if not isinstance(kube_config.get(key), dict):
                kube_config[key] = {}
            kube_config[key]['feature-gates'] = [
                'ExpandPersistentVolumes=true']

        with open(temp_filename, 'w+') as f:
            yaml.dump(data, f, default_flow_style=False)

        try:
            g.upload(master_node, temp_filename, MASTER_CONFIG_FILEPATH)
        except Exception as e:
            err_msg = (
                "Failed to upload '{}' to master node '{}' due to "
                "exception\n{}".format(
                    MASTER_CONFIG_FILEPATH, master_node, six.text_type(e)))
            raise ExecutionError(err_msg)
    finally:
        # Covers the early return and the failure paths as well
        if os.path.exists(temp_filename):
            os.unlink(temp_filename)

    if version == "3.9":
        cmd = ("systemctl restart atomic-openshift-master-api "
               "atomic-openshift-master-controllers")
    else:
        cmd = ("/usr/local/bin/master-restart api && "
               "/usr/local/bin/master-restart controllers")
    ret, out, err = g.run(master_node, cmd, "root")
    if ret != 0:
        err_msg = "Failed to execute cmd %s on %s\nout: %s\nerr: %s" % (
            cmd, master_node, out, err)
        g.log.error(err_msg)
        raise ExecutionError(err_msg)

    # Wait for API service to be ready after the restart
    for _ in waiter.Waiter(timeout=120, interval=1):
        try:
            cmd_run("oc get nodes", master_node)
            return True
        except AssertionError:
            continue
    err_msg = "Exceeded 120s timeout waiting for OCP API to start responding."
    g.log.error(err_msg)
    raise ExecutionError(err_msg)
Example #12
0
def enable_pvc_resize(master_node):
    '''
     This function edits the /etc/origin/master/master-config.yaml
     file - to enable pv_resize feature
     and restarts atomic-openshift service on master node.

     The file is read and rewritten in place through an rpyc connection to
     the master node; the connection is closed on every exit path.

     Args:
         master_node (str): hostname of masternode  on which
                           want to edit the
                           master-config.yaml file
     Returns:
         bool: True if successful (also when the config was already
               edited, in which case nothing is written or restarted),
               otherwise raise Exception
     Raises:
         NotSupportedException: if the openshift version is below 3.9
         ExecutionError: if editing the file or the restart command fails
    '''
    # NOTE(review): assumes the object returned by get_openshift_version()
    # compares correctly against version strings - confirm
    version = get_openshift_version()
    if version < "3.9":
        msg = ("pv resize is not available in openshift "
               "version %s " % version)
        g.log.error(msg)
        raise NotSupportedException(msg)

    try:
        conn = g.rpyc_get_connection(master_node, user="******")
        if conn is None:
            err_msg = ("Failed to get rpyc connection of node %s" %
                       master_node)
            g.log.error(err_msg)
            raise ExecutionError(err_msg)

        # safe_load needs no explicit Loader and builds only plain YAML
        # types, which is all a config file requires
        with conn.builtin.open(MASTER_CONFIG_FILEPATH, 'r') as f:
            data = yaml.safe_load(f)

        plugin_config = data['admissionConfig']['pluginConfig']
        if "PersistentVolumeClaimResize" in plugin_config:
            g.log.info("master-config.yaml file is already edited")
            return True

        plugin_config['PersistentVolumeClaimResize'] = {
            'configuration': {
                'apiVersion': 'v1',
                'disable': 'false',
                'kind': 'DefaultAdmissionConfig'
            }
        }

        # Both argument sections get the feature gate; a section that is
        # missing or not a mapping is replaced by a fresh one
        kube_config = data['kubernetesMasterConfig']
        for key in ('apiServerArguments', 'controllerArguments'):
            if not isinstance(kube_config.get(key), dict):
                kube_config[key] = {}
            kube_config[key]['feature-gates'] = [
                'ExpandPersistentVolumes=true']

        # Serialize before opening with 'w+': opening truncates the remote
        # file, so a dump failure must not happen after that point
        serialized = yaml.dump(data, default_flow_style=False)
        with conn.builtin.open(MASTER_CONFIG_FILEPATH, 'w+') as f:
            f.write(serialized)
    except Exception as err:
        raise ExecutionError("failed to edit master-config.yaml file "
                             "%s on %s" % (err, master_node))
    finally:
        g.rpyc_close_connection(master_node, user="******")

    g.log.info("successfully edited master-config.yaml file "
               "%s" % master_node)
    if version == "3.9":
        cmd = ("systemctl restart atomic-openshift-master-api "
               "atomic-openshift-master-controllers")
    else:
        cmd = ("/usr/local/bin/master-restart api && "
               "/usr/local/bin/master-restart controllers")
    ret, out, err = g.run(master_node, cmd, "root")
    if ret != 0:
        err_msg = "Failed to execute cmd %s on %s\nout: %s\nerr: %s" % (
            cmd, master_node, out, err)
        g.log.error(err_msg)
        raise ExecutionError(err_msg)

    return True