def test_add_node_cleanup(self):
        """
        Test to cleanup possible resources created in TestAddNode class
        """

        # Get info from SCALE_DATA_FILE for validation
        if os.path.exists(SCALE_DATA_FILE):
            file_data = templating.load_yaml(SCALE_DATA_FILE)
            namespace = file_data.get("NAMESPACE")
            pod_obj_file_list = file_data.get("POD_OBJ_FILE_LIST")
            pvc_obj_file_list = file_data.get("PVC_OBJ_FILE_LIST")
        else:
            raise FileNotFoundError(f"{SCALE_DATA_FILE} does not exist")

        ocs_obj = OCP(namespace=namespace)

        # Delete pods and PVCs from their saved object files
        for obj_file in pod_obj_file_list + pvc_obj_file_list:
            obj_file_path = f"{log_path}/{obj_file}"
            cmd_str = f"delete -f {obj_file_path}"
            ocs_obj.exec_oc_cmd(command=cmd_str)

        # Delete machineset
        for obj in machine.get_machineset_objs():
            if "app" in obj.name:
                machine.delete_custom_machineset(obj.name)
Example 2
def check_and_add_enough_worker(worker_count):
    """
    Function to check if there is enough workers available to scale pods.
    IF there is no enough worker then worker will be added based on supported platforms
    Function also adds scale label to the respective worker nodes.

    Args:
        worker_count (int): Expected worker count to be present in the setup

    Returns:
        book: True is there is enough worker count else raise exception.

    """
    # Check whether OCS workers should be used for scaling app pods.
    # If so, label all workers with the scale label; otherwise label
    # only the non-OCS workers.
    worker_list = node.get_worker_nodes()
    ocs_worker_list = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    scale_worker = machine.get_labeled_nodes(constants.SCALE_LABEL)
    if config.RUN.get("use_ocs_worker_for_scale"):
        if not scale_worker:
            helpers.label_worker_node(node_list=worker_list,
                                      label_key="scale-label",
                                      label_value="app-scale")
    else:
        if not scale_worker:
            for node_item in ocs_worker_list:
                worker_list.remove(node_item)
            if worker_list:
                helpers.label_worker_node(
                    node_list=worker_list,
                    label_key="scale-label",
                    label_value="app-scale",
                )
    scale_worker_list = machine.get_labeled_nodes(constants.SCALE_LABEL)
    logging.info(f"Print existing scale worker {scale_worker_list}")

    # Check if there is enough nodes to continue scaling of app pods
    if len(scale_worker_list) >= worker_count:
        logging.info(f"Setup has expected worker count {worker_count} "
                     "to continue scale of pods")
        return True
    else:
        logging.info(
            "Not enough workers in the setup; will add enough workers "
            "for the automation-supported platforms")
        # Add enough worker for AWS
        if (config.ENV_DATA["deployment_type"] == "ipi"
                and config.ENV_DATA["platform"].lower() == "aws"):
            # Create machineset for app worker nodes on each aws zone
            # Each zone will have one app worker node
            ms_name = list()
            labels = [("node-role.kubernetes.io/app", "app-scale")]
            for obj in machine.get_machineset_objs():
                if "app" in obj.name:
                    ms_name.append(obj.name)
            if not ms_name:
                if len(machine.get_machineset_objs()) == 3:
                    for zone in ["a", "b", "c"]:
                        ms_name.append(
                            machine.create_custom_machineset(
                                instance_type="m5.4xlarge",
                                labels=labels,
                                zone=zone,
                            ))
                else:
                    ms_name.append(
                        machine.create_custom_machineset(
                            instance_type="m5.4xlarge",
                            labels=labels,
                            zone="a",
                        ))
                for ms in ms_name:
                    machine.wait_for_new_node_to_be_ready(ms)
            if len(ms_name) == 3:
                exp_count = int(worker_count / 3)
            else:
                exp_count = worker_count
            for name in ms_name:
                machine.add_node(machine_set=name, count=exp_count)
            for ms in ms_name:
                machine.wait_for_new_node_to_be_ready(ms)
            worker_list = node.get_worker_nodes()
            ocs_worker_list = machine.get_labeled_nodes(
                constants.OPERATOR_NODE_LABEL)
            scale_label_worker = machine.get_labeled_nodes(
                constants.SCALE_LABEL)
            ocs_worker_list.extend(scale_label_worker)
            final_list = list(dict.fromkeys(ocs_worker_list))
            for node_item in final_list:
                if node_item in worker_list:
                    worker_list.remove(node_item)
            if worker_list:
                helpers.label_worker_node(
                    node_list=worker_list,
                    label_key="scale-label",
                    label_value="app-scale",
                )
            return True
        elif (config.ENV_DATA["deployment_type"] == "upi"
              and config.ENV_DATA["platform"].lower()
              in ("vsphere", "baremetal", "azure")):
            raise UnsupportedPlatformError(
                "Unsupported platform to add worker")
        else:
            raise UnavailableResourceException(
                "There is no enough worker nodes to continue app pod scaling")
Example 3
    def create_scale_pods(
        self,
        scale_count=1500,
        pods_per_iter=5,
        io_runtime=None,
        pvc_size=None,
        start_io=None,
    ):
        """
        Main Function with scale pod creation flow and checks to add nodes.
        For other platforms will not be considering the instance_type param

        Args:
            scale_count (int): Scale pod+pvc count
            io_runtime (sec): Fio run time in seconds
            start_io (bool): If True start IO else don't
            pods_per_iter (int): Number of PVC-POD to be created per PVC type
            pvc_size (Gi): size of PVC
            Example, If 5 then 20 PVC+POD will be created with 5 each of 4 PVC types
            Test value in-between 5-10

        """
        self.ms_name, all_pod_obj = [], []
        if not 5 <= pods_per_iter <= 10:
            raise UnexpectedBehaviour(
                "pods_per_iter value should be between 5 and 10")

        # Check for expected worker count
        expected_worker_count = get_expected_worker_count(scale_count)
        if check_and_add_enough_worker(expected_worker_count):
            if (config.ENV_DATA["deployment_type"] == "ipi"
                    and config.ENV_DATA["platform"].lower() == "aws"):
                for obj in machine.get_machineset_objs():
                    if "app" in obj.name:
                        self.ms_name.append(obj.name)
            else:
                self.ms_name = []

        # Create namespace
        self.create_and_set_namespace()

        # Continue to iterate till the scale pvc limit is reached
        while True:
            if scale_count <= len(all_pod_obj):
                logger.info(f"Scaled {scale_count} pvc and pods")

                if cluster.validate_pg_balancer():
                    logging.info(
                        "OSD consumption and PG distribution is good to continue"
                    )
                else:
                    raise UnexpectedBehaviour(
                        "Unequal PG distribution to OSDs")

                break
            else:
                logger.info(f"Scaled PVC and POD count {len(all_pod_obj)}")
                self.pod_obj, self.pvc_obj = self.create_multi_pvc_pod(
                    pods_per_iter, io_runtime, start_io, pvc_size)
                all_pod_obj.extend(self.pod_obj)
                try:
                    # Check enough resources available in the dedicated app workers
                    check_enough_resource_available_in_workers(
                        self.ms_name, self.pod_dict_path)

                    # Check for ceph cluster OSD utilization
                    if not cluster.validate_osd_utilization(osd_used=75):
                        logging.info("Cluster OSD utilization is below 75%")
                    elif not cluster.validate_osd_utilization(osd_used=83):
                        logger.warning("Cluster OSD utilization is above 75%")
                    else:
                        raise CephHealthException("Cluster OSDs are near full")

                    # Check for 500 pods per namespace
                    pod_objs = pod.get_all_pods(
                        namespace=self.namespace_list[-1].namespace)
                    if len(pod_objs) >= 500:
                        self.create_and_set_namespace()

                except UnexpectedBehaviour:
                    logging.error(
                        f"Scaling of cluster failed after {len(all_pod_obj)} pod creation"
                    )
                    raise UnexpectedBehaviour(
                        "Scaling PVC+POD failed analyze setup and log for more details"
                    )
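

# Illustrative call (not from the source). FioPodScale is an assumed name for
# the class holding this method; argument values are hypothetical but within
# the documented 5-10 range for pods_per_iter.
scale = FioPodScale()
scale.create_scale_pods(
    scale_count=1500, pods_per_iter=5, io_runtime=3600, pvc_size=10, start_io=True
)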
Example 4
    def create_scale_pods(self, scale_count=1500, pvc_per_pod_count=20):
        """
        Main Function with scale pod creation flow and checks to add nodes
        for the supported platforms, validates pg-balancer after scaling
        Function breaks the scale_count in multiples of 750 and iterates those
        many time to reach the desired count.

        Args:
            scale_count (int): No of PVCs to be Scaled
            pvc_per_pod_count (int): Number of PVCs to be attached to single POD
            Example, If 20 then 20 PVCs will be attached to single POD

        """

        # The minimal scale creation count should be 750; the code is
        # optimized to scale PVCs in batches of no more than 750.
        # max_pvc_count + 10 is used in certain places to round up the value:
        # while attaching 20 PVCs per pod with 750 PVCs, the last pod would be
        # left with only 10 PVCs, so 10 more are scaled to avoid the problem.
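        # Worked example of the rounding (illustrative): with scale_count=1500
        # and pvc_per_pod_count=20, expected_itr_counter = 1500 // 750 = 2;
        # each iteration creates 750 + 10 = 760 PVCs and 760 / 20 = 38 PODs,
        # so the run ends with 1520 PVCs attached to 76 PODs.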
        max_pvc_count = 750
        if scale_count < max_pvc_count:
            raise UnexpectedBehaviour("Minimal scale PVC creation count should be 750")

        self.ms_name = list()

        # Check for expected worker count
        expected_worker_count = get_expected_worker_count(scale_count)
        if check_and_add_enough_worker(expected_worker_count):
            if (
                config.ENV_DATA["deployment_type"] == "ipi"
                and config.ENV_DATA["platform"].lower() == "aws"
            ):
                for obj in machine.get_machineset_objs():
                    if "app" in obj.name:
                        self.ms_name.append(obj.name)
            else:
                self.ms_name = []

        # Create namespace
        self.create_and_set_namespace()

        expected_itr_counter = int(scale_count / max_pvc_count)
        actual_itr_counter = 0

        # Continue to iterate till the scale pvc limit is reached
        while True:
            if actual_itr_counter == expected_itr_counter:
                logging.info(
                    f"Scaled {scale_count} PVCs and created "
                    f"{scale_count // pvc_per_pod_count} PODs"
                )

                if cluster.validate_pg_balancer():
                    logging.info(
                        "OSD consumption and PG distribution is good to continue"
                    )
                else:
                    raise UnexpectedBehaviour("Unequal PG distribution to OSDs")

                break
            else:
                actual_itr_counter += 1
                rbd_pvc, fs_pvc, pod_running = self.create_multi_pvc_pod(
                    pvc_count=max_pvc_count + 10,
                    pvcs_per_pod=pvc_per_pod_count,
                    obj_name=f"obj{actual_itr_counter}",
                )
                logging.info(
                    f"Scaled {len(rbd_pvc) + len(fs_pvc)} PVCs and created "
                    f"{len(pod_running)} PODs in iteration {actual_itr_counter}"
                )

        logging.info(
            f"Scaled {actual_itr_counter * (max_pvc_count + 10)} PVCs and "
            f"created {int((actual_itr_counter * (max_pvc_count + 10)) / pvc_per_pod_count)} PODs"
        )
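

# Illustrative call (not from the source; FioPodScale is the same assumed
# class name as above). A scale_count below the 750 minimum raises
# UnexpectedBehaviour, per the guard at the top of the method.
scale = FioPodScale()
scale.create_scale_pods(scale_count=1500, pvc_per_pod_count=20)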
Example 5
def add_worker_node(instance_type=None):
    """
    Add and label worker nodes for scaling on the supported platforms.

    Args:
        instance_type (str): AWS instance type for the new workers
            (defaults to m5.4xlarge)

    """
    global ms_name
    ms_name = list()
    worker_list = node.get_worker_nodes()
    ocs_worker_list = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    scale_worker = machine.get_labeled_nodes(constants.SCALE_LABEL)
    if config.RUN.get("use_ocs_worker_for_scale"):
        if not scale_worker:
            helpers.label_worker_node(node_list=worker_list,
                                      label_key="scale-label",
                                      label_value="app-scale")
    else:
        if not scale_worker:
            for node_item in ocs_worker_list:
                worker_list.remove(node_item)
            if worker_list:
                helpers.label_worker_node(
                    node_list=worker_list,
                    label_key="scale-label",
                    label_value="app-scale",
                )
    scale_worker_list = machine.get_labeled_nodes(constants.SCALE_LABEL)
    logging.info(f"Print existing scale worker {scale_worker_list}")

    if (config.ENV_DATA["deployment_type"] == "ipi"
            and config.ENV_DATA["platform"].lower() == "aws"):
        log.info("Adding worker nodes on the current cluster")
        labels = [("node-role.kubernetes.io/app", "app-scale")]
        # Create machineset for app worker nodes on each zone
        for obj in machine.get_machineset_objs():
            if "app" in obj.name:
                ms_name.append(obj.name)
        if instance_type is None:
            instance_type = "m5.4xlarge"
        if not ms_name:
            if len(machine.get_machineset_objs()) == 3:
                for zone in ["a", "b", "c"]:
                    ms_name.append(
                        machine.create_custom_machineset(
                            instance_type=instance_type,
                            labels=labels,
                            zone=zone,
                        ))
            else:
                ms_name.append(
                    machine.create_custom_machineset(
                        instance_type=instance_type,
                        labels=labels,
                        zone="a",
                    ))
            for ms in ms_name:
                machine.wait_for_new_node_to_be_ready(ms)

        worker_list = node.get_worker_nodes()
        ocs_worker_list = machine.get_labeled_nodes(
            constants.OPERATOR_NODE_LABEL)
        scale_label_worker = machine.get_labeled_nodes(constants.SCALE_LABEL)
        ocs_worker_list.extend(scale_label_worker)
        final_list = list(dict.fromkeys(ocs_worker_list))
        for node_item in final_list:
            if node_item in worker_list:
                worker_list.remove(node_item)
        if worker_list:
            helpers.label_worker_node(node_list=worker_list,
                                      label_key="scale-label",
                                      label_value="app-scale")
        return True
    elif (config.ENV_DATA["deployment_type"] == "upi"
          and config.ENV_DATA["platform"].lower()
          in ("vsphere", "baremetal")):
        log.info("Running scale test on existing worker nodes.")
    elif (config.ENV_DATA["deployment_type"] == "upi"
          and config.ENV_DATA["platform"].lower() == "azure"):
        raise UnsupportedPlatformError("Unsupported Platform")
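

# Illustrative call (not from the source; the instance type is hypothetical).
# On AWS IPI this creates the "app" machinesets if they are missing, labels
# the remaining workers with the scale label, and returns True; on UPI
# vSphere/baremetal it reuses the existing workers.
add_worker_node(instance_type="m5.2xlarge")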
Example 6
def add_worker_node(instance_type=None):
    global ms_name
    ms_name = list()
    worker_list = helpers.get_worker_nodes()
    ocs_worker_list = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    scale_worker = machine.get_labeled_nodes(constants.SCALE_LABEL)
    if config.RUN.get('use_ocs_worker_for_scale'):
        if not scale_worker:
            helpers.label_worker_node(node_list=worker_list,
                                      label_key='scale-label',
                                      label_value='app-scale')
    else:
        if not scale_worker:
            for node_item in ocs_worker_list:
                worker_list.remove(node_item)
            if worker_list:
                helpers.label_worker_node(node_list=worker_list,
                                          label_key='scale-label',
                                          label_value='app-scale')
    scale_worker_list = machine.get_labeled_nodes(constants.SCALE_LABEL)
    logging.info(f"Print existing scale worker {scale_worker_list}")

    if config.ENV_DATA['deployment_type'] == 'ipi' and config.ENV_DATA[
            'platform'].lower() == 'aws':
        log.info("Adding worker nodes on the current cluster")
        # Create machineset for app worker nodes on each zone
        for obj in machine.get_machineset_objs():
            if 'app' in obj.name:
                ms_name.append(obj.name)
        if instance_type is None:
            instance_type = 'm5.4xlarge'
        if not ms_name:
            if len(machine.get_machineset_objs()) == 3:
                for zone in ['a', 'b', 'c']:
                    ms_name.append(
                        machine.create_custom_machineset(
                            instance_type=instance_type, zone=zone))
            else:
                ms_name.append(
                    machine.create_custom_machineset(
                        instance_type=instance_type, zone='a'))
            for ms in ms_name:
                machine.wait_for_new_node_to_be_ready(ms)

        worker_list = helpers.get_worker_nodes()
        ocs_worker_list = machine.get_labeled_nodes(
            constants.OPERATOR_NODE_LABEL)
        scale_label_worker = machine.get_labeled_nodes(constants.SCALE_LABEL)
        ocs_worker_list.extend(scale_label_worker)
        final_list = list(dict.fromkeys(ocs_worker_list))
        for node_item in final_list:
            if node_item in worker_list:
                worker_list.remove(node_item)
        if worker_list:
            helpers.label_worker_node(node_list=worker_list,
                                      label_key='scale-label',
                                      label_value='app-scale')
        return True
    elif config.ENV_DATA['deployment_type'] == 'upi' and config.ENV_DATA[
            'platform'].lower() in ('vsphere', 'baremetal'):
        log.info('Running pgsql on existing worker nodes')
    elif config.ENV_DATA['deployment_type'] == 'upi' and config.ENV_DATA[
            'platform'].lower() == 'azure':
        raise UnsupportedPlatformError("Unsupported Platform")