예제 #1
0
파일: node.py 프로젝트: mirekdlugosz/ocs-ci
def add_new_node_and_label_it(machineset_name):
    """
    Add one worker node through a machineset and label it for OCS.

    The machineset's replica count is raised by one, then
    ``machine.wait_for_new_node_to_be_ready`` is called for the machineset.
    The new node is found by comparing the worker node list before and
    after the scale-up, and the first new node receives
    ``constants.OPERATOR_NODE_LABEL``.

    Args:
        machineset_name (str): Name of the machine set

    Raises:
        IndexError: If no new worker node shows up after the scale-up

    """
    # Snapshot the current workers so the new node can be found by diffing
    initial_nodes = get_worker_nodes()
    log.info(f"Current available worker nodes are {initial_nodes}")

    # get machineset replica count
    machineset_replica_count = machine.get_replica_count(machineset_name)
    new_replica_count = machineset_replica_count + 1

    # add_node takes the target replica count, not an increment
    machine.add_node(machineset_name, count=new_replica_count)
    log.info(
        f"Increased {machineset_name} count by 1 to {new_replica_count}"
    )

    # wait for the new node to come to ready state
    log.info("Waiting for the new node to be in ready state")
    machine.wait_for_new_node_to_be_ready(machineset_name)

    # Get the node name of new spun node
    nodes_after_new_spun_node = get_worker_nodes()
    new_spun_node = list(set(nodes_after_new_spun_node) - set(initial_nodes))
    log.info(f"New spun node is {new_spun_node}")

    # Label it (only one node was requested, so only the first is labeled)
    node_obj = ocp.OCP(kind='node')
    node_obj.add_label(resource_name=new_spun_node[0],
                       label=constants.OPERATOR_NODE_LABEL)
    log.info(f"Successfully labeled {new_spun_node} with OCS storage label")
예제 #2
0
def add_new_node_and_label_it(
    machineset_name, num_nodes=1, mark_for_ocs_label=True
):
    """
    Add new worker node(s) through a machineset (IPI) and optionally label them.

    The machineset's replica count is raised by ``num_nodes``, then
    ``machine.wait_for_new_node_to_be_ready`` is called for the machineset.
    New nodes are found by comparing the worker node list before and after
    the scale-up.

    Args:
        machineset_name (str): Name of the machine set
        num_nodes (int): number of nodes to add
        mark_for_ocs_label (bool): True to put
            ``constants.OPERATOR_NODE_LABEL`` on every new node

    Example:
        add_new_node_and_label_it("new-tdesala-zlqzn-worker-us-east-2a")

    Returns:
        list: new spun nodes

    """
    # Snapshot the current workers so the new nodes can be found by diffing
    initial_nodes = tests.helpers.get_worker_nodes()
    log.info(f"Current available worker nodes are {initial_nodes}")

    # get machineset replica count
    machineset_replica_count = machine.get_replica_count(machineset_name)
    log.info(
        f"{machineset_name} has replica count: {machineset_replica_count}"
    )

    # add_node takes the target replica count, not an increment
    target_replica_count = machineset_replica_count + num_nodes
    log.info(f"Increasing the replica count by {num_nodes}")
    machine.add_node(machineset_name, count=target_replica_count)
    log.info(
        f"{machineset_name} now has replica "
        f"count: {target_replica_count}"
    )

    # wait for the new node to come to ready state
    log.info("Waiting for the new node to be in ready state")
    machine.wait_for_new_node_to_be_ready(machineset_name)

    # Get the node name of new spun node
    nodes_after_new_spun_node = tests.helpers.get_worker_nodes()
    new_spun_nodes = list(
        set(nodes_after_new_spun_node) - set(initial_nodes)
    )
    log.info(f"New spun nodes: {new_spun_nodes}")

    # Label it
    if mark_for_ocs_label:
        node_obj = ocp.OCP(kind='node')
        for new_spun_node in new_spun_nodes:
            node_obj.add_label(
                resource_name=new_spun_node,
                label=constants.OPERATOR_NODE_LABEL
            )
            # Use the module logger like the rest of this function,
            # not the root logger.
            log.info(
                f"Successfully labeled {new_spun_node} with OCS storage label"
            )

    return new_spun_nodes
예제 #3
0
def add_worker_based_on_pods_count_per_node(
    node_count, expected_count, role_type=None, machineset_name=None
):
    """
    Grow the given machinesets when too few nodes are below the pod limit.

    Only AWS IPI deployments are acted on. Nodes of ``role_type`` whose
    running pod count is below ``expected_count`` are collected; when at most
    one such node is left, every machineset in ``machineset_name`` gets
    ``node_count`` extra replicas and is waited on, one machineset at a time.

    Args:
        machineset_name (list): Machineset_names to add more nodes if required.
        node_count (int): Additional nodes to be added
        expected_count (int): Expected pod count in one node
        role_type (str): To add type to the nodes getting added

    Returns:
        bool: True if nodes were added, False if enough nodes are still below
            the expected pod count. None for any deployment/platform pair
            that is neither AWS IPI nor one of the rejected UPI platforms.

    Raises:
        UnsupportedPlatformError: For UPI deployments on vsphere, baremetal
            or azure.

    """
    deployment_type = config.ENV_DATA["deployment_type"]

    if deployment_type == "upi":
        unsupported_platforms = ("vsphere", "baremetal", "azure")
        if config.ENV_DATA["platform"].lower() in unsupported_platforms:
            raise UnsupportedPlatformError("Unsupported Platform to add worker")
        return None

    if not (
        deployment_type == "ipi"
        and config.ENV_DATA["platform"].lower() == "aws"
    ):
        return None

    # Check for POD running count on each nodes
    workers = node.get_nodes(node_type=role_type)
    pods_per_node = node.get_running_pod_count_from_node(node_type=role_type)
    nodes_with_room = [
        worker.name
        for worker in workers
        if not pods_per_node[f"{worker.name}"] >= expected_count
    ]

    if len(nodes_with_room) > 1:
        logging.info(
            f"Enough pods can be created with available nodes {pods_per_node}"
        )
        return False

    # At most one node still has room: scale every machineset up
    for ms_name in machineset_name:
        current_replicas = machine.get_replica_count(machine_set=ms_name)
        machine.add_node(
            machine_set=ms_name, count=(current_replicas + node_count)
        )
        machine.wait_for_new_node_to_be_ready(ms_name)
    return True
예제 #4
0
def add_worker_based_on_cpu_utilization(
    node_count, expected_percent, role_type=None, machineset_name=None
):
    """
    Grow the given machinesets when too few nodes are below the CPU limit.

    Only AWS IPI deployments are acted on. Nodes of ``role_type`` whose
    "cpu" utilization is not above ``expected_percent`` are collected; when
    at most one such node is left, every machineset in ``machineset_name``
    gets ``node_count`` extra replicas and is waited on, one at a time.

    Args:
        machineset_name (list): Machineset_names to add more nodes if required.
        node_count (int): Additional nodes to be added
        expected_percent (int): Expected utilization percent
        role_type (str): To add type to the nodes getting added

    Returns:
        bool: True if nodes were added, False if enough nodes still have
            spare CPU. None for any deployment/platform pair that is neither
            AWS IPI nor one of the rejected UPI platforms.

    Raises:
        UnsupportedPlatformError: For UPI deployments on vsphere, baremetal
            or azure.

    """
    deployment_type = config.ENV_DATA["deployment_type"]

    if deployment_type == "upi":
        unsupported_platforms = ("vsphere", "baremetal", "azure")
        if config.ENV_DATA["platform"].lower() in unsupported_platforms:
            raise UnsupportedPlatformError("Unsupported Platform to add worker")
        return None

    if not (
        deployment_type == "ipi"
        and config.ENV_DATA["platform"].lower() == "aws"
    ):
        return None

    # Check for CPU utilization on each nodes
    workers = node.get_nodes(node_type=role_type)
    utilization = node.get_node_resource_utilization_from_oc_describe(
        node_type=role_type
    )
    nodes_with_headroom = [
        worker.name
        for worker in workers
        if not utilization[f"{worker.name}"]["cpu"] > expected_percent
    ]

    if len(nodes_with_headroom) > 1:
        logging.info(
            f"Enough resource available for more pod creation {utilization}"
        )
        return False

    # At most one node still has headroom: scale every machineset up
    for ms_name in machineset_name:
        current_replicas = machine.get_replica_count(machine_set=ms_name)
        machine.add_node(
            machine_set=ms_name, count=(current_replicas + node_count)
        )
        machine.wait_for_new_node_to_be_ready(ms_name)
    return True
예제 #5
0
    def test_add_node(self):
        """
        Scale every machineset to two replicas and wait for the workers.

        Runs on IPI deployments only; anything else is skipped. After
        scaling, the worker list is polled until it holds two nodes per
        machineset, then all workers are waited on for
        ``constants.NODE_READY``.
        """
        if config.ENV_DATA['deployment_type'] != 'ipi':
            pytest.skip("UPI not yet supported")

        count = 2
        machines = machine_utils.get_machinesets()

        # Replica counts before scaling (collected but not read afterwards)
        before_replica_counts = {
            machineset: machine_utils.get_replica_count(machineset)
            for machineset in machines
        }
        worker_nodes_before = helpers.get_worker_nodes()
        logger.info(
            f'The worker nodes number before adding a new node is {len(worker_nodes_before)}'
        )

        # Scale one machineset at a time and record the count right after
        after_replica_counts = {}
        for machineset in machines:
            machine_utils.add_node(machineset, count=count)
            after_replica_counts[machineset] = (
                machine_utils.get_replica_count(machineset)
            )
        logger.info(after_replica_counts)

        # Poll until the cluster reports ``count`` workers per machineset
        expected_workers = count * len(machines)
        sampler = TimeoutSampler(
            timeout=300, sleep=3, func=helpers.get_worker_nodes
        )
        for sample in sampler:
            if len(sample) == expected_workers:
                break

        worker_nodes_after = helpers.get_worker_nodes()
        logger.info(
            f'The worker nodes number after adding a new node is {len(worker_nodes_after)}'
        )
        wait_for_nodes_status(
            node_names=worker_nodes_after, status=constants.NODE_READY
        )
예제 #6
0
def check_and_add_enough_worker(worker_count):
    """
    Check that enough scale-labeled workers exist and add workers if needed.

    If no node carries ``constants.SCALE_LABEL`` yet, worker nodes are
    labeled with ``scale-label=app-scale``. OCS-labeled workers are included
    only when ``config.RUN["use_ocs_worker_for_scale"]`` is set. If the
    number of scale workers is still below ``worker_count``, workers are
    added on AWS IPI through "app" machinesets and the new non-OCS,
    non-scale workers are labeled too.

    Args:
        worker_count (int): Expected worker count to be present in the setup

    Returns:
        bool: True when enough scale workers already exist, or once the
            AWS IPI add-worker path has completed.

    Raises:
        UnsupportedPlatformError: More workers are needed on a UPI
            vsphere, baremetal or azure deployment.
        UnavailableResourceException: More workers are needed on any other
            deployment/platform combination.

    """
    scale_label = {"label_key": "scale-label", "label_value": "app-scale"}

    # Check either to use OCS workers for scaling app pods
    # Further continue to label the worker with scale label else not
    worker_list = node.get_worker_nodes()
    ocs_worker_list = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    scale_worker = machine.get_labeled_nodes(constants.SCALE_LABEL)
    if config.RUN.get("use_ocs_worker_for_scale"):
        if not scale_worker:
            helpers.label_worker_node(node_list=worker_list, **scale_label)
    else:
        if not scale_worker:
            for node_item in ocs_worker_list:
                # An OCS-labeled node may be missing from the worker list;
                # guard the removal so that does not raise ValueError
                # (same guard as the post-scale-up loop below).
                if node_item in worker_list:
                    worker_list.remove(node_item)
            if worker_list:
                helpers.label_worker_node(node_list=worker_list, **scale_label)
    scale_worker_list = machine.get_labeled_nodes(constants.SCALE_LABEL)
    logging.info(f"Print existing scale worker {scale_worker_list}")

    # Check if there is enough nodes to continue scaling of app pods
    if len(scale_worker_list) >= worker_count:
        logging.info(f"Setup has expected worker count {worker_count} "
                     "to continue scale of pods")
        return True

    logging.info(
        "There is no enough worker in the setup, will add enough worker "
        "for the automation supported platforms")

    # Add enough worker for AWS
    if (config.ENV_DATA["deployment_type"] == "ipi"
            and config.ENV_DATA["platform"].lower() == "aws"):
        # Create machineset for app worker nodes on each aws zone
        # Each zone will have one app worker node
        labels = [("node-role.kubernetes.io/app", "app-scale")]
        ms_name = [
            obj.name for obj in machine.get_machineset_objs()
            if "app" in obj.name
        ]
        if not ms_name:
            # No app machineset yet: one per zone when the cluster has
            # exactly three machinesets, otherwise a single one in zone "a"
            if len(machine.get_machineset_objs()) == 3:
                zones = ["a", "b", "c"]
            else:
                zones = ["a"]
            for zone in zones:
                ms_name.append(
                    machine.create_custom_machineset(
                        instance_type="m5.4xlarge",
                        labels=labels,
                        zone=zone,
                    ))
            for ms in ms_name:
                machine.wait_for_new_node_to_be_ready(ms)

        # NOTE(review): int() truncates, so three machinesets may end up
        # with fewer than worker_count nodes in total when worker_count is
        # not a multiple of 3 - confirm this is intended.
        if len(ms_name) == 3:
            exp_count = int(worker_count / 3)
        else:
            exp_count = worker_count
        for name in ms_name:
            machine.add_node(machine_set=name, count=exp_count)
        for ms in ms_name:
            machine.wait_for_new_node_to_be_ready(ms)

        # Label every worker that has neither the OCS nor the scale label
        worker_list = node.get_worker_nodes()
        ocs_worker_list = machine.get_labeled_nodes(
            constants.OPERATOR_NODE_LABEL)
        scale_label_worker = machine.get_labeled_nodes(
            constants.SCALE_LABEL)
        ocs_worker_list.extend(scale_label_worker)
        final_list = list(dict.fromkeys(ocs_worker_list))
        for node_item in final_list:
            if node_item in worker_list:
                worker_list.remove(node_item)
        if worker_list:
            helpers.label_worker_node(node_list=worker_list, **scale_label)
        return True
    elif (config.ENV_DATA["deployment_type"] == "upi"
          and config.ENV_DATA["platform"].lower()
          in ("vsphere", "baremetal", "azure")):
        raise UnsupportedPlatformError(
            "Unsupported Platform to add worker")
    else:
        raise UnavailableResourceException(
            "There is no enough worker nodes to continue app pod scaling")
    def identify_and_add_nodes(self, scenario, num_of_nodes):
        """
        Sort workers into OCS and non-OCS nodes, adding nodes if required.

        For a 'colocated' scenario with too few OCS nodes, the cluster is
        grown until it has ``num_of_nodes`` workers and non-OCS workers are
        then given the OCS storage label. For a 'dedicated' scenario, the
        cluster is grown until there are ``num_of_nodes`` non-OCS workers.
        Nodes are only added on IPI deployments; otherwise the test is
        skipped.

        Args:
            scenario (str): Scenario of app pods running on OCS or dedicated nodes
                (eg., 'colocated', 'dedicated')
            num_of_nodes (int): number of nodes required for running test

        Returns:
            tuple: tuple containing:
                list: list of OCS nodes name
                list: list of non-OCS nodes name

        """
        workers_before = helpers.get_worker_nodes()
        ocs_nodes = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
        non_ocs_nodes = list(set(workers_before) - set(ocs_nodes))

        # ocs_nodes does not change until the labeling step below, so this
        # condition is evaluated once and reused there.
        needs_more_ocs_nodes = (
            'colocated' in scenario and len(ocs_nodes) < num_of_nodes
        )

        nodes_to_add = 0
        if needs_more_ocs_nodes:
            nodes_to_add = num_of_nodes - len(workers_before)
        if 'dedicated' in scenario and len(non_ocs_nodes) < num_of_nodes:
            nodes_to_add = num_of_nodes - len(non_ocs_nodes)

        if nodes_to_add > 0:
            logger.info(f"{nodes_to_add} extra workers nodes needed")
            if config.ENV_DATA['deployment_type'] != 'ipi':
                # TODO: Add required num of nodes instead of skipping
                # https://github.com/red-hat-storage/ocs-ci/issues/1291
                pytest.skip("Add node not implemented for UPI, github issue #1291")

            # Scale the machineset that owns a randomly picked worker
            picked_machine = machine.get_machine_from_node_name(
                random.choice(workers_before)
            )
            machineset_name = machine.get_machineset_from_machine_name(
                picked_machine
            )
            current_replicas = machine.get_replica_count(machineset_name)
            machine.add_node(
                machineset_name, count=current_replicas + nodes_to_add
            )
            logger.info("Waiting for the new node(s) to be in ready state")
            machine.wait_for_new_node_to_be_ready(machineset_name)

            workers_after = helpers.get_worker_nodes()
            new_nodes_added = list(set(workers_after) - set(workers_before))
            assert len(new_nodes_added) > 0, 'Extra nodes not added in the cluster'
            non_ocs_nodes.extend(new_nodes_added)

        if needs_more_ocs_nodes:
            logger.info('Adding OCS storage label to Non-OCS workers')
            node_obj = ocp.OCP(kind=constants.NODE)
            shortfall = num_of_nodes - len(ocs_nodes)
            for node_name in non_ocs_nodes[:shortfall]:
                node_obj.add_label(
                    resource_name=node_name,
                    label=constants.OPERATOR_NODE_LABEL,
                )
                ocs_nodes.append(node_name)
            non_ocs_nodes = list(set(non_ocs_nodes) - set(ocs_nodes))

        logger.info(f"The OCS nodes are : {ocs_nodes}")
        logger.info(f"The Non-OCS nodes are: {non_ocs_nodes}")
        return ocs_nodes, non_ocs_nodes
    def identify_and_add_nodes(self, scenario, num_of_nodes):
        """
        Sort workers into OCS and non-OCS nodes, adding nodes if required.

        For a "colocated" scenario with too few OCS nodes, the cluster is
        grown until it has ``num_of_nodes`` workers and non-OCS workers are
        then given the OCS storage label. For a "dedicated" scenario, the
        cluster is grown until there are ``num_of_nodes`` non-OCS workers.
        IPI deployments scale a machineset; other deployments call
        ``node.add_new_node_and_label_upi``, except on vSphere where the
        test is skipped.

        Args:
            scenario (str): Scenario of app pods running on OCS or dedicated nodes
                (eg., 'colocated', 'dedicated')
            num_of_nodes (int): number of nodes required for running test

        Returns:
            tuple: tuple containing:
                list: list of OCS nodes name
                list: list of non-OCS nodes name

        """
        workers_before = node.get_worker_nodes()
        ocs_nodes = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
        non_ocs_nodes = list(set(workers_before) - set(ocs_nodes))

        # ocs_nodes does not change until the labeling step below, so this
        # condition is evaluated once and reused there.
        needs_more_ocs_nodes = (
            "colocated" in scenario and len(ocs_nodes) < num_of_nodes
        )

        nodes_to_add = 0
        if needs_more_ocs_nodes:
            nodes_to_add = num_of_nodes - len(workers_before)
        if "dedicated" in scenario and len(non_ocs_nodes) < num_of_nodes:
            nodes_to_add = num_of_nodes - len(non_ocs_nodes)

        if nodes_to_add > 0:
            logger.info(f"{nodes_to_add} extra workers nodes needed")

            if config.ENV_DATA["deployment_type"] == "ipi":
                # Scale the machineset that owns a randomly picked worker
                picked_machine = machine.get_machine_from_node_name(
                    random.choice(workers_before)
                )
                machineset_name = machine.get_machineset_from_machine_name(
                    picked_machine
                )
                current_replicas = machine.get_replica_count(machineset_name)
                machine.add_node(
                    machineset_name, count=current_replicas + nodes_to_add
                )
                logger.info("Waiting for the new node(s) to be in ready state")
                machine.wait_for_new_node_to_be_ready(machineset_name)
            else:
                platform = config.ENV_DATA.get("platform").lower()
                if platform == constants.VSPHERE_PLATFORM:
                    pytest.skip(
                        "Skipping add node in VSPHERE due to https://bugzilla.redhat.com/show_bug.cgi?id=1844521"
                    )
                # Either RHEL setting selects RHEL workers
                if (
                    config.ENV_DATA.get("rhel_workers")
                    or config.ENV_DATA.get("rhel_user")
                ):
                    node_type = constants.RHEL_OS
                else:
                    node_type = constants.RHCOS
                node.add_new_node_and_label_upi(
                    node_type=node_type,
                    num_nodes=nodes_to_add,
                    mark_for_ocs_label=False,
                )

            workers_after = node.get_worker_nodes()
            new_nodes_added = list(set(workers_after) - set(workers_before))
            assert (
                len(new_nodes_added) == nodes_to_add
            ), "Extra nodes not added in the cluster"
            non_ocs_nodes.extend(new_nodes_added)

        if needs_more_ocs_nodes:
            logger.info("Adding OCS storage label to Non-OCS workers")
            node_obj = ocp.OCP(kind=constants.NODE)
            shortfall = num_of_nodes - len(ocs_nodes)
            for node_name in non_ocs_nodes[:shortfall]:
                node_obj.add_label(
                    resource_name=node_name,
                    label=constants.OPERATOR_NODE_LABEL,
                )
                ocs_nodes.append(node_name)
            non_ocs_nodes = list(set(non_ocs_nodes) - set(ocs_nodes))

        logger.info(f"The OCS nodes are : {ocs_nodes}")
        logger.info(f"The Non-OCS nodes are: {non_ocs_nodes}")
        return ocs_nodes, non_ocs_nodes