def add_new_node_and_label_it(machineset_name):
    """
    Add a new node and label it

    Scales the given machineset up by one replica, waits for the new
    worker to reach Ready state, then applies the OCS operator label
    to the newly spun node.

    Args:
        machineset_name (str): Name of the machine set

    """
    # Get the initial nodes list
    initial_nodes = get_worker_nodes()
    log.info(f"Current available worker nodes are {initial_nodes}")

    # get machineset replica count
    machineset_replica_count = machine.get_replica_count(machineset_name)

    # Increase its replica count by one
    machine.add_node(machineset_name, count=machineset_replica_count + 1)
    # BUG FIX: the original message said the count was increased "by"
    # the new total; it is increased by one, *to* the new total.
    log.info(
        f"Increased {machineset_name} replica count "
        f"to {machineset_replica_count + 1}"
    )

    # wait for the new node to come to ready state
    log.info("Waiting for the new node to be in ready state")
    machine.wait_for_new_node_to_be_ready(machineset_name)

    # Get the node name of new spun node (set difference vs initial list)
    nodes_after_new_spun_node = get_worker_nodes()
    new_spun_node = list(set(nodes_after_new_spun_node) - set(initial_nodes))
    log.info(f"New spun node is {new_spun_node}")

    # Label it
    node_obj = ocp.OCP(kind='node')
    node_obj.add_label(
        resource_name=new_spun_node[0],
        label=constants.OPERATOR_NODE_LABEL
    )
    log.info(f"Successfully labeled {new_spun_node} with OCS storage label")
def add_new_node_and_label_it(
    machineset_name, num_nodes=1, mark_for_ocs_label=True
):
    """
    Add a new node for ipi and label it

    Args:
        machineset_name (str): Name of the machine set
        num_nodes (int): number of nodes to add
        mark_for_ocs_label (bool): True if label the new node
        eg: add_new_node_and_label_it("new-tdesala-zlqzn-worker-us-east-2a")

    Returns:
        list: new spun nodes

    """
    # Get the initial nodes list
    initial_nodes = tests.helpers.get_worker_nodes()
    log.info(f"Current available worker nodes are {initial_nodes}")

    # get machineset replica count
    machineset_replica_count = machine.get_replica_count(machineset_name)
    log.info(
        f"{machineset_name} has replica count: {machineset_replica_count}"
    )

    # Increase its replica count
    log.info(f"Increasing the replica count by {num_nodes}")
    machine.add_node(machineset_name, count=machineset_replica_count + num_nodes)
    log.info(
        f"{machineset_name} now has replica "
        f"count: {machineset_replica_count + num_nodes}"
    )

    # wait for the new node to come to ready state
    log.info("Waiting for the new node to be in ready state")
    machine.wait_for_new_node_to_be_ready(machineset_name)

    # Get the node name of new spun node(s) as a set difference
    nodes_after_new_spun_node = tests.helpers.get_worker_nodes()
    new_spun_nodes = list(
        set(nodes_after_new_spun_node) - set(initial_nodes)
    )
    log.info(f"New spun nodes: {new_spun_nodes}")

    # Label it
    if mark_for_ocs_label:
        node_obj = ocp.OCP(kind='node')
        for new_spun_node in new_spun_nodes:
            node_obj.add_label(
                resource_name=new_spun_node,
                label=constants.OPERATOR_NODE_LABEL
            )
            # CONSISTENCY FIX: use the module logger ``log`` like the rest
            # of this function instead of the root logger (``logging.info``).
            log.info(
                f"Successfully labeled {new_spun_node} with OCS storage label"
            )
    return new_spun_nodes
def add_worker_based_on_pods_count_per_node(
    node_count, expected_count, role_type=None, machineset_name=None
):
    """
    Function to evaluate number of pods up in node and add new node accordingly.

    Args:
        machineset_name (list): Machineset_names to add more nodes if required.
        node_count (int): Additional nodes to be added
        expected_count (int): Expected pod count in one node
        role_type (str): To add type to the nodes getting added

    Returns:
        bool: True if Nodes gets added, else false.

    Raises:
        UnsupportedPlatformError: For UPI deployments on vsphere, baremetal
            or azure, where adding a worker is not automated.

    """
    # Check for POD running count on each nodes
    if (
        config.ENV_DATA["deployment_type"] == "ipi"
        and config.ENV_DATA["platform"].lower() == "aws"
    ):
        app_nodes = node.get_nodes(node_type=role_type)
        pod_count_dict = node.get_running_pod_count_from_node(node_type=role_type)
        # Partition nodes by whether they already host >= expected_count pods
        high_count_nodes, less_count_nodes = [], []
        for node_obj in app_nodes:
            count = pod_count_dict[f"{node_obj.name}"]
            if count >= expected_count:
                high_count_nodes.append(node_obj.name)
            else:
                less_count_nodes.append(node_obj.name)
        if len(less_count_nodes) <= 1:
            # Almost every node is saturated - scale up each given machineset
            # and wait for its new node to come up
            for name in machineset_name:
                count = machine.get_replica_count(machine_set=name)
                machine.add_node(machine_set=name, count=(count + node_count))
                machine.wait_for_new_node_to_be_ready(name)
            return True
        logging.info(
            f"Enough pods can be created with available nodes {pod_count_dict}"
        )
        return False
    # IDIOM FIX: the original had three byte-identical elif branches,
    # one per unsupported UPI platform; a membership test covers all three.
    elif config.ENV_DATA["deployment_type"] == "upi" and config.ENV_DATA[
        "platform"
    ].lower() in ("vsphere", "baremetal", "azure"):
        raise UnsupportedPlatformError("Unsupported Platform to add worker")
def add_worker_based_on_cpu_utilization(
    node_count, expected_percent, role_type=None, machineset_name=None
):
    """
    Function to evaluate CPU utilization of nodes and add node if required.

    Args:
        machineset_name (list): Machineset_names to add more nodes if required.
        node_count (int): Additional nodes to be added
        expected_percent (int): Expected utilization percent
        role_type (str): To add type to the nodes getting added

    Returns:
        bool: True if Nodes gets added, else false.

    Raises:
        UnsupportedPlatformError: For UPI deployments on vsphere, baremetal
            or azure, where adding a worker is not automated.

    """
    # Check for CPU utilization on each nodes
    if (
        config.ENV_DATA["deployment_type"] == "ipi"
        and config.ENV_DATA["platform"].lower() == "aws"
    ):
        app_nodes = node.get_nodes(node_type=role_type)
        uti_dict = node.get_node_resource_utilization_from_oc_describe(
            node_type=role_type
        )
        # Partition nodes by whether CPU usage already exceeds the threshold
        uti_high_nodes, uti_less_nodes = [], []
        for node_obj in app_nodes:
            utilization_percent = uti_dict[f"{node_obj.name}"]["cpu"]
            if utilization_percent > expected_percent:
                uti_high_nodes.append(node_obj.name)
            else:
                uti_less_nodes.append(node_obj.name)
        if len(uti_less_nodes) <= 1:
            # Almost every node is above the threshold - scale up each given
            # machineset and wait for its new node to come up
            for name in machineset_name:
                count = machine.get_replica_count(machine_set=name)
                machine.add_node(machine_set=name, count=(count + node_count))
                machine.wait_for_new_node_to_be_ready(name)
            return True
        logging.info(f"Enough resource available for more pod creation {uti_dict}")
        return False
    # IDIOM FIX: the original had three byte-identical elif branches,
    # one per unsupported UPI platform; a membership test covers all three.
    elif config.ENV_DATA["deployment_type"] == "upi" and config.ENV_DATA[
        "platform"
    ].lower() in ("vsphere", "baremetal", "azure"):
        raise UnsupportedPlatformError("Unsupported Platform to add worker")
def test_add_node(self):
    """
    Test for adding worker nodes to the cluster while IOs
    """
    deployment_type = config.ENV_DATA['deployment_type']
    # Guard clause: node addition is only automated for IPI deployments
    if deployment_type != 'ipi':
        pytest.skip("UPI not yet supported")

    target_count = 2
    machines = machine_utils.get_machinesets()
    # Snapshot the replica count of every machineset before scaling
    before_replica_counts = {
        ms: machine_utils.get_replica_count(ms) for ms in machines
    }
    worker_nodes_before = helpers.get_worker_nodes()
    logger.info(
        f'The worker nodes number before adding a new node is {len(worker_nodes_before)}'
    )

    # Scale each machineset and record the resulting replica counts
    after_replica_counts = {}
    for ms in machines:
        machine_utils.add_node(ms, count=target_count)
        after_replica_counts[ms] = machine_utils.get_replica_count(ms)
    logger.info(after_replica_counts)

    # Poll until the worker list reaches the expected total size
    for sample in TimeoutSampler(
        timeout=300, sleep=3, func=helpers.get_worker_nodes
    ):
        if len(sample) == target_count * len(machines):
            break

    worker_nodes_after = helpers.get_worker_nodes()
    logger.info(
        f'The worker nodes number after adding a new node is {len(worker_nodes_after)}'
    )
    wait_for_nodes_status(
        node_names=worker_nodes_after, status=constants.NODE_READY
    )
def check_and_add_enough_worker(worker_count):
    """
    Function to check if there is enough workers available to scale pods.
    If there are not enough workers, more will be added on the supported
    platforms (currently AWS IPI only).
    Function also adds scale label to the respective worker nodes.

    Args:
        worker_count (int): Expected worker count to be present in the setup

    Returns:
        bool: True if there is enough worker count, else raise exception.

    Raises:
        UnsupportedPlatformError: On UPI vsphere/baremetal/azure.
        UnavailableResourceException: On any other non-AWS-IPI setup.

    """
    # Check either to use OCS workers for scaling app pods
    # Further continue to label the worker with scale label else not
    worker_list = node.get_worker_nodes()
    ocs_worker_list = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    scale_worker = machine.get_labeled_nodes(constants.SCALE_LABEL)
    if config.RUN.get("use_ocs_worker_for_scale"):
        # OCS workers may be reused for app scaling - label all workers
        if not scale_worker:
            helpers.label_worker_node(node_list=worker_list,
                                      label_key="scale-label",
                                      label_value="app-scale")
    else:
        if not scale_worker:
            # Exclude OCS-labeled workers so app pods get dedicated nodes
            for node_item in ocs_worker_list:
                worker_list.remove(node_item)
            if worker_list:
                helpers.label_worker_node(
                    node_list=worker_list,
                    label_key="scale-label",
                    label_value="app-scale",
                )
    scale_worker_list = machine.get_labeled_nodes(constants.SCALE_LABEL)
    logging.info(f"Print existing scale worker {scale_worker_list}")

    # Check if there is enough nodes to continue scaling of app pods
    if len(scale_worker_list) >= worker_count:
        logging.info(f"Setup has expected worker count {worker_count} "
                     "to continue scale of pods")
        return True
    else:
        logging.info(
            "There is no enough worker in the setup, will add enough worker "
            "for the automation supported platforms")
        # Add enough worker for AWS
        if (config.ENV_DATA["deployment_type"] == "ipi"
                and config.ENV_DATA["platform"].lower() == "aws"):
            # Create machineset for app worker nodes on each aws zone
            # Each zone will have one app worker node
            ms_name = list()
            labels = [("node-role.kubernetes.io/app", "app-scale")]
            # Reuse any pre-existing "app" machinesets
            for obj in machine.get_machineset_objs():
                if "app" in obj.name:
                    ms_name.append(obj.name)
            if not ms_name:
                # No app machinesets yet: create one per zone when the
                # cluster has 3 machinesets, else a single one in zone "a"
                if len(machine.get_machineset_objs()) == 3:
                    for zone in ["a", "b", "c"]:
                        ms_name.append(
                            machine.create_custom_machineset(
                                instance_type="m5.4xlarge",
                                labels=labels,
                                zone=zone,
                            ))
                else:
                    ms_name.append(
                        machine.create_custom_machineset(
                            instance_type="m5.4xlarge",
                            labels=labels,
                            zone="a",
                        ))
                for ms in ms_name:
                    machine.wait_for_new_node_to_be_ready(ms)
            # Spread the requested workers evenly across three zones when
            # three machinesets exist; otherwise put them all in one
            if len(ms_name) == 3:
                exp_count = int(worker_count / 3)
            else:
                exp_count = worker_count
            for name in ms_name:
                machine.add_node(machine_set=name, count=exp_count)
            for ms in ms_name:
                machine.wait_for_new_node_to_be_ready(ms)
            # Re-read worker/label state after scaling
            worker_list = node.get_worker_nodes()
            ocs_worker_list = machine.get_labeled_nodes(
                constants.OPERATOR_NODE_LABEL)
            scale_label_worker = machine.get_labeled_nodes(
                constants.SCALE_LABEL)
            ocs_worker_list.extend(scale_label_worker)
            # De-duplicate (order-preserving) and label only workers that
            # carry neither the OCS nor the scale label yet
            final_list = list(dict.fromkeys(ocs_worker_list))
            for node_item in final_list:
                if node_item in worker_list:
                    worker_list.remove(node_item)
            if worker_list:
                helpers.label_worker_node(
                    node_list=worker_list,
                    label_key="scale-label",
                    label_value="app-scale",
                )
            return True
        elif (config.ENV_DATA["deployment_type"] == "upi"
              and config.ENV_DATA["platform"].lower() == "vsphere"):
            raise UnsupportedPlatformError(
                "Unsupported Platform to add worker")
        elif (config.ENV_DATA["deployment_type"] == "upi"
              and config.ENV_DATA["platform"].lower() == "baremetal"):
            raise UnsupportedPlatformError(
                "Unsupported Platform to add worker")
        elif (config.ENV_DATA["deployment_type"] == "upi"
              and config.ENV_DATA["platform"].lower() == "azure"):
            raise UnsupportedPlatformError(
                "Unsupported Platform to add worker")
        else:
            raise UnavailableResourceException(
                "There is no enough worker nodes to continue app pod scaling")
def identify_and_add_nodes(self, scenario, num_of_nodes):
    """
    Fetches info about the worker nodes and add nodes (if required)

    Args:
        scenario (str): Scenario of app pods running on OCS or dedicated nodes
            (eg., 'colocated', 'dedicated')
        num_of_nodes (int): number of nodes required for running test

    Returns:
        tuple: tuple containing:
            list: list of OCS nodes name
            list: list of non-OCS nodes name

    """
    workers_at_start = helpers.get_worker_nodes()
    ocs_nodes = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    non_ocs_nodes = list(set(workers_at_start) - set(ocs_nodes))

    # Work out how many extra workers this scenario needs; the
    # 'dedicated' check wins when both keywords appear in the scenario
    extra_workers = 0
    if 'colocated' in scenario and len(ocs_nodes) < num_of_nodes:
        extra_workers = num_of_nodes - len(workers_at_start)
    if 'dedicated' in scenario and len(non_ocs_nodes) < num_of_nodes:
        extra_workers = num_of_nodes - len(non_ocs_nodes)

    if extra_workers > 0:
        logger.info(f"{extra_workers} extra workers nodes needed")
        if config.ENV_DATA['deployment_type'] != 'ipi':
            # TODO: Add required num of nodes instead of skipping
            # https://github.com/red-hat-storage/ocs-ci/issues/1291
            pytest.skip("Add node not implemented for UPI, github issue #1291")
        # Scale up the machineset that backs a randomly chosen worker
        picked_machine = machine.get_machine_from_node_name(
            random.choice(workers_at_start)
        )
        ms_name = machine.get_machineset_from_machine_name(picked_machine)
        replicas = machine.get_replica_count(ms_name)
        machine.add_node(ms_name, count=replicas + extra_workers)
        logger.info("Waiting for the new node(s) to be in ready state")
        machine.wait_for_new_node_to_be_ready(ms_name)

        current_workers = helpers.get_worker_nodes()
        fresh_nodes = list(set(current_workers) - set(workers_at_start))
        assert len(fresh_nodes) > 0, 'Extra nodes not added in the cluster'
        non_ocs_nodes += fresh_nodes

    if 'colocated' in scenario and len(ocs_nodes) < num_of_nodes:
        logger.info('Adding OCS storage label to Non-OCS workers')
        node_obj = ocp.OCP(kind=constants.NODE)
        # Label just enough non-OCS workers to reach num_of_nodes
        for node_name in non_ocs_nodes[:num_of_nodes - len(ocs_nodes)]:
            node_obj.add_label(
                resource_name=node_name,
                label=constants.OPERATOR_NODE_LABEL
            )
            ocs_nodes.append(node_name)
        non_ocs_nodes = list(set(non_ocs_nodes) - set(ocs_nodes))

    logger.info(f"The OCS nodes are : {ocs_nodes}")
    logger.info(f"The Non-OCS nodes are: {non_ocs_nodes}")
    return ocs_nodes, non_ocs_nodes
def identify_and_add_nodes(self, scenario, num_of_nodes):
    """
    Fetches info about the worker nodes and add nodes (if required)

    Args:
        scenario (str): Scenario of app pods running on OCS or dedicated nodes
            (eg., 'colocated', 'dedicated')
        num_of_nodes (int): number of nodes required for running test

    Returns:
        tuple: tuple containing:
            list: list of OCS nodes name
            list: list of non-OCS nodes name

    """
    nodes_to_add = 0
    initial_worker_nodes = node.get_worker_nodes()
    ocs_nodes = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    non_ocs_nodes = list(set(initial_worker_nodes) - set(ocs_nodes))
    # Decide how many extra workers the scenario requires; when both
    # keywords are present the 'dedicated' calculation wins
    if "colocated" in scenario and len(ocs_nodes) < num_of_nodes:
        nodes_to_add = num_of_nodes - len(initial_worker_nodes)
    if "dedicated" in scenario and len(non_ocs_nodes) < num_of_nodes:
        nodes_to_add = num_of_nodes - len(non_ocs_nodes)
    if nodes_to_add > 0:
        logger.info(f"{nodes_to_add} extra workers nodes needed")
        if config.ENV_DATA["deployment_type"] == "ipi":
            # IPI: scale up the machineset backing a random existing worker
            machine_name = machine.get_machine_from_node_name(
                random.choice(initial_worker_nodes))
            machineset_name = machine.get_machineset_from_machine_name(
                machine_name)
            machineset_replica_count = machine.get_replica_count(
                machineset_name)
            machine.add_node(machineset_name,
                             count=machineset_replica_count + nodes_to_add)
            logger.info("Waiting for the new node(s) to be in ready state")
            machine.wait_for_new_node_to_be_ready(machineset_name)
        else:
            # UPI on vsphere cannot add nodes due to the referenced BZ
            if (config.ENV_DATA.get("platform").lower() ==
                    constants.VSPHERE_PLATFORM):
                pytest.skip(
                    "Skipping add node in VSPHERE due to https://bugzilla.redhat.com/show_bug.cgi?id=1844521"
                )
            # Pick the OS for the new UPI nodes based on the RHEL config
            is_rhel = config.ENV_DATA.get(
                "rhel_workers") or config.ENV_DATA.get("rhel_user")
            node_type = constants.RHEL_OS if is_rhel else constants.RHCOS
            node.add_new_node_and_label_upi(
                node_type=node_type,
                num_nodes=nodes_to_add,
                mark_for_ocs_label=False,
            )
        # The new nodes are the set difference against the initial workers
        new_worker_nodes = node.get_worker_nodes()
        new_nodes_added = list(
            set(new_worker_nodes) - set(initial_worker_nodes))
        assert (len(new_nodes_added) == nodes_to_add
                ), "Extra nodes not added in the cluster"
        non_ocs_nodes += new_nodes_added
    if "colocated" in scenario and len(ocs_nodes) < num_of_nodes:
        # Label just enough non-OCS workers to reach num_of_nodes
        logger.info("Adding OCS storage label to Non-OCS workers")
        node_obj = ocp.OCP(kind=constants.NODE)
        nodes_to_label = non_ocs_nodes[0:(num_of_nodes - len(ocs_nodes))]
        for node_name in nodes_to_label:
            node_obj.add_label(resource_name=node_name,
                               label=constants.OPERATOR_NODE_LABEL)
            ocs_nodes.append(node_name)
        non_ocs_nodes = list(set(non_ocs_nodes) - set(ocs_nodes))
    logger.info(f"The OCS nodes are : {ocs_nodes}")
    logger.info(f"The Non-OCS nodes are: {non_ocs_nodes}")
    return ocs_nodes, non_ocs_nodes