def add_new_node_and_label_it(machineset_name):
    """
    Add a new worker node through a machineset and label it

    The replica count of the machineset is raised by one, the function
    waits for the new node to be ready and then applies
    ``constants.OPERATOR_NODE_LABEL`` to the node that was not in the
    worker list before the scale-up.

    Args:
        machineset_name (str): Name of the machine set

    Raises:
        IndexError: If no new worker node shows up after the scale-up

    """
    # Snapshot the workers so the new node can be found by set difference
    initial_nodes = get_worker_nodes()
    log.info(f"Current available worker nodes are {initial_nodes}")

    # The value handed to add_node is the current replica count plus one
    machineset_replica_count = machine.get_replica_count(machineset_name)
    new_replica_count = machineset_replica_count + 1
    machine.add_node(machineset_name, count=new_replica_count)
    log.info(f"Increased {machineset_name} count to {new_replica_count}")

    # wait for the new node to come to ready state
    log.info("Waiting for the new node to be in ready state")
    machine.wait_for_new_node_to_be_ready(machineset_name)

    # Get the node name of new spun node
    nodes_after_new_spun_node = get_worker_nodes()
    new_spun_node = list(set(nodes_after_new_spun_node) - set(initial_nodes))
    log.info(f"New spun node is {new_spun_node}")

    # Fail with a readable message instead of a bare index error
    if not new_spun_node:
        raise IndexError(
            f"No new worker node found after scaling {machineset_name}"
        )

    # Only the first new node is labeled
    node_to_label = new_spun_node[0]
    node_obj = ocp.OCP(kind='node')
    node_obj.add_label(
        resource_name=node_to_label,
        label=constants.OPERATOR_NODE_LABEL
    )
    log.info(f"Successfully labeled {node_to_label} with OCS storage label")
def add_new_node_and_label_it(
    machineset_name, num_nodes=1, mark_for_ocs_label=True
):
    """
    Add new node(s) for ipi and optionally label them

    Args:
        machineset_name (str): Name of the machine set
        num_nodes (int): number of nodes to add
        mark_for_ocs_label (bool): True if label the new node

    eg: add_new_node_and_label_it("new-tdesala-zlqzn-worker-us-east-2a")

    Returns:
        list: new spun nodes

    """
    # Snapshot the workers so the new nodes can be found by set difference
    initial_nodes = tests.helpers.get_worker_nodes()
    log.info(f"Current available worker nodes are {initial_nodes}")

    # get machineset replica count
    machineset_replica_count = machine.get_replica_count(machineset_name)
    log.info(
        f"{machineset_name} has replica count: {machineset_replica_count}"
    )

    # Increase its replica count
    log.info(f"Increasing the replica count by {num_nodes}")
    machine.add_node(
        machineset_name, count=machineset_replica_count + num_nodes
    )
    log.info(
        f"{machineset_name} now has replica "
        f"count: {machineset_replica_count + num_nodes}"
    )

    # wait for the new node to come to ready state
    log.info("Waiting for the new node to be in ready state")
    machine.wait_for_new_node_to_be_ready(machineset_name)

    # Get the node name of new spun node
    nodes_after_new_spun_node = tests.helpers.get_worker_nodes()
    new_spun_nodes = list(
        set(nodes_after_new_spun_node) - set(initial_nodes)
    )
    log.info(f"New spun nodes: {new_spun_nodes}")

    # Label it
    if mark_for_ocs_label:
        node_obj = ocp.OCP(kind='node')
        for new_spun_node in new_spun_nodes:
            node_obj.add_label(
                resource_name=new_spun_node,
                label=constants.OPERATOR_NODE_LABEL
            )
            # Same logger as the rest of the function (was the root logger)
            log.info(
                f"Successfully labeled {new_spun_node} with OCS storage label"
            )

    return new_spun_nodes
def add_worker_based_on_pods_count_per_node(
    node_count, expected_count, role_type=None, machineset_name=None
):
    """
    Function to evaluate number of pods up in node and add new node accordingly.

    Nodes are only added on IPI deployments on AWS, and only when at most
    one node of the requested type runs fewer than ``expected_count`` pods.

    Args:
        machineset_name (list): Machineset_names to add more nodes if required.
        node_count (int): Additional nodes to be added
        expected_count (int): Expected pod count in one node
        role_type (str): To add type to the nodes getting added

    Returns:
        bool: True if Nodes gets added, else false.

    Raises:
        UnsupportedPlatformError: On UPI deployments on vsphere, baremetal
            or azure
        TypeError: If nodes have to be added but machineset_name is None

    """
    deployment_type = config.ENV_DATA["deployment_type"]

    if deployment_type == "upi":
        unsupported_platforms = ("vsphere", "baremetal", "azure")
        if config.ENV_DATA["platform"].lower() in unsupported_platforms:
            raise UnsupportedPlatformError("Unsupported Platform to add worker")
        # Other UPI platforms: nothing is added
        return False

    if deployment_type != "ipi" or config.ENV_DATA["platform"].lower() != "aws":
        # No node gets added for any other deployment/platform combination
        return False

    # Check for POD running count on each nodes
    app_nodes = node.get_nodes(node_type=role_type)
    pod_count_dict = node.get_running_pod_count_from_node(node_type=role_type)
    less_count_nodes = [
        node_obj.name
        for node_obj in app_nodes
        if pod_count_dict[f"{node_obj.name}"] < expected_count
    ]

    if len(less_count_nodes) > 1:
        logging.info(
            f"Enough pods can be created with available nodes {pod_count_dict}"
        )
        return False

    if machineset_name is None:
        raise TypeError(
            "machineset_name must be a list of machineset names "
            "when worker nodes have to be added"
        )

    # Scale every given machineset by node_count and wait for its new nodes
    for name in machineset_name:
        count = machine.get_replica_count(machine_set=name)
        machine.add_node(machine_set=name, count=(count + node_count))
        machine.wait_for_new_node_to_be_ready(name)
    return True
def add_worker_based_on_cpu_utilization(
    node_count, expected_percent, role_type=None, machineset_name=None
):
    """
    Function to evaluate CPU utilization of nodes and add node if required.

    Nodes are only added on IPI deployments on AWS, and only when at most
    one node of the requested type has a CPU utilization that does not
    exceed ``expected_percent``.

    Args:
        machineset_name (list): Machineset_names to add more nodes if required.
        node_count (int): Additional nodes to be added
        expected_percent (int): Expected utilization percent
        role_type (str): To add type to the nodes getting added

    Returns:
        bool: True if Nodes gets added, else false.

    Raises:
        UnsupportedPlatformError: On UPI deployments on vsphere, baremetal
            or azure
        TypeError: If nodes have to be added but machineset_name is None

    """
    deployment_type = config.ENV_DATA["deployment_type"]

    if deployment_type == "upi":
        unsupported_platforms = ("vsphere", "baremetal", "azure")
        if config.ENV_DATA["platform"].lower() in unsupported_platforms:
            raise UnsupportedPlatformError("Unsupported Platform to add worker")
        # Other UPI platforms: nothing is added
        return False

    if deployment_type != "ipi" or config.ENV_DATA["platform"].lower() != "aws":
        # No node gets added for any other deployment/platform combination
        return False

    # Check for CPU utilization on each nodes
    app_nodes = node.get_nodes(node_type=role_type)
    uti_dict = node.get_node_resource_utilization_from_oc_describe(
        node_type=role_type
    )
    uti_less_nodes = [
        node_obj.name
        for node_obj in app_nodes
        if uti_dict[f"{node_obj.name}"]["cpu"] <= expected_percent
    ]

    if len(uti_less_nodes) > 1:
        logging.info(f"Enough resource available for more pod creation {uti_dict}")
        return False

    if machineset_name is None:
        raise TypeError(
            "machineset_name must be a list of machineset names "
            "when worker nodes have to be added"
        )

    # Scale every given machineset by node_count and wait for its new nodes
    for name in machineset_name:
        count = machine.get_replica_count(machine_set=name)
        machine.add_node(machine_set=name, count=(count + node_count))
        machine.wait_for_new_node_to_be_ready(name)
    return True
def test_add_node(self):
    """
    Test for adding worker nodes to the cluster while IOs

    Runs only on IPI deployments; every machineset is passed a count of 2
    and the test then waits for the worker nodes to reach Ready.
    """
    if config.ENV_DATA['deployment_type'] != 'ipi':
        pytest.skip("UPI not yet supported")

    replicas_per_machineset = 2
    machinesets = machine_utils.get_machinesets()

    # NOTE(review): these pre-scale counts are collected but never read
    # afterwards - confirm whether they should feed the expected total.
    before_replica_counts = {
        machineset: machine_utils.get_replica_count(machineset)
        for machineset in machinesets
    }

    worker_nodes_before = helpers.get_worker_nodes()
    logger.info(
        f'The worker nodes number before adding a new node is {len(worker_nodes_before)}'
    )

    after_replica_counts = {}
    for machineset in machinesets:
        machine_utils.add_node(machineset, count=replicas_per_machineset)
        after_replica_counts[machineset] = machine_utils.get_replica_count(
            machineset
        )
    logger.info(after_replica_counts)

    # Poll the worker list until it holds the expected number of nodes
    worker_samples = TimeoutSampler(
        timeout=300, sleep=3, func=helpers.get_worker_nodes
    )
    for sample in worker_samples:
        if len(sample) == replicas_per_machineset * len(machinesets):
            break

    worker_nodes_after = helpers.get_worker_nodes()
    logger.info(
        f'The worker nodes number after adding a new node is {len(worker_nodes_after)}'
    )
    wait_for_nodes_status(
        node_names=worker_nodes_after, status=constants.NODE_READY
    )
def identify_and_add_nodes(self, scenario, num_of_nodes):
    """
    Fetches info about the worker nodes and add nodes (if required)

    Args:
        scenario (str): Scenario of app pods running on OCS or dedicated nodes
            (eg., 'colocated', 'dedicated')
        num_of_nodes (int): number of nodes required for running test

    Returns:
        tuple: tuple containing:
            list: list of OCS nodes name
            list: list of non-OCS nodes name

    Raises:
        AssertionError: If fewer nodes than required joined the cluster

    """
    nodes_to_add = 0
    initial_worker_nodes = helpers.get_worker_nodes()
    ocs_nodes = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    non_ocs_nodes = list(set(initial_worker_nodes) - set(ocs_nodes))

    # Colocated: existing non-OCS workers can be labeled later, so only the
    # gap to the total worker count has to be added
    if 'colocated' in scenario and len(ocs_nodes) < num_of_nodes:
        nodes_to_add = num_of_nodes - len(initial_worker_nodes)

    # Dedicated: only non-OCS workers count towards the requirement
    if 'dedicated' in scenario and len(non_ocs_nodes) < num_of_nodes:
        nodes_to_add = num_of_nodes - len(non_ocs_nodes)

    if nodes_to_add > 0:
        logger.info(f"{nodes_to_add} extra workers nodes needed")
        if config.ENV_DATA['deployment_type'] == 'ipi':
            # Scale the machineset that owns a randomly picked worker
            machine_name = machine.get_machine_from_node_name(
                random.choice(initial_worker_nodes)
            )
            machineset_name = machine.get_machineset_from_machine_name(
                machine_name
            )
            machineset_replica_count = machine.get_replica_count(
                machineset_name
            )
            machine.add_node(
                machineset_name,
                count=machineset_replica_count + nodes_to_add
            )
            logger.info("Waiting for the new node(s) to be in ready state")
            machine.wait_for_new_node_to_be_ready(machineset_name)
        else:
            # TODO: Add required num of nodes instead of skipping
            # https://github.com/red-hat-storage/ocs-ci/issues/1291
            pytest.skip("Add node not implemented for UPI, github issue #1291")

        new_worker_nodes = helpers.get_worker_nodes()
        new_nodes_added = list(
            set(new_worker_nodes) - set(initial_worker_nodes)
        )
        # A partial scale-up must fail here; otherwise fewer nodes than
        # requested would be returned without any error
        assert len(new_nodes_added) >= nodes_to_add, (
            f"Extra nodes not added in the cluster: expected {nodes_to_add}, "
            f"found {len(new_nodes_added)}"
        )
        non_ocs_nodes += new_nodes_added

    if 'colocated' in scenario and len(ocs_nodes) < num_of_nodes:
        logger.info('Adding OCS storage label to Non-OCS workers')
        node_obj = ocp.OCP(kind=constants.NODE)
        nodes_to_label = non_ocs_nodes[0:(num_of_nodes - len(ocs_nodes))]
        for node_name in nodes_to_label:
            node_obj.add_label(
                resource_name=node_name,
                label=constants.OPERATOR_NODE_LABEL
            )
            ocs_nodes.append(node_name)
        non_ocs_nodes = list(set(non_ocs_nodes) - set(ocs_nodes))

    logger.info(f"The OCS nodes are : {ocs_nodes}")
    logger.info(f"The Non-OCS nodes are: {non_ocs_nodes}")
    return ocs_nodes, non_ocs_nodes
def identify_and_add_nodes(self, scenario, num_of_nodes):
    """
    Collect the OCS and non-OCS worker nodes, adding workers when too few exist

    Args:
        scenario (str): Scenario of app pods running on OCS or dedicated nodes
            (eg., 'colocated', 'dedicated')
        num_of_nodes (int): number of nodes required for running test

    Returns:
        tuple: tuple containing:
            list: list of OCS nodes name
            list: list of non-OCS nodes name

    """
    initial_worker_nodes = node.get_worker_nodes()
    ocs_nodes = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    non_ocs_nodes = list(set(initial_worker_nodes) - set(ocs_nodes))

    # ocs_nodes is not modified before the labeling step, so this flag holds
    # for both places where the colocated condition is needed
    need_more_ocs_nodes = (
        "colocated" in scenario and len(ocs_nodes) < num_of_nodes
    )

    nodes_to_add = 0
    if need_more_ocs_nodes:
        nodes_to_add = num_of_nodes - len(initial_worker_nodes)
    if "dedicated" in scenario and len(non_ocs_nodes) < num_of_nodes:
        nodes_to_add = num_of_nodes - len(non_ocs_nodes)

    if nodes_to_add > 0:
        logger.info(f"{nodes_to_add} extra workers nodes needed")

        if config.ENV_DATA["deployment_type"] == "ipi":
            # Scale the machineset that owns a randomly picked worker
            picked_worker = random.choice(initial_worker_nodes)
            owning_machine = machine.get_machine_from_node_name(picked_worker)
            machineset_name = machine.get_machineset_from_machine_name(
                owning_machine
            )
            current_replicas = machine.get_replica_count(machineset_name)
            machine.add_node(
                machineset_name, count=current_replicas + nodes_to_add
            )
            logger.info("Waiting for the new node(s) to be in ready state")
            machine.wait_for_new_node_to_be_ready(machineset_name)
        else:
            platform = config.ENV_DATA.get("platform").lower()
            if platform == constants.VSPHERE_PLATFORM:
                pytest.skip(
                    "Skipping add node in VSPHERE due to https://bugzilla.redhat.com/show_bug.cgi?id=1844521"
                )

            is_rhel = (
                config.ENV_DATA.get("rhel_workers")
                or config.ENV_DATA.get("rhel_user")
            )
            if is_rhel:
                node_type = constants.RHEL_OS
            else:
                node_type = constants.RHCOS
            node.add_new_node_and_label_upi(
                node_type=node_type,
                num_nodes=nodes_to_add,
                mark_for_ocs_label=False,
            )

        new_worker_nodes = node.get_worker_nodes()
        new_nodes_added = list(
            set(new_worker_nodes) - set(initial_worker_nodes)
        )
        assert (
            len(new_nodes_added) == nodes_to_add
        ), "Extra nodes not added in the cluster"
        non_ocs_nodes.extend(new_nodes_added)

    if need_more_ocs_nodes:
        logger.info("Adding OCS storage label to Non-OCS workers")
        node_obj = ocp.OCP(kind=constants.NODE)
        missing_ocs_nodes = num_of_nodes - len(ocs_nodes)
        for node_name in non_ocs_nodes[:missing_ocs_nodes]:
            node_obj.add_label(
                resource_name=node_name,
                label=constants.OPERATOR_NODE_LABEL,
            )
            ocs_nodes.append(node_name)
        non_ocs_nodes = list(set(non_ocs_nodes) - set(ocs_nodes))

    logger.info(f"The OCS nodes are : {ocs_nodes}")
    logger.info(f"The Non-OCS nodes are: {non_ocs_nodes}")
    return ocs_nodes, non_ocs_nodes