def test_add_node_cleanup(self):
    """
    Clean up resources created by the TestAddNode class.

    Reads the namespace and the pod/PVC object-file lists recorded in
    SCALE_DATA_FILE, deletes those objects with ``oc delete -f``, and
    finally removes any app machinesets created for scaling.

    Raises:
        FileNotFoundError: If SCALE_DATA_FILE does not exist, so there
            is no recorded scale data to clean up.

    """
    # Get info from SCALE_DATA_FILE for validation
    if not os.path.exists(SCALE_DATA_FILE):
        # Raise with an explicit message instead of a bare exception class
        raise FileNotFoundError(f"{SCALE_DATA_FILE} file not found")
    file_data = templating.load_yaml(SCALE_DATA_FILE)
    namespace = file_data.get("NAMESPACE")
    pod_obj_file_list = file_data.get("POD_OBJ_FILE_LIST")
    pvc_obj_file_list = file_data.get("PVC_OBJ_FILE_LIST")

    ocs_obj = OCP(namespace=namespace)

    # Delete pods first, then PVCs, from the recorded object files
    for obj_file in pod_obj_file_list + pvc_obj_file_list:
        obj_file_path = f"{log_path}/{obj_file}"
        cmd_str = f"delete -f {obj_file_path}"
        ocs_obj.exec_oc_cmd(command=cmd_str)

    # Delete any machineset that was created for app scaling
    for obj in machine.get_machineset_objs():
        if "app" in obj.name:
            machine.delete_custom_machineset(obj.name)
def check_and_add_enough_worker(worker_count):
    """
    Check whether enough workers are available to scale app pods.

    If there are not enough workers, new workers are added on the
    automation-supported platforms (currently AWS IPI only). The function
    also applies the scale label to the respective worker nodes.

    Args:
        worker_count (int): Expected worker count to be present in the setup

    Returns:
        bool: True if there is enough worker count (possibly after adding
            workers), else an exception is raised.

    Raises:
        UnsupportedPlatformError: On UPI vSphere/baremetal/Azure, where
            workers cannot be added automatically.
        UnavailableResourceException: When workers are insufficient and the
            platform is not supported for adding more.

    """
    # Check either to use OCS workers for scaling app pods
    # Further continue to label the worker with scale label else not
    worker_list = node.get_worker_nodes()
    ocs_worker_list = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    scale_worker = machine.get_labeled_nodes(constants.SCALE_LABEL)
    if config.RUN.get("use_ocs_worker_for_scale"):
        if not scale_worker:
            helpers.label_worker_node(
                node_list=worker_list,
                label_key="scale-label",
                label_value="app-scale",
            )
    else:
        if not scale_worker:
            # Exclude OCS workers, then label whatever remains
            for node_item in ocs_worker_list:
                worker_list.remove(node_item)
            if worker_list:
                helpers.label_worker_node(
                    node_list=worker_list,
                    label_key="scale-label",
                    label_value="app-scale",
                )
    scale_worker_list = machine.get_labeled_nodes(constants.SCALE_LABEL)
    logging.info(f"Print existing scale worker {scale_worker_list}")

    # Check if there is enough nodes to continue scaling of app pods
    if len(scale_worker_list) >= worker_count:
        logging.info(f"Setup has expected worker count {worker_count} "
                     "to continue scale of pods")
        return True

    logging.info(
        "There is no enough worker in the setup, will add enough worker "
        "for the automation supported platforms")
    deployment_type = config.ENV_DATA["deployment_type"]
    platform = config.ENV_DATA["platform"].lower()

    # Add enough worker for AWS
    if deployment_type == "ipi" and platform == "aws":
        # Create machineset for app worker nodes on each aws zone
        # Each zone will have one app worker node
        ms_name = list()
        labels = [("node-role.kubernetes.io/app", "app-scale")]
        for obj in machine.get_machineset_objs():
            if "app" in obj.name:
                ms_name.append(obj.name)
        if not ms_name:
            if len(machine.get_machineset_objs()) == 3:
                for zone in ["a", "b", "c"]:
                    ms_name.append(
                        machine.create_custom_machineset(
                            instance_type="m5.4xlarge",
                            labels=labels,
                            zone=zone,
                        ))
            else:
                ms_name.append(
                    machine.create_custom_machineset(
                        instance_type="m5.4xlarge",
                        labels=labels,
                        zone="a",
                    ))
            for ms in ms_name:
                machine.wait_for_new_node_to_be_ready(ms)
        # Spread the requested worker count evenly across the zones
        if len(ms_name) == 3:
            exp_count = int(worker_count / 3)
        else:
            exp_count = worker_count
        for name in ms_name:
            machine.add_node(machine_set=name, count=exp_count)
        for ms in ms_name:
            machine.wait_for_new_node_to_be_ready(ms)
        # Re-label: everything that is neither an OCS worker nor already
        # scale-labeled gets the scale label now
        worker_list = node.get_worker_nodes()
        ocs_worker_list = machine.get_labeled_nodes(
            constants.OPERATOR_NODE_LABEL)
        scale_label_worker = machine.get_labeled_nodes(
            constants.SCALE_LABEL)
        ocs_worker_list.extend(scale_label_worker)
        final_list = list(dict.fromkeys(ocs_worker_list))
        for node_item in final_list:
            if node_item in worker_list:
                worker_list.remove(node_item)
        if worker_list:
            helpers.label_worker_node(
                node_list=worker_list,
                label_key="scale-label",
                label_value="app-scale",
            )
        return True
    elif deployment_type == "upi" and platform in (
            "vsphere", "baremetal", "azure"):
        # These platforms do not support automated worker addition
        raise UnsupportedPlatformError(
            "Unsupported Platform to add worker")
    else:
        raise UnavailableResourceException(
            "There is no enough worker nodes to continue app pod scaling")
def create_scale_pods(
    self,
    scale_count=1500,
    pods_per_iter=5,
    io_runtime=None,
    pvc_size=None,
    start_io=None,
):
    """
    Main function with the scale pod creation flow and checks to add nodes.
    For other platforms will not be considering the instance_type param.

    Args:
        scale_count (int): Scale pod+pvc count
        pods_per_iter (int): Number of PVC-POD to be created per PVC type
            Example, If 5 then 20 PVC+POD will be created with 5 each of
            4 PVC types. Test value in-between 5-10.
        io_runtime (sec): Fio run time in seconds
        pvc_size (Gi): size of PVC
        start_io (bool): If True start IO else don't

    Raises:
        UnexpectedBehaviour: If pods_per_iter is out of the 5-10 range,
            PG distribution is unequal, or scaling fails mid-way.

    """
    self.ms_name = []
    all_pod_obj = []
    if not 5 <= pods_per_iter <= 10:
        # Message now matches the enforced range (was wrongly "5-15")
        raise UnexpectedBehaviour(
            "Pods_per_iter value should be in-between 5-10")

    # Check for expected worker count
    expected_worker_count = get_expected_worker_count(scale_count)
    if check_and_add_enough_worker(expected_worker_count):
        if (config.ENV_DATA["deployment_type"] == "ipi"
                and config.ENV_DATA["platform"].lower() == "aws"):
            # Track the app machinesets so resources can be validated later
            for obj in machine.get_machineset_objs():
                if "app" in obj.name:
                    self.ms_name.append(obj.name)
        else:
            self.ms_name = []

    # Create namespace
    self.create_and_set_namespace()

    # Continue to iterate till the scale pvc limit is reached
    while True:
        if scale_count <= len(all_pod_obj):
            logger.info(f"Scaled {scale_count} pvc and pods")
            # Validate PG balance before declaring success
            if cluster.validate_pg_balancer():
                logging.info(
                    "OSD consumption and PG distribution is good to continue"
                )
            else:
                raise UnexpectedBehaviour(
                    "Unequal PG distribution to OSDs")
            break
        else:
            logger.info(f"Scaled PVC and POD count {len(all_pod_obj)}")
            self.pod_obj, self.pvc_obj = self.create_multi_pvc_pod(
                pods_per_iter, io_runtime, start_io, pvc_size)
            all_pod_obj.extend(self.pod_obj)
            try:
                # Check enough resources available in the dedicated app workers
                check_enough_resource_available_in_workers(
                    self.ms_name, self.pod_dict_path)

                # Check for ceph cluster OSD utilization
                if not cluster.validate_osd_utilization(osd_used=75):
                    logging.info("Cluster OSD utilization is below 75%")
                elif not cluster.validate_osd_utilization(osd_used=83):
                    logger.warning("Cluster OSD utilization is above 75%")
                else:
                    raise CephHealthException("Cluster OSDs are near full")

                # Check for 500 pods per namespace
                pod_objs = pod.get_all_pods(
                    namespace=self.namespace_list[-1].namespace)
                if len(pod_objs) >= 500:
                    self.create_and_set_namespace()
            except UnexpectedBehaviour:
                logging.error(
                    f"Scaling of cluster failed after {len(all_pod_obj)} pod creation"
                )
                raise UnexpectedBehaviour(
                    "Scaling PVC+POD failed analyze setup and log for more details"
                )
def create_scale_pods(self, scale_count=1500, pvc_per_pod_count=20):
    """
    Main function with scale pod creation flow and checks to add nodes
    for the supported platforms; validates pg-balancer after scaling.
    Function breaks the scale_count into multiples of 750 and iterates
    that many times to reach the desired count.

    Args:
        scale_count (int): No of PVCs to be Scaled
        pvc_per_pod_count (int): Number of PVCs to be attached to single POD
            Example, If 20 then 20 PVCs will be attached to single POD

    Raises:
        UnexpectedBehaviour: If scale_count is below the 750 minimum or
            PG distribution is unequal after scaling.

    """
    # Minimal scale creation count should be 750, code is optimized to
    # scale PVC's not more than 750 count.
    # Used max_pvc_count+10 in certain places to round up the value.
    # i.e. while attaching 20 PVCs to single pod with 750 PVCs last pod
    # will left out with 10 PVCs so to avoid the problem scaling 10 more.
    max_pvc_count = 750
    if scale_count < max_pvc_count:
        raise UnexpectedBehaviour("Minimal scale PVC creation count should be 750")
    self.ms_name = list()

    # Check for expected worker count
    expected_worker_count = get_expected_worker_count(scale_count)
    if check_and_add_enough_worker(expected_worker_count):
        if (
            config.ENV_DATA["deployment_type"] == "ipi"
            and config.ENV_DATA["platform"].lower() == "aws"
        ):
            # Track the app machinesets so resources can be validated later
            for obj in machine.get_machineset_objs():
                if "app" in obj.name:
                    self.ms_name.append(obj.name)
        else:
            self.ms_name = []

    # Create namespace
    self.create_and_set_namespace()

    expected_itr_counter = int(scale_count / max_pvc_count)
    actual_itr_counter = 0

    # Continue to iterate till the scale pvc limit is reached
    while True:
        if actual_itr_counter == expected_itr_counter:
            # Use pvc_per_pod_count instead of a hard-coded 20 so the
            # summary stays correct for non-default pod packing
            logging.info(
                f"Scaled {scale_count} PVCs and created "
                f"{scale_count/pvc_per_pod_count} PODs"
            )
            if cluster.validate_pg_balancer():
                logging.info(
                    "OSD consumption and PG distribution is good to continue"
                )
            else:
                raise UnexpectedBehaviour("Unequal PG distribution to OSDs")
            break
        else:
            actual_itr_counter += 1
            rbd_pvc, fs_pvc, pod_running = self.create_multi_pvc_pod(
                pvc_count=max_pvc_count + 10,
                pvcs_per_pod=pvc_per_pod_count,
                obj_name=f"obj{actual_itr_counter}",
            )
            logging.info(
                f"Scaled {len(rbd_pvc)+len(fs_pvc)} PVCs and Created "
                f"{len(pod_running)} PODs in iteration {actual_itr_counter}"
            )

    logging.info(
        f"Scaled {actual_itr_counter * (max_pvc_count+10)} PVC's and "
        f"Created {int((actual_itr_counter * (max_pvc_count+10))/pvc_per_pod_count)} PODs"
    )
def add_worker_node(instance_type=None):
    """
    Add and label worker nodes for scale testing.

    Labels existing (or OCS) workers with the scale label, and on AWS IPI
    creates app machinesets (one per zone when the cluster has three
    machinesets) and waits for the new nodes to be ready. On UPI
    vSphere/baremetal the existing workers are used as-is.

    Args:
        instance_type (str): AWS instance type for the app machinesets.
            Defaults to "m5.4xlarge" when not provided.

    Returns:
        bool: True when AWS IPI workers were added and labeled;
            None on UPI vSphere/baremetal (existing workers are used).

    Raises:
        UnsupportedPlatformError: On UPI Azure.

    """
    global ms_name
    ms_name = list()
    worker_list = node.get_worker_nodes()
    ocs_worker_list = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    scale_worker = machine.get_labeled_nodes(constants.SCALE_LABEL)
    if config.RUN.get("use_ocs_worker_for_scale"):
        if not scale_worker:
            helpers.label_worker_node(node_list=worker_list,
                                      label_key="scale-label",
                                      label_value="app-scale")
    else:
        if not scale_worker:
            # Exclude OCS workers, then label whatever remains
            for node_item in ocs_worker_list:
                worker_list.remove(node_item)
            if worker_list:
                helpers.label_worker_node(
                    node_list=worker_list,
                    label_key="scale-label",
                    label_value="app-scale",
                )
    scale_worker_list = machine.get_labeled_nodes(constants.SCALE_LABEL)
    logging.info(f"Print existing scale worker {scale_worker_list}")

    if (config.ENV_DATA["deployment_type"] == "ipi"
            and config.ENV_DATA["platform"].lower() == "aws"):
        log.info("Adding worker nodes on the current cluster")
        labels = [("node-role.kubernetes.io/app", "app-scale")]
        # Create machineset for app worker nodes on each zone
        for obj in machine.get_machineset_objs():
            if "app" in obj.name:
                ms_name.append(obj.name)
        # Default the instance type (replaces the old no-op
        # "instance_type = instance_type" self-assignment)
        if instance_type is None:
            instance_type = "m5.4xlarge"
        if not ms_name:
            if len(machine.get_machineset_objs()) == 3:
                for zone in ["a", "b", "c"]:
                    ms_name.append(
                        machine.create_custom_machineset(
                            instance_type=instance_type,
                            labels=labels,
                            zone=zone,
                        ))
            else:
                ms_name.append(
                    machine.create_custom_machineset(
                        instance_type=instance_type,
                        labels=labels,
                        zone="a",
                    ))
        for ms in ms_name:
            machine.wait_for_new_node_to_be_ready(ms)
        # Label newly added workers (neither OCS nor already scale-labeled)
        worker_list = node.get_worker_nodes()
        ocs_worker_list = machine.get_labeled_nodes(
            constants.OPERATOR_NODE_LABEL)
        scale_label_worker = machine.get_labeled_nodes(constants.SCALE_LABEL)
        ocs_worker_list.extend(scale_label_worker)
        final_list = list(dict.fromkeys(ocs_worker_list))
        for node_item in final_list:
            if node_item in worker_list:
                worker_list.remove(node_item)
        if worker_list:
            helpers.label_worker_node(node_list=worker_list,
                                      label_key="scale-label",
                                      label_value="app-scale")
        return True
    elif (config.ENV_DATA["deployment_type"] == "upi"
            and config.ENV_DATA["platform"].lower() == "vsphere"):
        log.info("Running scale test on existing worker nodes.")
    elif (config.ENV_DATA["deployment_type"] == "upi"
            and config.ENV_DATA["platform"].lower() == "baremetal"):
        log.info("Running scale test on existing worker nodes.")
    elif (config.ENV_DATA["deployment_type"] == "upi"
            and config.ENV_DATA["platform"].lower() == "azure"):
        raise UnsupportedPlatformError("Unsupported Platform")
def add_worker_node(instance_type=None):
    """
    Add and label worker nodes for the pgsql workload.

    Labels existing (or OCS) workers with the scale label, and on AWS IPI
    creates app machinesets (one per zone when the cluster has three
    machinesets) and waits for the new nodes to be ready. On UPI
    vSphere/baremetal the existing workers are used as-is.

    Args:
        instance_type (str): AWS instance type for the app machinesets.
            Defaults to 'm5.4xlarge' when not provided.

    Returns:
        bool: True when AWS IPI workers were added and labeled;
            None on UPI vSphere/baremetal (existing workers are used).

    Raises:
        UnsupportedPlatformError: On UPI Azure.

    """
    global ms_name
    ms_name = list()
    worker_list = helpers.get_worker_nodes()
    ocs_worker_list = machine.get_labeled_nodes(constants.OPERATOR_NODE_LABEL)
    scale_worker = machine.get_labeled_nodes(constants.SCALE_LABEL)
    if config.RUN.get('use_ocs_worker_for_scale'):
        if not scale_worker:
            helpers.label_worker_node(node_list=worker_list,
                                      label_key='scale-label',
                                      label_value='app-scale')
    else:
        if not scale_worker:
            # Exclude OCS workers, then label whatever remains
            for node_item in ocs_worker_list:
                worker_list.remove(node_item)
            if worker_list:
                helpers.label_worker_node(node_list=worker_list,
                                          label_key='scale-label',
                                          label_value='app-scale')
    scale_worker_list = machine.get_labeled_nodes(constants.SCALE_LABEL)
    logging.info(f"Print existing scale worker {scale_worker_list}")

    if config.ENV_DATA['deployment_type'] == 'ipi' and config.ENV_DATA[
            'platform'].lower() == 'aws':
        log.info("Adding worker nodes on the current cluster")
        # Create machineset for app worker nodes on each zone
        for obj in machine.get_machineset_objs():
            if 'app' in obj.name:
                ms_name.append(obj.name)
        # Default the instance type (replaces the old no-op
        # "instance_type = instance_type" self-assignment)
        if instance_type is None:
            instance_type = 'm5.4xlarge'
        if not ms_name:
            if len(machine.get_machineset_objs()) == 3:
                for zone in ['a', 'b', 'c']:
                    ms_name.append(
                        machine.create_custom_machineset(
                            instance_type=instance_type, zone=zone))
            else:
                ms_name.append(
                    machine.create_custom_machineset(
                        instance_type=instance_type, zone='a'))
        for ms in ms_name:
            machine.wait_for_new_node_to_be_ready(ms)
        # Label newly added workers (neither OCS nor already scale-labeled)
        worker_list = helpers.get_worker_nodes()
        ocs_worker_list = machine.get_labeled_nodes(
            constants.OPERATOR_NODE_LABEL)
        scale_label_worker = machine.get_labeled_nodes(constants.SCALE_LABEL)
        ocs_worker_list.extend(scale_label_worker)
        final_list = list(dict.fromkeys(ocs_worker_list))
        for node_item in final_list:
            if node_item in worker_list:
                worker_list.remove(node_item)
        if worker_list:
            helpers.label_worker_node(node_list=worker_list,
                                      label_key='scale-label',
                                      label_value='app-scale')
        return True
    elif config.ENV_DATA['deployment_type'] == 'upi' and config.ENV_DATA[
            'platform'].lower() == 'vsphere':
        log.info('Running pgsql on existing worker nodes')
    elif config.ENV_DATA['deployment_type'] == 'upi' and config.ENV_DATA[
            'platform'].lower() == 'baremetal':
        log.info('Running pgsql on existing worker nodes')
    elif config.ENV_DATA['deployment_type'] == 'upi' and config.ENV_DATA[
            'platform'].lower() == 'azure':
        raise UnsupportedPlatformError("Unsupported Platform")