def test_delete_cached_object(self, bucket_factory, mcg_obj, cld_mgr,
                              awscli_pod, bucketclass_dict):
    """
    Test the deletion of an object that is present in the cache of a cache bucket.

    One object is uploaded through the cache bucket, deleted via the MCG
    interface, and then checked against both the cache and the underlying
    AWS target bucket.

    Raises:
        UnexpectedBehaviour: If the cache check fails after the deletion, or
            if the object key is still listed in the AWS target bucket.
    """
    # Create the cached namespace bucket on top of the namespace resource
    bucket_obj = bucket_factory(bucketclass=bucketclass_dict)[0]

    # Upload files to NS bucket
    writen_objs_names = self.write_files_to_pod_and_upload(
        mcg_obj, awscli_pod, bucket_to_write=bucket_obj.name, amount=1)
    wait_for_cache(mcg_obj, bucket_obj.name, writen_objs_names)

    # Delete the object from mcg interface
    s3_delete_object(mcg_obj, bucket_obj.name, writen_objs_names[0])
    # NOTE(review): presumably gives the deletion time to propagate - confirm
    sleep(5)
    if not check_cached_objects_by_name(mcg_obj, bucket_obj.name):
        raise UnexpectedBehaviour(
            "Object was not deleted from cache properly")

    # Check deletion in the cloud provider.
    # objects.all() yields object summaries, not plain names, so the keys
    # have to be extracted before testing membership of the object name.
    aws_target_bucket = bucket_obj.bucketclass.namespacestores[0].uls_name
    aws_obj_keys = {
        aws_obj.key
        for aws_obj in
        cld_mgr.aws_client.client.Bucket(aws_target_bucket).objects.all()
    }
    if writen_objs_names[0] in aws_obj_keys:
        raise UnexpectedBehaviour(
            "Object was not deleted from the cloud provider bucket properly")
def get_registry_pod_obj():
    """
    Return the image-registry pod once exactly one such pod is present.

    After a config CRD update two registry pods can exist for a while (the
    old one terminating, the new one starting), so the lookup is repeated
    with a 30 second pause until a single pod remains.

    Returns:
        list: Single-element list holding the registry pod object

    Raises:
        UnexpectedBehaviour: When no image-registry pod is present, or when
            more than one pod is still found after the allowed iterations.
    """
    wait_time = 30
    for iteration in range(10):
        registry_pods = [
            pod.Pod(**pod_info)
            for pod_info in pod.get_pods_having_label(
                label='docker-registry=default',
                namespace=constants.OPENSHIFT_IMAGE_REGISTRY_NAMESPACE)
        ]
        found = len(registry_pods)
        if found == 1:
            break
        if found == 0:
            raise UnexpectedBehaviour("Image-registry pod not present")
        # More than one pod: the old one is still around
        if iteration > 5:
            raise UnexpectedBehaviour(
                "Waited for 3 mins Image-registry pod is not in Running state")
        logger.info(
            f"Waiting for 30 sec's for registry pod to be up iteration {iteration}"
        )
        time.sleep(wait_time)
    return registry_pods
def test_scale_pvcs_pods_post_upgrade():
    """
    Validate the PVCs and PODs recorded in SCALE_DATA_FILE after an upgrade.

    The namespace and the expected PVC/POD lists are read from
    SCALE_DATA_FILE. The first ``len(PVC_SCALE_LIST)`` PVCs of the namespace
    must be Bound and the first ``len(POD_SCALE_LIST)`` deployment configs
    must report available replicas. Ceph health is checked at the end.

    Raises:
        FileNotFoundError: If SCALE_DATA_FILE does not exist
        UnexpectedBehaviour: If the number of Bound PVCs or running PODs
            does not match the number recorded in the data file
    """
    # Get info from SCALE_DATA_FILE for validation
    if not os.path.exists(SCALE_DATA_FILE):
        raise FileNotFoundError(
            f"Scale data file {SCALE_DATA_FILE} does not exist")
    file_data = templating.load_yaml(SCALE_DATA_FILE)
    namespace = file_data.get("NAMESPACE")
    pod_scale_list = file_data.get("POD_SCALE_LIST")
    pvc_scale_list = file_data.get("PVC_SCALE_LIST")

    # Get all PVCs from namespace.
    # Slicing (instead of indexing by expected count) keeps a shortfall of
    # resources from surfacing as an IndexError; it is reported by the
    # count comparison below instead.
    all_pvc_dict = get_all_pvcs(namespace=namespace)
    pvc_bound_list, pvc_not_bound_list = [], []
    for pvc_data in all_pvc_dict["items"][:len(pvc_scale_list)]:
        if pvc_data["status"]["phase"] == constants.STATUS_BOUND:
            pvc_bound_list.append(pvc_data["metadata"]["name"])
        else:
            pvc_not_bound_list.append(pvc_data["metadata"]["name"])

    # Get all PODs from namespace
    ocp_pod_obj = OCP(kind=constants.DEPLOYMENTCONFIG, namespace=namespace)
    all_pods_dict = ocp_pod_obj.get()
    pod_running_list, pod_not_running_list = [], []
    for pod_data in all_pods_dict["items"][:len(pod_scale_list)]:
        if pod_data["status"]["availableReplicas"]:
            pod_running_list.append(pod_data["metadata"]["name"])
        else:
            pod_not_running_list.append(pod_data["metadata"]["name"])

    # Check status of PVCs PODs scaled in pre-upgrade
    if len(pvc_bound_list) != len(pvc_scale_list):
        raise UnexpectedBehaviour(
            f"PVC Bound count mismatch {len(pvc_not_bound_list)} PVCs not in Bound state "
            f"PVCs not in Bound state {pvc_not_bound_list}")
    logging.info(
        f"All the expected {len(pvc_bound_list)} PVCs are in Bound state")

    if len(pod_running_list) != len(pod_scale_list):
        raise UnexpectedBehaviour(
            f"POD Running count mismatch {len(pod_not_running_list)} PODs not in Running state "
            f"PODs not in Running state {pod_not_running_list}")
    logging.info(
        f"All the expected {len(pod_running_list)} PODs are in Running state"
    )

    # Check ceph health status
    utils.ceph_health_check()
def validate_workload(self, workload_id, workload_name):
    """
    Validates each stage of cosbench workload

    The result csv of the workload is read row by row (after the header);
    column 16 of every row must hold the status "completed".

    Args:
        workload_id (str): ID of cosbench workload
        workload_name (str): Name of the workload

    Raises:
        UnexpectedBehaviour: When workload csv is incorrect/malformed
            (no header row could be read).
        AssertionError: When a stage did not reach the "completed" status.
    """
    workload_csv = self.get_result_csv(workload_id=workload_id,
                                       workload_name=workload_name)
    with open(workload_csv, "r") as file:
        reader = csv.reader(file)
        # A default is required here: plain next() raises StopIteration on
        # an empty file, which would bypass the malformed-csv error below.
        header = next(reader, None)
        if header is None:
            raise UnexpectedBehaviour(
                f"Workload csv is incorrect/malformed. Dumping csv {workload_csv}"
            )

        # Iterate over each row after the header
        logger.info(
            f"Verifying whether each stage of workload {workload_id} completed"
        )
        for row in reader:
            if row[16] != "completed":
                # Raised explicitly: asserting on the (always truthy)
                # message string would never fail.
                raise AssertionError(
                    f"Failed: Stage {row[0]} did not complete. Status {row[16]}"
                )
            logger.info(f"Stage {row[0]} completed successfully")
def verify_multi_attach_error(self, pod_list):
    """
    Checks for the expected failure event message in oc describe command

    Args:
        pod_list (list): list of pod objects

    Returns:
        bool: True if Multi-Attach Error is found in oc describe of every pod

    Raises:
        UnexpectedBehaviour: If Multi-Attach Error not found in describe
            command; carries the pod name and the describe output that
            was checked.
    """
    failure_str = "Multi-Attach error for volume"
    for pod_obj in pod_list:
        # Describe once so the output attached to the exception is exactly
        # the output that was inspected.
        describe_output = pod_obj.describe()
        if failure_str not in describe_output:
            logger.warning(
                f"Multi-Attach error is not found in oc describe of {pod_obj.name}"
            )
            raise UnexpectedBehaviour(pod_obj.name, describe_output)
        logger.info(
            f"Multi-Attach error is present in oc describe of {pod_obj.name}"
        )
    return True
def increase_pods_per_worker_node_count(pods_per_node=500, pods_per_core=10):
    """
    Raise the pods-per-node limit on the worker nodes.

    OCP supports 250 pods per node by default (500 from OCP 4.6); this
    function applies a custom kubelet config to allow more.
    more detail:
    https://docs.openshift.com/container-platform/4.5/nodes/nodes/nodes-nodes-managing-max-pods.html

    Example: with the defaults podsPerCore=10 and maxPods=250, podsPerCore
    is the limiting factor unless the node has 25 cores or more.

    WARN: This function will perform Unscheduling of workers and reboot, so
    any non-dc pods are expected to be terminated.

    Args:
        pods_per_node (int): Pods per node limit count
        pods_per_core (int): Pods per core limit count

    Raise:
        UnexpectedBehaviour if machineconfigpool not in Updating state
        within 40secs.

    """
    # Build the kubelet config from the template
    max_pods_template = templating.load_yaml(
        constants.PODS_PER_NODE_COUNT_YAML)
    kubelet_config = max_pods_template["spec"]["kubeletConfig"]
    kubelet_config["podsPerCore"] = pods_per_core
    kubelet_config["maxPods"] = pods_per_node

    # Create new max-pods label
    max_pods_obj = OCS(**max_pods_template)
    assert max_pods_obj.create()

    # Apply the changes in the workers
    ocp = OCP()
    assert ocp.exec_oc_cmd(
        command="label machineconfigpool worker custom-kubelet=small-pods")

    # First wait for Updating status to become True (it starts as False).
    # NOTE(review): items[1] / conditions[4] are positional; presumably the
    # worker pool and its "Updating" condition - confirm against the cluster
    get_cmd = "get machineconfigpools -o yaml"
    polls_done = 0
    while True:
        pools = ocp.exec_oc_cmd(command=get_cmd)
        pool_status = pools.get("items")[1].get("status")
        update_status = pool_status.get("conditions")[4].get("status")
        if update_status == "True":
            break
        if polls_done >= 8:
            raise UnexpectedBehaviour(
                "After 40sec machineconfigpool not in Updating state")
        logging.info("Sleep 5secs for updating status change")
        polls_done += 1
        time.sleep(5)

    # Validate either change is successful
    pools = ocp.exec_oc_cmd(command=get_cmd)
    machine_count = pools.get("items")[1].get("status").get("machineCount")
    # During manual execution observed each node took 240+ sec for update
    utils.wait_for_machineconfigpool_status(
        node_type=constants.WORKER_MACHINE, timeout=machine_count * 300)
def verify_pv_not_exists(pv_name, cbp_name):
    """
    Ensure that pv does not exists

    Args:
        pv_name (str): Name of the PV expected to be gone
        cbp_name (str): Name of the pool whose rbd images are listed

    Raises:
        UnexpectedBehaviour: If the pv name is still listed by
            ``rbd ls -p <cbp_name>``, or if the PV lookup on the oc side
            succeeds (the PV still exists).
        AssertionError: If the PV lookup fails with an error other than
            "not found".
    """
    # Validate on ceph side
    logger.info(f"Verifying pv {pv_name} exists on backend")
    ct_pod = pod.get_ceph_tools_pod()
    pvc_list = ct_pod.exec_ceph_cmd(
        ceph_cmd=f"rbd ls -p {cbp_name}", format='json'
    )
    if pv_name in pvc_list:
        raise UnexpectedBehaviour(f"pv {pv_name} exists on backend")
    logger.info(
        f"Expected: pv {pv_name} doesn't exist on backend after deleting pvc"
    )

    # Validate on oc side
    try:
        PV.get(pv_name)
    except CommandFailed as ecf:
        assert "not found" in str(ecf), (
            f"Unexpected: pv {pv_name} still exists"
        )
    else:
        # No exception means the lookup found the PV; without this branch
        # a surviving PV would pass the check silently.
        raise UnexpectedBehaviour(f"Unexpected: pv {pv_name} still exists")
    logger.info(
        "Expected: pv should not be found "
        "after deleting corresponding pvc"
    )
def cleanup(self):
    """
    Tear down the pods, PVC, PV and storage class used by the test and
    verify the reclaim policy is honored.

    All pods of ``self.namespace`` are deleted and waited on. When a PVC
    exists it is deleted and its backing PV is validated as deleted; if
    that validation fails, a Retain reclaim policy is accepted (the PV is
    waited on to become Released and then deleted explicitly).

    Raises:
        UnexpectedBehaviour: If the PV is not deleted after deleting the
            PVC and the reclaim policy is not Retain.
    """
    for test_pod in pod.get_all_pods(namespace=self.namespace):
        test_pod.delete()
        test_pod.ocp.wait_for_delete(resource_name=test_pod.name)

    if hasattr(self, 'pvc_obj'):
        backing_pv = self.pvc_obj.backed_pv_obj
        self.pvc_obj.delete()

        try:
            assert helpers.validate_pv_delete(backing_pv.name)
        except AssertionError:
            if self.reclaim_policy != constants.RECLAIM_POLICY_RETAIN:
                raise UnexpectedBehaviour(
                    f"PV {backing_pv.name} is not deleted after deleting PVC")
            helpers.wait_for_resource_state(backing_pv,
                                            constants.STATUS_RELEASED)
            # TODO: deletion of ceph rbd image, blocked by BZ#1723656
            backing_pv.delete()

    if hasattr(self, 'sc_obj'):
        self.sc_obj.delete()
def compare_sizes(mcg_obj, ceph_obj, bucket_name):
    """
    Compare the available size of the ceph cluster with that of a bucket.

    Both values are converted to GB (3 decimals) and must not differ by
    more than 1.5.

    Args:
        mcg_obj (obj): MCG object
        ceph_obj (obj): OCP object of Ceph cluster
        bucket_name (str): Name of the bucket

    Raises:
        NoobaaConditionException: If the bucket size cannot be divided
            (TypeError), i.e. the backend size is not available yet
        UnexpectedBehaviour: In case sizes does not match
    """
    capacity = ceph_obj.get().get("status").get("ceph").get("capacity")
    ceph_size_in_gb = float(
        f"{capacity.get('bytesAvailable') / constants.GB:.3f}")

    bucket_bytes = get_bucket_available_size(mcg_obj, bucket_name)
    try:
        bucket_size_in_gb = float(f"{bucket_bytes / constants.GB:.3f}")
    except TypeError:
        raise NoobaaConditionException(
            "Noobaa backingstore has not yet synced the backend size. Retrying."
        )

    within_tolerance = abs(ceph_size_in_gb - bucket_size_in_gb) <= 1.5
    if within_tolerance:
        logger.info(
            f"Available size in ceph cluster:{ceph_size_in_gb} and object bucket:{bucket_size_in_gb} matches"
        )
        return
    raise UnexpectedBehaviour(
        f"Available size in ceph cluster:{ceph_size_in_gb} and object bucket:{bucket_size_in_gb} are not "
        f"matching. Retrying...")
def wait_for_build_to_complete(self, timeout=900):
    """
    Block until every not-yet-completed build reaches the complete state.

    Builds already listed in ``self.build_completed`` are skipped. For each
    remaining build the duration is collected once it completes.

    Args:
        timeout (int): Time in seconds to wait

    Raises:
        UnexpectedBehaviour: If a build does not reach the complete state
            in time; the message carries the oc describe output.
    """
    log.info(f"Waiting for the build to reach {JENKINS_BUILD_COMPLETE} state")
    for project in self.projects:
        for jenkins_build in self.get_builds_obj(namespace=project):
            if (jenkins_build.name, project) in self.build_completed:
                continue
            try:
                wait_for_resource_state(
                    resource=jenkins_build,
                    state=JENKINS_BUILD_COMPLETE,
                    timeout=timeout,
                )
                self.get_build_duration_time(
                    namespace=project, build_name=jenkins_build.name
                )
            except ResourceWrongStatusException:
                # Collect diagnostics before failing
                build_ocp = OCP(namespace=project, kind='build')
                described = build_ocp.describe(
                    resource_name=jenkins_build.name)
                error_msg = (
                    f'{jenkins_build.name} did not reach to '
                    f'{JENKINS_BUILD_COMPLETE} state after {timeout} sec\n'
                    f'oc describe output of {jenkins_build.name} \n:{described}'
                )
                log.error(error_msg)
                self.print_completed_builds_results()
                raise UnexpectedBehaviour(error_msg)
def test_registry_respin_pod(self, pod_name, iterations):
    """
    Exercise the OCS-backed registry after respinning a ceph pod.

    The given pod is deleted, the SVT workload is started, an image is
    pulled and pushed to the registry, and finally the image, the registry
    pods and the overall cluster health are validated.

    Raises:
        UnexpectedBehaviour: If the pushed image is not found in registry
    """
    # Respin relevant pod
    log.info(f"Respin Ceph pod {pod_name}")
    respin = disruption_helpers.Disruptions()
    respin.set_resource(resource=f'{pod_name}')
    respin.delete_resource()

    # Start SVT workload for pushing images to registry
    svt_setup(iterations=iterations)

    # Image pull and push to registry
    image_pull(image_url=IMAGE_URL)
    self.image_path = image_push(
        image_url=IMAGE_URL, namespace=OPENSHIFT_IMAGE_REGISTRY_NAMESPACE
    )

    # List the images in registry
    registry_images = image_list_all()
    log.info(f"Image list {registry_images}")

    # Check either image present in registry or not
    image_found = check_image_exists_in_registry(image_url=IMAGE_URL)
    if not image_found:
        raise UnexpectedBehaviour("Image URL not present in registry")

    # Validate image registry pods
    validate_registry_pod_status()

    # Validate cluster health ok and all pods are running
    self.sanity_helpers.health_check()
def test_bucket_policy_actions(self, mcg_obj, bucket_factory):
    """
    Tests user access to Put, Get, Delete bucket policy actions

    Flow:
        1. Admin puts a policy granting the OBC account PutBucketPolicy.
        2. The user replaces it with a policy granting only
           GetBucketPolicy and DeleteBucketPolicy.
        3. The user reads the policy back.
        4. A further put by the user must be rejected with AccessDenied.
        5. The user deletes the policy.

    Raises:
        UnexpectedBehaviour: If the put in step 4 succeeds, or fails with
            an error code other than AccessDenied.
    """
    # Creating obc and obc object to get account details, keys etc
    obc_name = bucket_factory(amount=1, interface='OC')[0].name
    obc_obj = OBC(obc_name)

    bucket_policy_generated = gen_bucket_policy(
        user_list=obc_obj.obc_account,
        actions_list=['PutBucketPolicy'],
        resources_list=[obc_obj.bucket_name])
    bucket_policy = json.dumps(bucket_policy_generated)

    # Admin creates a policy on the user bucket, for Action: PutBucketPolicy
    logger.info(
        f'Creating policy by admin on bucket: {obc_obj.bucket_name}')
    put_policy = put_bucket_policy(mcg_obj, obc_obj.bucket_name,
                                   bucket_policy)
    logger.info(f'Put bucket policy response from admin: {put_policy}')

    # Verifying Put bucket policy by user by changing the actions to
    # GetBucketPolicy & DeleteBucketPolicy
    user_generated_policy = gen_bucket_policy(
        user_list=obc_obj.obc_account,
        actions_list=['GetBucketPolicy', 'DeleteBucketPolicy'],
        resources_list=[obc_obj.bucket_name])
    bucket_policy1 = json.dumps(user_generated_policy)

    logger.info(
        f'Changing bucket policy by User on bucket: {obc_obj.bucket_name}')
    put_policy_user = put_bucket_policy(obc_obj, obc_obj.bucket_name,
                                        bucket_policy1)
    logger.info(f'Put bucket policy response from user: {put_policy_user}')

    # Verifying whether user can get the bucket policy after modification
    get_policy = get_bucket_policy(obc_obj, obc_obj.bucket_name)
    logger.info(f"Got bucket policy: {get_policy['Policy']}")

    # Verifying whether user is not allowed Put the bucket policy after
    # modification
    logger.info(
        f'Verifying whether user: {obc_obj.obc_account} is denied to put bucket policy'
    )
    try:
        put_bucket_policy(obc_obj, obc_obj.bucket_name, bucket_policy1)
    except boto3exception.ClientError as e:
        logger.info(e.response)
        response = HttpResponseParser(e.response)
        if response.error['Code'] == 'AccessDenied':
            logger.info(
                f'Put bucket policy has been denied access to the user: {obc_obj.obc_account}'
            )
        else:
            raise UnexpectedBehaviour(
                f"{e.response} received invalid error code {response.error['Code']}"
            )
    else:
        # The put was expected to be rejected; a silent success would hide
        # a policy that is not being enforced.
        raise UnexpectedBehaviour(
            f"Put bucket policy unexpectedly succeeded for user: {obc_obj.obc_account}"
        )

    # Verifying whether user can Delete the bucket policy after modification
    logger.info(f'Deleting bucket policy on bucket: {obc_obj.bucket_name}')
    delete_policy = delete_bucket_policy(obc_obj, obc_obj.bucket_name)
    logger.info(f'Delete policy response: {delete_policy}')
def fetch_used_size(cbp_name, exp_val=None):
    """
    Fetch used size in the pool

    The size is read from ``rados df -p <cbp_name>`` (first pool entry,
    ``size_bytes``) and converted to GB with 4 decimals.

    Args:
        cbp_name (str): Name of the pool to query
        exp_val (float): Expected size in GB; when given (including 0),
            the used size must be within 1.5 of it

    Returns:
        float: Used size in GB

    Raises:
        UnexpectedBehaviour: If ``exp_val`` is given and the used size
            differs from it by 1.5 or more
    """
    ct_pod = pod.get_ceph_tools_pod()
    rados_status = ct_pod.exec_ceph_cmd(
        ceph_cmd=f"rados df -p {cbp_name}"
    )
    size_bytes = rados_status['pools'][0]['size_bytes']

    # Convert size to GB
    used_in_gb = float(
        format(size_bytes / constants.GB, '.4f')
    )

    # Compare against None rather than truthiness so that an expected
    # size of 0 is still validated.
    if exp_val is not None and not abs(exp_val - used_in_gb) < 1.5:
        raise UnexpectedBehaviour(
            f"Actual {used_in_gb} and expected size {exp_val} not "
            f"matching. Retrying"
        )
    return used_in_gb
def verify_pv_not_exists(pvc_obj, cbp_name):
    """
    Ensure that pv does not exists

    Args:
        pvc_obj (obj): PVC object whose backing PV is expected to be gone
        cbp_name (str): Name of the pool passed to the backend check

    Raises:
        UnexpectedBehaviour: If the backend check reports the PV as
            existing, or if the PV lookup on the oc side succeeds (the PV
            still exists).
        AssertionError: If the PV lookup fails with an error other than
            "not found".
    """
    # Validate on ceph side
    logger.info(f"Verifying pv {pvc_obj.backed_pv} exists on backend")

    if pvc_obj.verify_pv_exists_in_backend(cbp_name):
        raise UnexpectedBehaviour(f"pv {pvc_obj.backed_pv} exists on backend")
    logger.info(
        f"Expected: pv {pvc_obj.backed_pv} doesn't exist on backend after deleting pvc"
    )

    # Validate on oc side
    try:
        PV.get(pvc_obj.backed_pv)
    except CommandFailed as ecf:
        assert "not found" in str(ecf), (
            f"Unexpected: pv {pvc_obj.backed_pv} still exists"
        )
    else:
        # No exception means the lookup found the PV; without this branch
        # a surviving PV would pass the check silently.
        raise UnexpectedBehaviour(
            f"Unexpected: pv {pvc_obj.backed_pv} still exists"
        )
    logger.info(
        "Expected: pv should not be found "
        "after deleting corresponding pvc"
    )
def check_incident_cleared(self, summary, measure_end_time, time_min=420):
    """
    Verify that all incidents with the provided summary are cleared.

    The wait budget is the time left until ``measure_end_time + time_min``;
    when that moment has already passed, a 1 second timeout is used.

    Args:
        summary (str): Incident summary
        measure_end_time (int): Timestamp of measurement end
        time_min (int): Number of seconds to wait for incidents to be
            cleared since measurement end

    Raises:
        UnexpectedBehaviour: If ``wait_for_incident_cleared`` returns a
            non-empty result
    """
    deadline = measure_end_time + time_min
    time_wait = int(deadline - time.time())
    if time_wait <= 0:
        # Deadline already passed: still poll once with a minimal timeout
        time_wait = 1
    else:
        logger.info(
            f"Waiting for approximately {time_wait} seconds for incidents "
            f"to be cleared ({time_min} seconds since measurement end)")

    cleared_incidents = self.wait_for_incident_cleared(
        summary=summary, timeout=time_wait)
    logger.info(f"Cleared incidents: {cleared_incidents}")

    # NOTE(review): a non-empty result is treated as "not cleared" - confirm
    # against wait_for_incident_cleared
    if len(cleared_incidents) != 0:
        raise UnexpectedBehaviour(f"{summary} incidents were not cleared")
    logger.info(f"{summary} incidents were cleared")
def stop_powernodes_machines(self, powernode_machines, timeout=900,
                             wait=True, force=True):
    """
    Stop PowerNode Machines

    Each node is shut down with ``virsh shutdown`` and then polled every
    3 seconds until ``verify_machine_is_down`` reports success.

    Args:
        powernode_machines (list): PowerNode objects
        timeout (int): time in seconds to wait for node to reach 'not ready' state
        wait (bool): True if need to wait till the restarted node reaches timeout
            - for future use
        force (bool): True for PowerNode ungraceful power off, False for
            graceful PowerNode shutdown - for future use

    Raises:
        UnexpectedBehaviour: If PowerNode machine is still up
    """
    ocpversion = get_ocp_version("-")
    for node in powernode_machines:
        cmd = f"sudo virsh shutdown test-ocp{ocpversion}-{node.name}"
        result = exec_cmd(cmd)
        logger.info(f"Result of shutdown {result}")

        logger.info("Verifying node is down")
        ret = TimeoutSampler(
            timeout=timeout,
            sleep=3,
            func=self.verify_machine_is_down,
            node=node,
        )
        logger.info(ret)
        if not ret.wait_for_func_status(result=True):
            # f-prefix is required for the node name to be interpolated
            raise UnexpectedBehaviour(f"Node {node.name} is still Running")
def compare_sizes(mcg_obj, ceph_obj, bucket_name):
    """
    Compare the available size of the ceph cluster with that of a bucket.

    Both values are converted to GB (3 decimals) and must not differ by
    more than 1.5.

    Args:
        mcg_obj (obj): MCG object
        ceph_obj (obj): OCP object of Ceph cluster
        bucket_name (str): Name of the bucket

    Raises:
        UnexpectedBehaviour: In case sizes does not match
    """
    capacity = ceph_obj.get().get("status").get("ceph").get("capacity")
    ceph_size_in_gb = float(
        f"{capacity.get('bytesAvailable') / constants.GB:.3f}")

    bucket_bytes = get_bucket_available_size(mcg_obj, bucket_name)
    bucket_size_in_gb = float(f"{bucket_bytes / constants.GB:.3f}")

    within_tolerance = abs(ceph_size_in_gb - bucket_size_in_gb) <= 1.5
    if within_tolerance:
        logger.info(
            f"Available size in ceph cluster:{ceph_size_in_gb} and object bucket:{bucket_size_in_gb} matches"
        )
        return
    raise UnexpectedBehaviour(
        f"Available size in ceph cluster:{ceph_size_in_gb} and object bucket:{bucket_size_in_gb} are not "
        f"matching. Retrying...")
def wait_for_pgbench_status(self, status, timeout=None):
    """
    Wait until every pgbench benchmark pod reaches the requested status.

    Args:
        status (str): status to reach Running or Completed
        timeout (int): Time in seconds to wait; falls back to 1800 when
            not given

    Raises:
        UnexpectedBehaviour: If a pod does not reach the status in time;
            the message carries the pod's logs.
    """
    # With the default values in the benchmark yaml the pgbench pod
    # sometimes does not complete within the specified time, which seems to
    # vary with the infrastructure. For now the timeout is 30 mins; it is to
    # be redefined once results of individual runs have been monitored.
    timeout = timeout or 1800

    # Wait for pg_bench pods to initialized and running
    log.info(f"Waiting for pgbench pods to be reach {status} state")
    for pgbench_pod_obj in self.get_pgbench_pods():
        try:
            wait_for_resource_state(
                resource=pgbench_pod_obj, state=status, timeout=timeout
            )
        except ResourceWrongStatusException:
            pod_logs = run_cmd(f"oc logs {pgbench_pod_obj.name}")
            error_msg = f"{pgbench_pod_obj.name} did not reach to {status} state after {timeout} sec\n{pod_logs}"
            log.error(error_msg)
            raise UnexpectedBehaviour(error_msg)
def wait_for_build_status(self, status, timeout=900):
    """
    Block until every not-yet-completed build reaches the given status.

    Builds already listed in ``self.build_completed`` are skipped; builds
    that reach the status are appended to it.

    Args:
        status (str): status to reach Running or Completed
        timeout (int): Time in seconds to wait

    Raises:
        UnexpectedBehaviour: If a build does not reach the status in time;
            the message carries the oc describe output.
    """
    log.info(f"Waiting for the build to reach {status} state")
    for project in self.projects:
        for jenkins_build in self.get_builds_obj(namespace=project):
            build_key = (jenkins_build.name, project)
            if build_key in self.build_completed:
                continue
            try:
                wait_for_resource_state(
                    resource=jenkins_build, state=status, timeout=timeout
                )
                self.build_completed.append((jenkins_build.name, project))
            except ResourceWrongStatusException:
                # Collect diagnostics before failing
                build_ocp = OCP(namespace=project, kind='build')
                described = build_ocp.describe(
                    resource_name=jenkins_build.name)
                error_msg = (
                    f'{jenkins_build.name} did not reach to '
                    f'{status} state after {timeout} sec\n'
                    f'oc describe output of {jenkins_build.name} \n:{described}'
                )
                log.error(error_msg)
                self.get_builds_logs()
                raise UnexpectedBehaviour(error_msg)
def validate_pgbench_run(self, pgbench_pods):
    """
    Parse the logs of each pgbench pod and verify latency data is present.

    Args:
        pgbench pods (list): List of pgbench pods

    Returns:
        list: Parsed pgbench output of every pod, in input order

    Raises:
        UnexpectedBehaviour: If a parsed run has no ``latency_avg`` value
    """
    collected = []
    for pgbench_pod in pgbench_pods:
        log.info(f"pgbench_client_pod===={pgbench_pod.name}====")
        raw_logs = run_cmd(f'oc logs {pgbench_pod.name}')
        pg_output = utils.parse_pgsql_logs(raw_logs)
        log.info(
            "*******PGBench output log*********\n"
            f"{pg_output}"
        )
        for run_data in pg_output:
            # Only the first key of each entry is inspected
            first_run_id = list(run_data.keys())[0]
            if not run_data[first_run_id]['latency_avg']:
                raise UnexpectedBehaviour(
                    "PGBench failed to run, "
                    "no data found on latency_avg"
                )
        log.info(f"PGBench on {pgbench_pod.name} completed successfully")
        collected.append(pg_output)
    return collected
def wait_for_jenkins_deploy_status(self, status, timeout=600):
    """
    Block until all jenkins-deploy pods of every project reach the status.

    Args:
        status (str): status to reach Running or Completed
        timeout (int): Time in seconds to wait

    Raises:
        UnexpectedBehaviour: If a pod does not reach the status in time;
            the message carries the pod's log and describe output.
    """
    log.info(f"Waiting for jenkins-deploy pods to be reach {status} state")
    for project in self.projects:
        for deploy_pod in self.get_jenkins_deploy_pods(namespace=project):
            try:
                wait_for_resource_state(
                    resource=deploy_pod, state=status, timeout=timeout
                )
            except ResourceWrongStatusException:
                # Gather logs and description for the failure message
                project_ocp = OCP(namespace=project)
                output_log = project_ocp.exec_oc_cmd(
                    command=f'logs {deploy_pod.name}',
                    out_yaml_format=False)
                output_describe = project_ocp.exec_oc_cmd(
                    command=f'describe {deploy_pod.name}',
                    out_yaml_format=False)
                error_msg = (
                    f'{deploy_pod.name} did not reach to '
                    f'{status} state after {timeout} sec'
                    f'\n output log {deploy_pod.name}:\n{output_log}'
                    f'\n output describe {deploy_pod.name}:\n{output_describe}'
                )
                log.error(error_msg)
                raise UnexpectedBehaviour(error_msg)
def cleanup(self):
    """
    Tear down the PVC, PV and storage class created in dynamic_pvc_base()
    and verify the reclaim policy is honored.

    When a PVC exists it is deleted and its backing PV is validated as
    deleted; if that validation fails, a Retain reclaim policy is accepted
    (the PV must become Released and is then deleted explicitly).

    Raises:
        UnexpectedBehaviour: If the PV is not deleted after deleting the
            PVC and the reclaim policy is not Retain.
    """
    if hasattr(self, 'pvc_obj'):
        backing_pv = self.pvc_obj.backed_pv_obj
        self.pvc_obj.delete()

        try:
            assert helpers.validate_pv_delete(backing_pv.name)
        except AssertionError:
            if self.reclaim_policy != constants.RECLAIM_POLICY_RETAIN:
                raise UnexpectedBehaviour(
                    f"PV {backing_pv.name} is not deleted after deleting PVC")
            assert helpers.wait_for_resource_state(
                backing_pv, constants.STATUS_RELEASED)
            # TODO: deletion of ceph rbd image, blocked by BZ#1723656
            backing_pv.delete()

    if hasattr(self, 'sc_obj'):
        self.sc_obj.delete()
def validate_pgbench_run(self, pgbench_pods, print_table=True):
    """
    Parse the logs of each pgbench pod, verify latency data is present and
    optionally log a summary table.

    Args:
        pgbench pods (list): List of pgbench pods
        print_table (bool): When True, log a table with one row per run

    Returns:
        list: ``(parsed_output, pod_name)`` tuples, one per pod

    Raises:
        UnexpectedBehaviour: If a parsed run has no ``latency_avg`` value
    """
    collected = []
    for pgbench_pod in pgbench_pods:
        log.info(f"pgbench_client_pod===={pgbench_pod.name}====")
        raw_logs = run_cmd(
            f"oc logs {pgbench_pod.name} -n {RIPSAW_NAMESPACE}")
        pg_output = utils.parse_pgsql_logs(raw_logs)
        log.info(
            "*******PGBench output log*********\n"
            f"{pg_output}"
        )
        for run_data in pg_output:
            # Only the first key of each entry is inspected
            first_run_id = list(run_data.keys())[0]
            if not run_data[first_run_id]["latency_avg"]:
                raise UnexpectedBehaviour(
                    "PGBench failed to run, "
                    "no data found on latency_avg"
                )
        log.info(f"PGBench on {pgbench_pod.name} completed successfully")
        collected.append((pg_output, pgbench_pod.name))

    if print_table:
        # Keys read from each run, in table column order
        stat_keys = (
            "scaling_factor",
            "num_clients",
            "num_threads",
            "number_of_transactions_per_client",
            "number_of_transactions_actually_processed",
            "latency_avg",
            "lat_stddev",
            "tps_incl",
            "tps_excl",
        )
        summary_table = PrettyTable()
        summary_table.field_names = [
            "pod_name",
            "scaling_factor",
            "num_clients",
            "num_threads",
            "trans_client",
            "actually_trans",
            "latency_avg",
            "lat_stddev",
            "tps_incl",
            "tps_excl",
        ]
        for pod_runs, pod_name in collected:
            for run_entry in pod_runs:
                for run_stats in run_entry.values():
                    summary_table.add_row(
                        [pod_name] + [run_stats[key] for key in stat_keys]
                    )
        log.info(f"\n{summary_table}\n")

    return collected
def verify_pv_not_exists(pvc_obj, cbp_name, rbd_image_id):
    """
    Check that the PV behind a deleted PVC is gone from backend and cluster.

    Args:
        pvc_obj (obj): PVC object whose backing PV is expected to be gone
        cbp_name (str): Pool name passed to the backend check
        rbd_image_id (str): Image uuid passed to the backend check

    Raises:
        UnexpectedBehaviour: If the backend check does not confirm deletion
        AssertionError: If the PV deletion on the oc side is not confirmed
    """
    # Validate on ceph side
    logger.info(f"Verifying PV {pvc_obj.backed_pv} exists on backend")

    deleted_in_backend = helpers.verify_volume_deleted_in_backend(
        interface=constants.CEPHBLOCKPOOL,
        image_uuid=rbd_image_id,
        pool_name=cbp_name,
    )
    if not deleted_in_backend:
        raise UnexpectedBehaviour(f"PV {pvc_obj.backed_pv} exists on backend")
    logger.info(
        f"Expected: PV {pvc_obj.backed_pv} doesn't exist on backend after deleting PVC"
    )

    # Validate on oc side
    logger.info("Verifying whether PV is deleted")
    try:
        assert helpers.validate_pv_delete(pvc_obj.backed_pv)
    except AssertionError as err:
        # Only an assertion mentioning "not found" is tolerated
        assert "not found" in str(err), (
            f"Unexpected: PV {pvc_obj.backed_pv} still exists")
    logger.info(
        "Expected: PV should not be found after deleting corresponding PVC")
def test_scale_obc_post_upgrade():
    """
    Validate after the upgrade that the OBCs recorded in the scale data
    file are Bound, then check ceph health and clean the OBCs up.

    Raises:
        FileNotFoundError: If the scale data file does not exist
        UnexpectedBehaviour: If the number of Bound OBCs differs from the
            number recorded in the data file
    """
    # Get info from SCALE_DATA_FILE for validation
    if not os.path.exists(obc_scaled_data_file):
        raise FileNotFoundError
    scale_data = templating.load_yaml(obc_scaled_data_file)
    namespace = scale_data.get("NAMESPACE")
    obc_scale_list = scale_data.get("OBC_SCALE_LIST")

    # Check obc status in current namespace
    bound_obcs, unbound_obcs = scale_noobaa_lib.check_all_obcs_status(
        namespace
    )

    # Check status of OBC scaled in pre-upgrade
    if len(bound_obcs) != len(obc_scale_list):
        raise UnexpectedBehaviour(
            f" OBC bound list count mismatch {len(unbound_obcs)} OBCs not in Bound state "
            f" OBCs not in Bound state {unbound_obcs}"
        )
    log.info(f" Expected all {len(bound_obcs)} OBCs are in Bound state")

    # Check ceph health status
    utils.ceph_health_check()

    # Clean up all scaled obc
    scale_noobaa_lib.cleanup(namespace=namespace)
def start(self, node, timeout):
    """
    Start the given service using systemctl.

    The service is started over ssh on the node and then polled every
    3 seconds until ``verify_service`` reports it as active.

    Args:
        node (object): Node objects
        timeout (int): time in seconds to wait for service to start.

    Raises:
        UnexpectedBehaviour: If service on powerNode machine is still not up
    """
    nodeip = self.nodes[node.name]
    cmd = f"ssh core@{nodeip} sudo systemctl start {self.service_name}.service"
    result = exec_cmd(cmd)
    logger.info(
        f"Result of start of service {self.service_name} is {result}")

    ret = TimeoutSampler(
        timeout=timeout,
        sleep=3,
        func=self.verify_service,
        node=node,
        action=ACTIVE,
    )
    if not ret.wait_for_func_status(result=True):
        # f-prefix is required for the names to be interpolated
        raise UnexpectedBehaviour(
            f"Service {self.service_name} on Node {node.name} is still not Running"
        )
def get_expected_worker_count(scale_count=1500):
    """
    Look up how many workers are needed to scale pods on this platform.

    The count comes from ``constants.SCALE_WORKER_DICT``, keyed first by
    scale count and then by a platform key chosen from the deployment type
    and platform in ``config.ENV_DATA``.

    Args:
        scale_count (int): Scale count of the PVC+POD to be created

    Returns:
        expected_worker_count (int): Expected worker count to scale required number of pod

    Raises:
        UnexpectedBehaviour: If scale_count is not a key of the dict
        UnsupportedPlatformError: If the deployment type / platform
            combination is not one of the supported ones
    """
    # Get expected worker count based on dict in constants.py
    worker_count_dict = constants.SCALE_WORKER_DICT
    if scale_count not in worker_count_dict:
        raise UnexpectedBehaviour(
            "Scale_count value is not matching the dict key")

    # (deployment type, platform) -> key inside the per-scale-count dict
    platform_keys = {
        ("ipi", "aws"): "aws",
        ("upi", "vsphere"): "vmware",
        ("upi", "baremetal"): "bm",
        ("upi", "azure"): "azure",
    }
    deployment_type = config.ENV_DATA["deployment_type"]
    dict_key = None
    # The platform is only consulted for known deployment types
    if deployment_type in ("ipi", "upi"):
        platform = config.ENV_DATA["platform"].lower()
        dict_key = platform_keys.get((deployment_type, platform))
    if dict_key is None:
        raise UnsupportedPlatformError("Unsupported Platform")
    return worker_count_dict[scale_count][dict_key]
def check_pvcdata_collected_on_prometheus(pvc_name):
    """
    Verify that the persistentvolumeclaim metrics contain the given pvc.

    Args:
        pvc_name (str): Name of the pvc

    Returns:
        True on success, raises UnexpectedBehaviour on failures
    """
    logger.info(
        f"Verify for created pvc {pvc_name} related data is collected on prometheus pod"
    )
    metrics = get_metrics_persistentvolumeclaims_info()

    # Keep only the result entries whose metric belongs to this pvc
    pvc_list = []
    for entry in metrics.get('data').get('result'):
        if pvc_name == entry.get('metric').get('persistentvolumeclaim'):
            pvc_list.append(entry)

    if not pvc_list:
        raise UnexpectedBehaviour(
            f"On prometheus pod for created pvc {pvc_name} related data is not found"
        )
    logger.info(
        f"Created pvc {pvc_name} data {pvc_list} is collected on prometheus pod"
    )
    return True
def stop(self, node, timeout):
    """
    Stop the service on a node via systemctl and wait until it is inactive.

    The command is run over ssh through the bastion host; ``-f`` is
    appended when ``self.force`` is set. The service state is then polled
    every 3 seconds with ``verify_service``.

    Args:
        node (object): Node objects
        timeout (int): time in seconds to wait for service to stop.

    Raises:
        UnexpectedBehaviour: If service on PowerNode machine is still up
    """
    node_ip = self.nodes[node.name]
    cmd_parts = [
        "ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null",
        f"root@{self.bastion_ip}",
        f"ssh core@{node_ip}",
        f"sudo systemctl stop {self.service_name}.service",
    ]
    if self.force:
        cmd_parts.append("-f")
    result = exec_cmd(" ".join(cmd_parts))
    logger.info(
        f"Result of shutdown {result}. Checking if service {self.service_name} went down."
    )

    sampler = TimeoutSampler(
        timeout=timeout,
        sleep=3,
        func=self.verify_service,
        node=node,
        action=INACTIVE,
    )
    service_down = sampler.wait_for_func_status(result=True)
    if not service_down:
        raise UnexpectedBehaviour(
            f"Service {self.service_name} on Node {node.name} is still Running"
        )
def test_read_non_cached_object(self, bucket_factory, mcg_obj, cld_mgr,
                                awscli_pod, bucketclass_dict):
    """
    Test reading an object that is not present in a cache bucket.

    Objects are uploaded directly to the AWS target bucket (bypassing the
    cache bucket), the cache check is run, and the objects are then read
    through the cache bucket and waited on to show up in the cache.

    Raises:
        UnexpectedBehaviour: If the cache check fails before any read
    """
    # Create the cached namespace bucket on top of the namespace resource
    bucket_obj = bucket_factory(bucketclass=bucketclass_dict)[0]

    aws_client = cld_mgr.aws_client
    s3_creds = {
        "access_key_id": aws_client.access_key,
        "access_key": aws_client.secret_key,
        "endpoint": constants.MCG_NS_AWS_ENDPOINT,
        "region": self.DEFAULT_REGION,
    }
    aws_target_bucket = bucket_obj.bucketclass.namespacestores[0].uls_name

    # Upload files directly to AWS
    uploaded_names = self.write_files_to_pod_and_upload(
        mcg_obj,
        awscli_pod,
        bucket_to_write=aws_target_bucket,
        amount=3,
        s3_creds=s3_creds,
    )
    cache_ok = check_cached_objects_by_name(mcg_obj, bucket_obj.name)
    if not cache_ok:
        raise UnexpectedBehaviour(
            "Objects were found in the cache of an empty bucket")

    # Read files from ns bucket
    self.download_files(mcg_obj, awscli_pod, bucket_to_read=bucket_obj.name)
    wait_for_cache(mcg_obj, bucket_obj.name, uploaded_names)