def test_reclaim_policy_retain(self):
    """
    Verify the behaviour of a PVC created from the 'retain' storage class.

    After the PVC is deleted the bound PV is expected to be in the
    'Released' phase, and the pool is expected to hold exactly one more
    Ceph image than before the PVC was created. The PV is deleted at the
    end of the test.
    """
    images_before = len(list_ceph_images(pool_name=self.cbp_obj.name))

    unique_pvc_name = helpers.create_unique_resource_name('retain', 'pvc')
    pvc_obj = helpers.create_pvc(
        sc_name=self.sc_obj_retain.name,
        pvc_name=unique_pvc_name,
    )
    helpers.wait_for_resource_state(pvc_obj, constants.STATUS_BOUND)
    pvc_obj.reload()

    # Remember which PV backs the PVC before the PVC goes away
    pv_name = pvc_obj.get()['spec']['volumeName']
    pv_namespace = pvc_obj.get()['metadata']['namespace']
    pv_handle = ocp.OCP(kind='PersistentVolume', namespace=pv_namespace)

    assert pvc_obj.delete()
    pvc_obj.ocp.wait_for_delete(resource_name=pvc_obj.name)

    pv_phase = pv_handle.get(pv_name).get('status').get('phase')
    assert pv_phase == 'Released', (
        f"Status of PV {pv_handle.get(pv_name)} is not 'Released'")
    log.info("Status of PV is Released")

    # The image must still be present in the pool after PVC deletion
    images_after = len(list_ceph_images(pool_name=self.cbp_obj.name))
    assert images_before + 1 == images_after

    assert pv_handle.delete(resource_name=pv_name)
def get_rhcos_workers(self):
    """
    Returns a list of rhcos worker names

    A node is selected when its ``node.openshift.io/os_id`` label equals
    ``rhcos`` and it carries the ``node-role.kubernetes.io/worker`` label.
    Nodes that do not have the ``os_id`` label are skipped instead of
    raising ``KeyError``.

    Returns:
        rhcos_workers (list): 'Hostname' addresses of the rhcos worker nodes

    """
    node_info = ocp.OCP(kind='node').get()
    rhcos_workers = []
    for node in node_info['items']:
        labels = node['metadata'].get('labels') or {}
        if labels.get('node.openshift.io/os_id') != 'rhcos':
            continue
        if 'node-role.kubernetes.io/worker' not in labels:
            continue
        # A node reports several addresses; only the hostname is wanted
        rhcos_workers.extend(
            address['address']
            for address in node['status']['addresses']
            if address['type'] == 'Hostname'
        )
    return rhcos_workers
def test_remove_mon_pod_from_cluster(self):
    """
    Remove a mon from the cluster after I/O was performed on a pool and
    verify that the mon pods come back up.

    The test is skipped when the cluster reports one mon or fewer.
    """
    ceph_cluster = CephCluster()
    pods = ocp.OCP(
        kind=constants.POD,
        namespace=config.ENV_DATA['cluster_namespace'],
    )
    list_mons = ceph_cluster.get_mons_from_cluster()
    # Skip explicitly instead of hiding pytest.skip() in an assert message,
    # which would be stripped together with the assert under "python -O".
    if len(list_mons) <= 1:
        pytest.skip("INVALID: Mon count should be more than one to delete.")

    self.pool_obj = create_ceph_block_pool()
    assert run_io_on_pool(self.pool_obj), 'Failed to run I/O on the pool'
    assert delete_cephblockpools([self.pool_obj]), 'Failed to delete pool'

    ceph_cluster.cluster_health_check(timeout=0)
    ceph_cluster.remove_mon_from_cluster()
    assert verify_mon_pod_up(pods), "Mon pods are not up and running state"
    ceph_cluster.cluster_health_check(timeout=60)
def add_role_to_user(role_type, user, cluster_role=False, namespace=None):
    """
    Grant a cluster role or a regular role to a user via 'oc adm policy'.

    Args:
        role_type (str): Type of the role to be added
        user (str): User to be added for the role
        cluster_role (bool): Whether to add a cluster-role or a regular role
        namespace (str): Namespace to be used

    Raises:
        AssertionError: When the command result is falsy

    """
    role_prefix = 'cluster-' if cluster_role else ''
    namespace_flag = f'-n {namespace}' if namespace else ''
    role_cmd = (
        f"adm policy add-{role_prefix}role-to-user "
        f"{role_type} {user} {namespace_flag}"
    )
    cmd_result = ocp.OCP().exec_oc_cmd(command=role_cmd)
    assert cmd_result, 'Adding role failed'
    logger.info(f"Role_type {role_type} added to the user {user}")
def post_ocp_deploy(self):
    """
    Run the steps that have to happen right after OCP deployment.

    Runs '/usr/sbin/setsebool -P container_use_cephfs on' through a debug
    command on every RHEL worker node and then calls ``add_stage_cert``.
    """
    # Workaround for #1777384 - enable container_use_cephfs on RHEL workers
    # Ticket: RHSTOR-787, see more details in the issue: #1151
    logger.info("Running WA for ticket: RHSTOR-787")
    ocp_obj = ocp.OCP()
    setsebool_cmd = ['/usr/sbin/setsebool -P container_use_cephfs on']
    for worker in get_typed_worker_nodes(os_id="rhel"):
        # every node gets its own copy of the command list
        cmd_list = list(setsebool_cmd)
        node = worker.get().get('metadata').get('name')
        logger.info(
            f"{node} is a RHEL based worker - applying '{cmd_list}'")
        # Spawning the debug RHEL pod failed a few times in the past, so
        # the call is wrapped in retry to keep CI stable.
        debug_cmd_with_retry = retry(CommandFailed)(ocp_obj.exec_oc_debug_cmd)
        debug_cmd_with_retry(node=node, cmd_list=cmd_list)
    # end of workaround
    self.add_stage_cert()
def get_all_storageclass():
    """
    Collect all storage classes except the ones whose name equals
    ``constants.IGNORE_SC_GP2`` or ``constants.IGNORE_SC_FLEX``.

    Returns:
        list: storage class items as returned by the 'get' call

    """
    ignored_names = (constants.IGNORE_SC_GP2, constants.IGNORE_SC_FLEX)
    sc_items = ocp.OCP(
        kind=constants.STORAGECLASS,
        namespace=defaults.ROOK_CLUSTER_NAMESPACE,
    ).get()['items']

    storageclass = []
    for sc_item in sc_items:
        if sc_item.get('metadata').get('name') in ignored_names:
            continue
        storageclass.append(sc_item)
    return storageclass
def validate_pv_delete(pv_name):
    """
    Validate that a PV is gone after its PVC was deleted.

    Args:
        pv_name (str): Name of the PV that belonged to the deleted PVC

    Returns:
        bool: True if looking up the PV fails with CommandFailed, False if
            the lookup succeeds but returns no data

    Raises:
        AssertionError: If the PV can still be fetched

    """
    ocp_pv_obj = ocp.OCP(
        kind=constants.PV, namespace=defaults.ROOK_CLUSTER_NAMESPACE
    )
    try:
        pv_data = ocp_pv_obj.get(resource_name=pv_name)
    except CommandFailed:
        # the 'get' command failing is taken as "PV is not there any more"
        return True
    if pv_data:
        raise AssertionError(
            f"PV {pv_name} still exists after the PVC deletion"
        )
    # Lookup did not fail but returned nothing: deletion is not confirmed
    return False
def workload_stop_ceph_mgr():
    """
    Downscales Ceph Manager deployment, measures the time when it was
    downscaled and monitors alerts that were triggered during this event.

    The deployment is scaled back to 1 replica even when the measured
    operation raises, so a failure does not leave the cluster without a
    manager.

    Returns:
        dict: Contains information about `start` and `stop` time for stopping
            Ceph Manager pod.

    """
    oc = ocp.OCP(
        kind=constants.DEPLOYMENT,
        namespace=config.ENV_DATA['cluster_namespace'],
    )
    mgr_deployments = oc.get(selector=constants.MGR_APP_LABEL)['items']
    mgr = mgr_deployments[0]['metadata']['name']

    def stop_mgr():
        """
        Downscale Ceph Manager deployment for 6 minutes. First 5 minutes
        the alert should be in 'Pending'.
        After 5 minutes it should be 'Firing'.
        This configuration of monitoring can be observed in ceph-mixins which
        are used in the project:
            https://github.com/ceph/ceph-mixins/blob/d22afe8c0da34490cb77e52a202eefcf4f62a869/config.libsonnet#L25

        Returns:
            str: Name of downscaled deployment.

        """
        # run_time of operation
        run_time = 60 * 6
        # 'oc' and 'mgr' are only read from the enclosing scope, so no
        # nonlocal declaration is needed.
        logger.info(f"Downscaling deployment {mgr} to 0")
        oc.exec_oc_cmd(f"scale --replicas=0 deployment/{mgr}")
        logger.info(f"Waiting for {run_time} seconds")
        time.sleep(run_time)
        return oc.get(mgr)

    try:
        measured_op = measure_operation(stop_mgr)
    finally:
        logger.info(f"Upscaling deployment {mgr} back to 1")
        oc.exec_oc_cmd(f"scale --replicas=1 deployment/{mgr}")
    return measured_op
def validate_pdb_creation():
    """
    Validate creation of PDBs for MON, MDS and OSD pods.

    Every required PDB name is looked up among the existing PDB names.
    Unrelated extra PDBs are ignored, and a missing PDB is always reported
    (a pairwise comparison of two sorted lists would silently pass when
    fewer PDBs exist than are required).

    Raises:
        AssertionError: If required PDBs were not created.

    """
    pdb_obj = ocp.OCP(kind="PodDisruptionBudget")
    item_list = pdb_obj.get().get("items")
    existing_pdbs = {item["metadata"]["name"] for item in item_list}

    osd_count = count_cluster_osd()
    pdb_required = [constants.MDS_PDB, constants.MON_PDB]
    # one OSD PDB per OSD, named by the OSD index
    pdb_required.extend(
        constants.OSD_PDB + str(num) for num in range(osd_count)
    )
    pdb_required.sort()

    for required in pdb_required:
        assert required in existing_pdbs, f"{required} was not created"
    logger.info(f"All required PDBs created: {pdb_required}")
def modify_registry_pod_count(count):
    """
    Function to modify registry replica count(increase/decrease pod count)

    Args:
        count (int): registry replica count to be changed to

    Returns:
        bool: True once the number of pods matching "image-registry" equals
            ``count + 1``. False if the sampler ends without a match.

    Raises:
        AssertionError: When the patch command result is falsy
        TimeoutExpiredError: When number of image registry pods doesn't match
            the count (presumably raised by TimeoutSampler - verify)

    """
    params = '{"spec":{"replicas":%d}}' % count
    ocp_obj = ocp.OCP(
        kind=constants.IMAGE_REGISTRY_CONFIG,
        namespace=constants.OPENSHIFT_IMAGE_REGISTRY_NAMESPACE,
    )
    # The result has to be asserted; a bare "call, (message)" expression
    # only builds a tuple and never checks anything.
    assert ocp_obj.patch(params=params, format_type="merge"), (
        "Failed to run patch command to increase number of image registry pod"
    )

    # Validate number of image registry pod should match the count
    for pod_list in TimeoutSampler(
        300,
        10,
        get_pod_name_by_pattern,
        "image-registry",
        constants.OPENSHIFT_IMAGE_REGISTRY_NAMESPACE,
    ):
        # NOTE(review): "+ 1" presumably accounts for one additional pod
        # whose name also matches the pattern - confirm
        if pod_list is not None and len(pod_list) == count + 1:
            return True
    return False
def remove_kmsid(kmsid):
    """
    Remove the entry of the given kmsid from the configmap named by
    ``constants.VAULT_KMS_CSI_CONNECTION_DETAILS`` using a JSON patch, then
    check that the kmsid is no longer listed.

    Args:
        kmsid (str) : kmsid to be removed

    Raises:
        KMSResourceCleaneupError: If the kmsid entry is not deleted

    """
    # JSON patch wrapped in single quotes for the command line
    json_patch = f'\'[{{"op": "remove", "path": "/data/{kmsid}"}}]\''
    patch_cmd = (
        f"patch -n {constants.OPENSHIFT_STORAGE_NAMESPACE} cm "
        f"{constants.VAULT_KMS_CSI_CONNECTION_DETAILS} --type json -p "
        f"{json_patch}"
    )
    ocp.OCP().exec_oc_cmd(command=patch_cmd)

    if kmsid in get_encryption_kmsid():
        raise KMSResourceCleaneupError(f"KMS ID {kmsid} deletion failed")
    logger.info(f"KMS ID {kmsid} deleted")
def remove_role_from_user(role_type, user, cluster_role=False, namespace=None):
    """
    Revoke a cluster role or a regular role from a user via 'oc adm policy'.

    Args:
        role_type (str): Type of the role to be removed
        user (str): User of the role
        cluster_role (bool): Whether to remove a cluster-role or a regular role
        namespace (str): Namespace to be used

    Raises:
        AssertionError: When the command result is falsy

    """
    if cluster_role:
        scope = "cluster-"
    else:
        scope = ""
    if namespace:
        namespace_arg = f"-n {namespace}"
    else:
        namespace_arg = ""

    role_cmd = (
        f"adm policy remove-{scope}role-from-user "
        f"{role_type} {user} {namespace_arg}"
    )
    removed = ocp.OCP().exec_oc_cmd(command=role_cmd)
    assert removed, "Removing role failed"
    logger.info(f"Role_type {role_type} removed from user {user}")
def test_noobaa_service_mon_after_ocs_upgrade():
    """
    Verify 'noobaa-service-monitor' does not exist after OCS upgrade.

    Test Procedure:
    1.Upgrade OCS version
    2.Check servicemonitors
    3.Verify 'noobaa-service-monitor' does not exist

    """
    ocs_version = version.get_ocs_version_from_csv(
        only_major_minor=False, ignore_pre_release=True
    )
    # NOTE(review): the comparison is "<=" while the skip message says
    # "less than 4.7.4" - confirm which one is intended
    if ocs_version <= version.get_semantic_version("4.7.4"):
        pytest.skip("The test is not supported on version less than 4.7.4")

    servicemonitors = ocp.OCP(
        kind=constants.SERVICE_MONITORS,
        namespace=defaults.ROOK_CLUSTER_NAMESPACE,
    ).get()["items"]
    monitor_names = [
        servicemonitor["metadata"]["name"]
        for servicemonitor in servicemonitors
    ]
    assert "noobaa-service-monitor" not in monitor_names, (
        "noobaa-service-monitor exist"
    )
    log.info("noobaa-service-monitor does not exist")
def cluster(request, log_cli_level):
    """
    Initiate deployment for both OCP and OCS clusters. The platform
    specific deployment object obtained from the factory handles the
    details.

    When the 'teardown' CLI param is set, a finalizer destroying the
    cluster is registered. The OpenShift client is fetched in every case.
    When the 'deploy' CLI param is set, the cluster is deployed and the
    container_use_cephfs workaround is applied on RHEL workers.
    """
    log.info(f"All logs located at {ocsci_log_path()}")

    teardown = config.RUN['cli_params']['teardown']
    deploy = config.RUN['cli_params']['deploy']
    deployer = dep_factory.DeploymentFactory().get_deployment()

    # Add a finalizer to teardown the cluster after test execution is finished
    if teardown:
        def cluster_teardown_finalizer():
            deployer.destroy_cluster(log_cli_level)

        request.addfinalizer(cluster_teardown_finalizer)
        log.info("Will teardown cluster because --teardown was provided")

    # Download client
    get_openshift_client(
        force_download=(
            config.RUN['cli_params'].get('deploy')
            and config.DEPLOYMENT['force_download_client']
        )
    )

    if not deploy:
        return

    # Deploy cluster
    deployer.deploy_cluster(log_cli_level)

    # Workaround for #1777384 - enable container_use_cephfs on RHEL workers
    ocp_obj = ocp.OCP()
    setsebool_cmd = ['/usr/sbin/setsebool -P container_use_cephfs on']
    for worker in get_typed_worker_nodes(os_id="rhel"):
        cmd_list = list(setsebool_cmd)
        node = worker.get().get('metadata').get('name')
        log.info(f"{node} is a RHEL based worker - applying '{cmd_list}'")
        ocp_obj.exec_oc_debug_cmd(node=node, cmd_list=cmd_list)
def update_pull_secret():
    """
    Update pull secret with extra quay.io/rhceph-dev credentials.

    The credentials are added under the "auths" mapping of the decoded
    '.dockerconfigjson' document, which is where registry entries of a
    docker config live. An entry at the top level of the document would
    not be treated as a registry credential.

    Note: This is a hack done to allow odf to odf deployment before full addon is available.
    """
    oc = ocp.OCP(kind=constants.SECRET, namespace="openshift-config")
    logger.info("Update pull secret")
    pull_secret = oc.exec_oc_cmd(
        "get -n openshift-config secret/pull-secret -o yaml")

    # The secret stores the docker config as base64 encoded JSON
    encoded_config = pull_secret["data"][".dockerconfigjson"]
    docker_config = json.loads(base64.b64decode(encoded_config).decode())

    rhceph_dev_key = config.AUTH["quay-rhceph-dev-auth"]
    docker_config.setdefault("auths", {})["quay.io/rhceph-dev"] = {
        "auth": rhceph_dev_key,
        "email": "",
    }

    secret_bytes = json.dumps(docker_config).encode()
    with tempfile.NamedTemporaryFile() as secret_file:
        secret_file.write(secret_bytes)
        # make sure the content is on disk before 'oc' reads the file
        secret_file.flush()
        exec_cmd(
            f"oc set data secret/pull-secret -n openshift-config --from-file=.dockerconfigjson={secret_file.name}"
        )
def test_monitoring_after_restarting_prometheus_pod(self, pods):
    """
    Validate that restarting the prometheus pods has no functional impact:
    each pod reports its former PVC claim after the restart and the data of
    the PVCs of the given pods is still collected.
    """
    # Get the prometheus pod
    prometheus_pods = pod.get_all_pods(
        namespace=defaults.OCS_MONITORING_NAMESPACE, selector=["prometheus"]
    )

    for prometheus_pod in prometheus_pods:
        # Get the pvc which mounted on prometheus pod
        volumes_before = prometheus_pod.get()["spec"]["volumes"]
        pvc_name = volumes_before[0]["persistentVolumeClaim"]["claimName"]

        # Restart the prometheus pod
        prometheus_pod.delete(force=True)
        pod_ocp = ocp.OCP(
            kind=constants.POD, namespace=defaults.OCS_MONITORING_NAMESPACE
        )
        assert pod_ocp.wait_for_resource(
            condition="Running", selector="app=prometheus", timeout=60
        )

        # Check the same pvc is mounted on new pod
        volumes_after = prometheus_pod.get()["spec"]["volumes"]
        claim_after = volumes_after[0]["persistentVolumeClaim"]["claimName"]
        assert (
            claim_after in pvc_name
        ), f"Old pvc not found after restarting the prometheus pod {prometheus_pod.name}"

    for workload_pod in pods:
        assert check_pvcdata_collected_on_prometheus(
            workload_pod.pvc.name
        ), f"On prometheus pod for created pvc {workload_pod.pvc.name} related data is not collected"
def delete_and_create_osd_node_aws_ipi(osd_node_name):
    """
    Unschedule, drain and delete osd node, and creating a new osd node.
    At the end of the function there should be the same number of osd nodes as
    it was in the beginning, and also ceph health should be OK.

    This function is for AWS IPI.

    Args:
        osd_node_name (str): the name of the osd node

    Raises:
        ValueError: If no machine sharing the name prefix of the deleted
            machine is found after the deletion

    """
    # Unscheduling node
    unschedule_nodes([osd_node_name])
    # Draining Node
    drain_nodes([osd_node_name])

    log.info("Getting machine name from specified node name")
    machine_name = machine.get_machine_from_node_name(osd_node_name)
    log.info(f"Node {osd_node_name} associated machine is {machine_name}")
    log.info(
        f"Deleting machine {machine_name} and waiting for new machine to come up"
    )
    machine.delete_machine_and_check_state_of_new_spinned_machine(machine_name)

    new_machine_name = None
    for candidate in machine.get_machines():
        # Trimming is done to get just machine name
        # eg:- machine_name:- prsurve-40-ocs-43-kbrvf-worker-us-east-2b-nlgkr
        # After trimming:- prsurve-40-ocs-43-kbrvf-worker-us-east-2b
        # A plain prefix comparison is used so that characters of the
        # machine name are never interpreted as a regular expression.
        if machine_name.startswith(candidate.name[:-6]):
            new_machine_name = candidate.name
    if new_machine_name is None:
        raise ValueError(
            f"No machine found that replaces deleted machine {machine_name}"
        )

    machineset_name = machine.get_machineset_from_machine_name(
        new_machine_name)
    log.info("Waiting for new worker node to be in ready state")
    machine.wait_for_new_node_to_be_ready(machineset_name)
    new_node_name = get_node_from_machine_name(new_machine_name)

    log.info("Adding ocs label to newly created worker node")
    node_obj = ocp.OCP(kind='node')
    node_obj.add_label(resource_name=new_node_name,
                       label=constants.OPERATOR_NODE_LABEL)
    log.info(f"Successfully labeled {new_node_name} with OCS storage label")
def svt_cleanup():
    """
    Removes the cloned SVT project and virtual environment directories and
    the projects created while running SVT.

    Removal of the two directories is best effort: a failure is logged and
    the cleanup continues, and a failure on the first directory no longer
    prevents the removal of the second one.

    Returns:
        bool: True if the projects were deleted without an exception,
            False otherwise

    """
    ns_obj = ocp.OCP(kind="namespace")

    for directory in ("/tmp/svt", "/tmp/venv"):
        try:
            shutil.rmtree(directory)
        except OSError:
            # only filesystem errors are swallowed here; KeyboardInterrupt
            # and SystemExit must not be
            log.error("Error while cleaning SVT project")

    project_list = [
        "cakephp-mysql0",
        "dancer-mysql0",
        "django-postgresql0",
        "eap64-mysql0",
        "nodejs-mongodb0",
        "rails-postgresql0",
        "tomcat8-mongodb0",
    ]
    try:
        # Reset namespace to default
        ocp.switch_to_default_rook_cluster_project()
        for project in project_list:
            run_cmd(f"oc delete project {project}")
            ns_obj.wait_for_delete(resource_name=project)
    except Exception:
        # keep the boolean contract, but leave a trace of what went wrong
        log.exception("Error while deleting SVT projects")
        return False
    return True
def get_node_resource_utilization_from_adm_top(nodename=None, node_type='worker'):
    """
    Gets the node's cpu and memory utilization in percentage using adm top command.

    Each output row is split into whitespace separated columns, and the
    node is matched against the first column only. Assumes the column
    layout NAME, CPU(cores), CPU%, MEMORY(bytes), MEMORY% - TODO confirm
    against the 'oc adm top nodes' version in use. Digits that are part of
    the node name can therefore no longer shift the parsed values.

    Args:
        nodename (str) : The node name
        node_type (str) : The node type (e.g. master, worker)

    Returns:
        dict : Node name and its cpu and memory utilization in
               percentage. Nodes that are not listed in the command output
               are left out.

    """
    node_names = [nodename] if nodename else [
        node.name for node in get_typed_nodes(node_type=node_type)
    ]

    obj = ocp.OCP()
    top_output = obj.exec_oc_cmd(
        command='adm top nodes', out_yaml_format=False)

    # Index the rows by the value of their first column
    rows_by_name = {}
    for row in top_output.split("\n"):
        columns = row.split()
        if len(columns) >= 5:
            rows_by_name[columns[0]] = columns

    utilization_dict = {}
    for node in node_names:
        columns = rows_by_name.get(node)
        if columns is None:
            continue
        cpu_utilization = columns[2].rstrip('%')
        log.info("The CPU utilized by the node "
                 f"{node} is {cpu_utilization}%")
        memory_utilization = columns[4].rstrip('%')
        log.info("The memory utilized of the node "
                 f"{node} is {memory_utilization}%")
        utilization_dict[node] = {
            'cpu': int(cpu_utilization),
            'memory': int(memory_utilization)
        }
    return utilization_dict
def check_mirroring_status_ok(replaying_images=None):
    """
    Check if mirroring status has health OK and expected number of replaying images

    The health values are compared for equality with "OK" (a substring
    test would accept "", "O" or "K" and fail on None). The replaying
    image count is only checked when ``replaying_images`` is truthy.

    Args:
        replaying_images (int): Expected number of images in replaying state

    Returns:
        bool: True if status contains expected health and states values, False otherwise

    """
    cbp_obj = ocp.OCP(
        kind=constants.CEPHBLOCKPOOL,
        resource_name=constants.DEFAULT_CEPHBLOCKPOOL,
        namespace=constants.OPENSHIFT_STORAGE_NAMESPACE,
    )
    mirroring_status = cbp_obj.get().get("status").get("mirroringStatus").get("summary")
    logger.info(f"Mirroring status: {mirroring_status}")

    keys_to_check = ["health", "daemon_health", "image_health", "states"]
    for key in keys_to_check:
        if key == "states":
            if not replaying_images:
                continue
            # Replaying images count can be higher due to presence of dummy images
            # There can be upto 2 dummy images in each ODF cluster
            expected_value = range(replaying_images, replaying_images + 3)
            states = mirroring_status.get("states") or {}
            current_value = states.get("replaying")
            as_expected = current_value in expected_value
        else:
            expected_value = "OK"
            current_value = mirroring_status.get(key)
            as_expected = current_value == expected_value

        if not as_expected:
            logger.warning(
                f"Unexpected {key} status. Current status is {current_value} but expected {expected_value}"
            )
            return False

    return True
def validate_cephfilesystem(fs_name):
    """
    Verify CephFileSystem exists at ceph and k8s

    All filesystems reported by 'ceph fs ls' are considered, not only the
    first one, so the check also works when several filesystems exist.

    Args:
        fs_name (str): The name of the Ceph FileSystem

    Returns:
        bool: True if CephFileSystem is created at ceph and k8s side else
            will return False with valid msg i.e Failure cause

    """
    cfs_obj = ocp.OCP(
        kind=constants.CEPHFILESYSTEM,
        namespace=defaults.ROOK_CLUSTER_NAMESPACE
    )
    ct_pod = pod.get_ceph_tools_pod()

    logger.info(fs_name)
    ceph_filesystems = ct_pod.exec_ceph_cmd(ceph_cmd="ceph fs ls") or []
    ceph_fs_names = [filesystem['name'] for filesystem in ceph_filesystems]
    logger.info(ceph_fs_names)
    if fs_name not in ceph_fs_names:
        logger.error("FileSystem was not present at Ceph Side")
        return False
    logger.info("FileSystem got created from Ceph Side")

    result = cfs_obj.get(resource_name=fs_name)
    if not result['metadata']['name']:
        logger.error("Filesystem was not create at Kubernetes Side")
        return False
    logger.info("Filesystem got created from kubernetes Side")
    return True
def wait_for_job_completion(namespace, timeout, error_msg):
    """
    This is a WORKAROUND of particular ocsci design choices: the function
    waits for one pod in the namespace to reach the completed state and
    then queries the pods again to learn the pod name (waiting for the job
    itself and asking for the name of its successful pod would be better).

    Args:
        namespace: namespace in which the pod is looked up
        timeout: timeout passed to the wait call
        error_msg: message logged and stored on the exception on timeout

    Returns:
        str: name of Pod resource of the finished job

    Raises:
        TimeoutExpiredError: re-raised, with its message replaced by
            error_msg, when the wait times out

    """
    ocp_pod = ocp.OCP(kind="Pod", namespace=namespace)
    try:
        ocp_pod.wait_for_resource(
            resource_count=1,
            condition=constants.STATUS_COMPLETED,
            timeout=timeout,
            sleep=30,
        )
    except TimeoutExpiredError as ex:
        # report some high level error as well
        logger.error(error_msg)
        # TODO: log both describe and the output from the fio pods, as DEBUG
        ex.message = error_msg
        raise ex

    # identify pod of the completed job
    pod_data = ocp_pod.get()
    # Explicit assumptions: when they do not hold, either something went
    # terribly wrong or this code needs to be changed.
    assert pod_data['kind'] == "List"
    first_pod = pod_data['items'][0]
    assert first_pod['kind'] == "Pod"

    pod_name = first_pod['metadata']['name']
    logger.info(f"Identified pod name of the finished Job: {pod_name}")
    return pod_name
def add_new_node_and_label_it(machineset_name):
    """
    Grow the machine set by one replica, wait for the new node to become
    ready and put the OCS operator label on it.

    Args:
        machineset_name (str): Name of the machine set

    Returns:
        The name of the first newly detected worker node

    eg: add_new_node_and_label_it("new-tdesala-zlqzn-worker-us-east-2a")
    """
    # Snapshot of the workers before scaling, used to detect the new node
    initial_nodes = tests.helpers.get_worker_nodes()
    log.info(f"Current available worker nodes are {initial_nodes}")

    machineset_replica_count = machine.get_replica_count(machineset_name)
    log.info(
        f"{machineset_name} has replica count: {machineset_replica_count}")

    log.info("Increasing the replica count by 1")
    new_replica_count = machineset_replica_count + 1
    machine.add_node(machineset_name, count=new_replica_count)
    log.info(f"{machineset_name} now has replica count: {new_replica_count}")

    log.info("Waiting for the new node to be in ready state")
    machine.wait_for_new_node_to_be_ready(machineset_name)

    # The new node is whatever shows up now and was not there before
    nodes_after_new_spun_node = tests.helpers.get_worker_nodes()
    new_spun_node = list(set(nodes_after_new_spun_node) - set(initial_nodes))
    log.info(f"New spun node is {new_spun_node}")

    labeled_node = new_spun_node[0]
    ocp.OCP(kind='node').add_label(
        resource_name=labeled_node, label=constants.OPERATOR_NODE_LABEL
    )
    log.info(f"Successfully labeled {new_spun_node} with OCS storage label")
    return labeled_node
def rsync_kubeconf_to_node(node):
    """
    Copy the directory holding the kubeconfig to '/home/core/' on the given
    node when the 'auth' directory, or the 'kubeconfig' file inside it, is
    not found.

    Args:
        node (str): OCP node to copy kubeconfig if not present

    """
    kubeconfig_file = os.path.join(
        config.ENV_DATA['cluster_path'], config.RUN['kubeconfig_location']
    )
    source_dir = os.path.dirname(kubeconfig_file)
    master_list = get_master_nodes()
    ocp_obj = ocp.OCP()
    node_path = '/home/core/'

    def list_dir(path):
        # NOTE(review): the listing is taken from the first master while
        # the copy goes to 'node' - confirm this is intended
        return ocp_obj.exec_oc_debug_cmd(
            node=master_list[0], cmd_list=[f"ls {path}"]
        )

    # The second listing only runs when the 'auth' directory was found
    kubeconfig_missing = (
        'auth' not in list_dir(node_path)
        or 'kubeconfig' not in list_dir(f"{node_path}auth")
    )
    if kubeconfig_missing:
        ocp.rsync(src=source_dir, dst=f"{node_path}", node=node, dst_node=True)
def factory(db2u_project_name, ldap_r_n, ldap_r_p, db2u_r_n, db2u_r_p):
    """
    Record the project and release names and create the LDAP and DB2U
    password secrets.

    Args:
        db2u_project_name (str): Name of the db2u project.
        ldap_r_n (str): LDAP release name.
        ldap_r_p (str): LDAP release password.
        db2u_r_n (str): DB2U release name
        db2u_r_p (str): DB2U release password

    """
    db2u_project.append(db2u_project_name)
    ocp_proj.append(ocp.OCP(namespace=db2u_project_name))
    temp_ldap_r_n.append(ldap_r_n)
    temp_db2u_r_n.append(db2u_r_n)

    secrets_to_create = (
        ("Creating LDAP secrets", f"{ldap_r_n}-db2u-ldap-bluadmin", ldap_r_p),
        ("Creating DB2U secrets", f"{db2u_r_n}-db2u-instance", db2u_r_p),
    )
    for log_message, secret_name, password in secrets_to_create:
        log.info(log_message)
        # NOTE(review): always uses the first recorded project object -
        # confirm this is intended when factory is called more than once
        ocp_proj[0].exec_oc_cmd(
            command=f"create secret generic {secret_name} "
                    f"--from-literal=password={password}")
def run_memory_leak_in_bg():
    """
    Function to run memory leak in background thread

    While the flag status is 'running', 'top' is executed on every worker
    node through 'oc debug' and each output line containing "ceph-osd" is
    appended, prefixed with the current timestamp, to
    /tmp/<worker>-top-output.txt.

    Memory leak data is written in below format
    date time PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
    """
    oc = ocp.OCP(namespace=config.ENV_DATA['cluster_namespace'])
    while get_flag_status() == 'running':
        for worker in helpers.get_worker_nodes():
            filename = f"/tmp/{worker}-top-output.txt"
            top_cmd = f"debug nodes/{worker} -- chroot /host top -n 2 b"
            top_output = str(
                oc.exec_oc_cmd(command=top_cmd, out_yaml_format=False)
            )
            # Filter in memory; no round trip through a shared temp file
            osd_lines = [
                line for line in top_output.splitlines(keepends=True)
                if "ceph-osd" in line
            ]
            if not osd_lines:
                continue
            # Open the per-worker file once for all matching lines
            with open(filename, "a") as top_file:
                for line in osd_lines:
                    top_file.write(f"{datetime.now()} {line}")
def check_vr_state(state, namespace):
    """
    Check if all VR in the given namespace are in expected state

    Every VR is fetched once, so the desired and the current state come
    from the same snapshot. A VR whose spec/status does not carry a state
    yet is reported as a mismatch instead of raising AttributeError.

    Args:
        state (str): The VR state to check for (e.g. 'primary', 'secondary')
        namespace (str): the namespace of the VR resources

    Returns:
        bool: True if all VR are in expected state, False otherwise

    """
    vr_obj = ocp.OCP(kind=constants.VOLUME_REPLICATION, namespace=namespace)
    vr_list = get_all_vrs(namespace)
    expected_state = state.lower()

    vr_state_mismatch = []
    for vr in vr_list:
        vr_data = vr_obj.get(vr)
        desired_state = (vr_data.get("spec") or {}).get("replicationState")
        current_state = (vr_data.get("status") or {}).get("state")
        logger.info(
            f"VR: {vr} desired state is {desired_state}, current state is {current_state}"
        )
        # the comparison is case-insensitive
        states_match = all(
            isinstance(value, str) and value.lower() == expected_state
            for value in (desired_state, current_state)
        )
        if not states_match:
            vr_state_mismatch.append(vr)

    if vr_state_mismatch:
        logger.warning(
            f"Following {len(vr_state_mismatch)} VR are not in expected {state} state: {vr_state_mismatch}"
        )
        return False

    logger.info(f"All {len(vr_list)} VR are in expected state {state}")
    return True
def change_registry_backend_to_ocs():
    """
    Switch the image registry storage to a CephFS backed RWX PVC.

    Creates the PVC, patches the registry resource to use it, sets
    managementState to "Managed" on platforms that are not listed in
    ``constants.CLOUD_PLATFORMS`` and finally validates the registry pods
    and their PVC mount.

    Raises:
        AssertionError: When failure in change of registry backend to OCS

    """
    registry_namespace = constants.OPENSHIFT_IMAGE_REGISTRY_NAMESPACE
    sc_name = f"{constants.DEFAULT_STORAGECLASS_CEPHFS}"
    registry_pvc = helpers.create_pvc(
        sc_name=sc_name,
        pvc_name='registry-cephfs-rwx-pvc',
        namespace=registry_namespace,
        size='100Gi',
        access_mode=constants.ACCESS_MODE_RWX,
    )
    helpers.wait_for_resource_state(registry_pvc, 'Bound')

    config_ocp = ocp.OCP(kind=constants.CONFIG, namespace=registry_namespace)
    param_cmd = f'[{{"op": "add", "path": "/spec/storage", "value": {{"pvc": {{"claim": "{registry_pvc.name}"}}}}}}]'
    patched = config_ocp.patch(
        resource_name=constants.IMAGE_REGISTRY_RESOURCE_NAME,
        params=param_cmd,
        format_type='json',
    )
    assert patched, "Registry pod storage backend to OCS is not success"

    is_cloud_platform = config.ENV_DATA['platform'] in constants.CLOUD_PLATFORMS
    if not is_cloud_platform:
        run_cmd(
            f'oc patch {constants.IMAGE_REGISTRY_CONFIG} --type merge -p '
            f'\'{{"spec":{{"managementState": "Managed"}}}}\''
        )
        logger.info(
            "Waiting 30 seconds after change managementState of image-registry."
        )
        time.sleep(30)

    # Validate registry pod status
    validate_registry_pod_status()

    # Validate pvc mount in the registry pod
    validate_pvc_mount_on_registry_pod()
def mcg_workload_job(fio_job_dict_mcg, fio_configmap_dict_mcg, fio_conf_mcg,
                     fio_project_mcg, tmp_path, request):
    """
    Creates kubernetes job that should utilize MCG during upgrade.

    The fio configuration is stored in the config map under the
    'workload.fio' key, and config map and job are created together. A
    finalizer deleting the job is registered on the request.

    Returns:
        object: Job object

    """
    fio_configmap_dict_mcg["data"]["workload.fio"] = fio_conf_mcg
    job_name = fio_job_dict_mcg['metadata']['name']

    log.info(f"Creating job {job_name}")
    job_file = ObjectConfFile(
        "fio_continuous",
        [fio_configmap_dict_mcg, fio_job_dict_mcg],
        fio_project_mcg,
        tmp_path,
    )
    # deploy the Job to the cluster and start it
    job_file.create()
    log.info(f"Job {job_name} created")

    # get job object
    job_data = ocp.OCP(
        kind=constants.JOB, namespace=fio_project_mcg.namespace
    ).get(resource_name=job_name)
    job = OCS(**job_data)

    def teardown():
        """
        Delete mcg job
        """
        job.delete()
        job.ocp.wait_for_delete(job.name)

    request.addfinalizer(teardown)
    return job
def __init__(self):
    """
    Create a CephFS PVC and a pod using it, based on the names found in
    the YAML file at ``constants.CSI_CEPHFS_POD_YAML``, then create the
    test files through ``add_million_files``.

    Also sets the 'teardown' CLI param in the run configuration to True.
    """
    with open(constants.CSI_CEPHFS_POD_YAML, "r") as pod_fd:
        pod_info = yaml.safe_load(pod_fd)
    first_volume = pod_info["spec"]["volumes"][0]

    # Make sure the pvc and pod names are unique, so AlreadyExists
    # exceptions are not thrown.
    pvc_name = first_volume["persistentVolumeClaim"]["claimName"] + str(
        uuid.uuid4())
    self.pod_name = pod_info["metadata"]["name"] + str(uuid.uuid4())

    config.RUN["cli_params"]["teardown"] = True
    cluster_namespace = config.ENV_DATA["cluster_namespace"]

    self.cephfs_pvc = helpers.create_pvc(
        sc_name=constants.DEFAULT_STORAGECLASS_CEPHFS,
        namespace=cluster_namespace,
        pvc_name=pvc_name,
        size=SIZE,
    )
    helpers.wait_for_resource_state(
        self.cephfs_pvc, constants.STATUS_BOUND, timeout=1200
    )

    self.cephfs_pod = helpers.create_pod(
        interface_type=constants.CEPHFILESYSTEM,
        namespace=cluster_namespace,
        pvc_name=pvc_name,
        pod_name=self.pod_name,
    )
    helpers.wait_for_resource_state(
        self.cephfs_pod, constants.STATUS_RUNNING, timeout=300
    )
    logging.info("pvc and cephfs pod created")

    self.ocp_obj = ocp.OCP(kind=constants.POD, namespace=cluster_namespace)
    self.test_file_list = add_million_files(self.pod_name, self.ocp_obj)
    logging.info("cephfs test files created")