def test_monitoring_enabled():
    """
    OCS Monitoring is enabled after OCS installation (which is why this test
    has a post deployment marker) by asking for values of one ceph and one
    noobaa related metric.
    """
    prometheus = PrometheusAPI()

    # ask for values of ceph_pool_stored metric
    logger.info("Checking that ceph data are provided in OCS monitoring")
    result = prometheus.query('ceph_pool_stored')
    msg = "check that we actually received some values for a ceph query"
    assert len(result) > 0, msg
    for metric in result:
        _, value = metric['value']
        assert_msg = "number of bytes in a pool isn't a positive integer or zero"
        assert int(value) >= 0, assert_msg
    # additional check that the values make at least some sense
    logger.info(
        "Checking that size of ceph_pool_stored result matches number of pools")
    ct_pod = pod.get_ceph_tools_pod()
    ceph_pools = ct_pod.exec_ceph_cmd("ceph osd pool ls")
    assert len(result) == len(ceph_pools)

    # again for a noobaa metric
    logger.info("Checking that MCG/NooBaa data are provided in OCS monitoring")
    result = prometheus.query('NooBaa_bucket_status')
    msg = "check that we actually received some values for a MCG/NooBaa query"
    assert len(result) > 0, msg
    for metric in result:
        _, value = metric['value']
        assert int(value) >= 0, "bucket status isn't a positive integer or zero"
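# A minimal sketch (not part of the test above) of how an instant-vector
# result from PrometheusAPI.query() is shaped and unpacked; the payload below
# is illustrative, not captured from a real cluster.
def _parse_instant_vector_example():
    sample_result = [
        {"metric": {"pool_id": "1"}, "value": [1590000000.123, "4096"]},
    ]
    for metric in sample_result:
        # each 'value' is a [unix_timestamp, value_as_string] pair
        _, value = metric["value"]
        assert int(value) >= 0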
def scan_cluster(self):
    """
    Get accurate info on current state of pods
    """
    self._ceph_pods = pod.get_all_pods(self._namespace)
    self.mons = pod.get_mon_pods(self.mon_selector, self.namespace)
    self.mdss = pod.get_mds_pods(self.mds_selector, self.namespace)
    self.mgrs = pod.get_mgr_pods(self.mgr_selector, self.namespace)
    self.osds = pod.get_osd_pods(self.osd_selector, self.namespace)
    self.toolbox = pod.get_ceph_tools_pod()

    # set port attrib on mon pods
    self.mons = list(map(self.set_port, self.mons))
    self.cluster.reload()
    if self.cephfs:
        self.cephfs.reload()
    else:
        try:
            self.cephfs_config = self.CEPHFS.get().get('items')[0]
            self.cephfs = ocs.OCS(**self.cephfs_config)
            self.cephfs.reload()
        except IndexError as e:
            logging.warning(e)
            logging.warning("No CephFS found")

    self.mon_count = len(self.mons)
    self.mds_count = len(self.mdss)
    self.mgr_count = len(self.mgrs)
    self.osd_count = len(self.osds)
def verify_pv_not_exists(pv_name, cbp_name):
    """
    Ensure that pv does not exist
    """
    # Validate on ceph side
    logger.info(f"Verifying pv {pv_name} doesn't exist on backend")
    ct_pod = pod.get_ceph_tools_pod()
    pvc_list = ct_pod.exec_ceph_cmd(
        ceph_cmd=f"rbd ls -p {cbp_name}", format='json'
    )
    _rc = pv_name in pvc_list
    if _rc:
        raise UnexpectedBehaviour(f"pv {pv_name} exists on backend")
    logger.info(
        f"Expected: pv {pv_name} doesn't exist on backend after deleting pvc"
    )

    # Validate on oc side
    try:
        PV.get(pv_name)
    except CommandFailed as ecf:
        assert "not found" in str(ecf), (
            f"Unexpected: pv {pv_name} still exists"
        )
    logger.info(
        "Expected: pv should not be found "
        "after deleting corresponding pvc"
    )
def test_ceph_default_values_check(self):
    """
    This test compares the ceph default values taken from OCS 4.3 with the
    current values in the cluster.
    """
    # The default ceph osd full ratio values
    expected_full_ratios = {
        "full_ratio": 0.85,
        "backfillfull_ratio": 0.8,
        "nearfull_ratio": 0.75,
    }
    actual_full_ratios = {}
    ct_pod = pod.get_ceph_tools_pod()
    log.info("Checking the values of ceph osd full ratios in osd map")
    osd_dump_dict = ct_pod.exec_ceph_cmd("ceph osd dump")
    for ratio_parm, value in expected_full_ratios.items():
        ratio_value = osd_dump_dict.get(ratio_parm)
        actual_full_ratios[ratio_parm] = float(round(ratio_value, 2))
        if not float(round(ratio_value, 2)) == value:
            log.error(
                f"Actual {ratio_parm} value is {ratio_value:.2f} NOT "
                f"matching the expected value {value}"
            )
    assert expected_full_ratios == actual_full_ratios, (
        "Actual full ratio values do not match expected full "
        "ratio values"
    )
    log.info(
        f"Actual full ratio values {actual_full_ratios} MATCH expected "
        f"full ratio values {expected_full_ratios}"
    )

    # Check if the osd full ratios satisfy the condition
    # "nearfull < backfillfull < full"
    assert (
        osd_dump_dict["nearfull_ratio"]
        < osd_dump_dict["backfillfull_ratio"]
        < osd_dump_dict["full_ratio"]
    ), (
        "osd full ratio values do not satisfy the condition "
        f"{osd_dump_dict['nearfull_ratio']:.2f} < "
        f"{osd_dump_dict['backfillfull_ratio']:.2f} < "
        f"{osd_dump_dict['full_ratio']:.2f}"
    )
    log.info(
        "osd full ratio values satisfy the condition "
        f"{osd_dump_dict['nearfull_ratio']:.2f} < "
        f"{osd_dump_dict['backfillfull_ratio']:.2f} < "
        f"{osd_dump_dict['full_ratio']:.2f}"
    )

    # Check if PG balancer is active
    assert get_pg_balancer_status(), "PG balancer is not active"

    # Validate the default value of mon_max_pg_per_osd, BZ1908414.
    if float(config.ENV_DATA["ocs_version"]) >= 4.7:
        max_pg_per_osd = get_mon_config_value(key="mon_max_pg_per_osd")
        assert max_pg_per_osd == 600, (
            f"Failed, actual value: {max_pg_per_osd} not matching "
            f"expected value: 600"
        )
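# Hedged sketch of the ordering invariant asserted above: with the OCS 4.3
# defaults, nearfull < backfillfull < full must hold. The literals below are
# the expected defaults from the test, used here purely for illustration.
def _full_ratio_ordering_example():
    ratios = {"nearfull_ratio": 0.75, "backfillfull_ratio": 0.8, "full_ratio": 0.85}
    return (
        ratios["nearfull_ratio"]
        < ratios["backfillfull_ratio"]
        < ratios["full_ratio"]
    )  # -> True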
def drain_nodes(node_names):
    """
    Drain nodes

    Args:
        node_names (list): The names of the nodes

    Raises:
        TimeoutExpired: in case drain command fails to complete in time

    """
    ocp = OCP(kind="node")
    node_names_str = " ".join(node_names)
    log.info(f"Draining nodes {node_names_str}")
    try:
        ocp.exec_oc_cmd(
            f"adm drain {node_names_str} --force=true --ignore-daemonsets "
            f"--delete-local-data",
            timeout=1800,
        )
    except TimeoutExpired:
        ct_pod = pod.get_ceph_tools_pod()
        ceph_status = ct_pod.exec_cmd_on_pod(
            "ceph status", out_yaml_format=False
        )
        log.error(f"Drain command failed to complete. Ceph status: {ceph_status}")
        # TODO: Add re-balance status once pull/1679 is merged
        raise
def scan_cluster(self):
    """
    Get accurate info on current state of pods
    """
    self._ceph_pods = pod.get_all_pods(self._namespace)
    # TODO: Workaround for BZ1748325:
    mons = pod.get_mon_pods(self.mon_selector, self.namespace)
    for mon in mons:
        if mon.ocp.get_resource_status(mon.name) == constant.STATUS_RUNNING:
            self.mons.append(mon)
    # TODO: End of workaround for BZ1748325
    self.mdss = pod.get_mds_pods(self.mds_selector, self.namespace)
    self.mgrs = pod.get_mgr_pods(self.mgr_selector, self.namespace)
    self.osds = pod.get_osd_pods(self.osd_selector, self.namespace)
    self.toolbox = pod.get_ceph_tools_pod()

    # set port attrib on mon pods
    self.mons = list(map(self.set_port, self.mons))
    self.cluster.reload()
    if self.cephfs:
        self.cephfs.reload()
    else:
        try:
            self.cephfs_config = self.CEPHFS.get().get('items')[0]
            self.cephfs = ocs.OCS(**self.cephfs_config)
            self.cephfs.reload()
        except IndexError as e:
            logging.warning(e)
            logging.warning("No CephFS found")

    self.mon_count = len(self.mons)
    self.mds_count = len(self.mdss)
    self.mgr_count = len(self.mgrs)
    self.osd_count = len(self.osds)
def get_ceph_storage_stats(ceph_pool_name):
    """
    Get ceph storage utilization values from ``ceph df``: total STORED value
    and MAX AVAIL of given ceph pool, which are important for understanding
    how much space is already consumed and how much is still available.

    Args:
        ceph_pool_name (str): name of ceph pool where you want to write data

    Returns:
        tuple:
            int: sum of all ceph pool STORED values (Bytes)
            int: value of MAX AVAIL value of given ceph pool (Bytes)

    """
    ct_pod = pod.get_ceph_tools_pod()
    ceph_df_dict = ct_pod.exec_ceph_cmd(ceph_cmd="ceph df")
    ceph_pool = None
    ceph_total_stored = 0
    for pool in ceph_df_dict["pools"]:
        ceph_total_stored += pool["stats"]["stored"]
        if pool["name"] == ceph_pool_name:
            ceph_pool = pool
    if ceph_pool is None:
        logger.error(
            f"pool {ceph_pool_name} was not found "
            f"in output of `ceph df`: {ceph_df_dict}")
    # If the following assert fails, the problem is either:
    # - name of the pool has changed (when this happens before GA, it's
    #   likely an ocs-ci bug; after the release it's a product bug),
    # - pool is missing (likely a product bug)
    # either way, the fixture can't continue ...
    assert ceph_pool is not None, f"Pool: {ceph_pool_name} doesn't exist!"
    return ceph_total_stored, ceph_pool['stats']['max_avail']
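# Hypothetical usage of get_ceph_storage_stats(): derive how much more data
# can still be written to a pool. The pool name below is an assumption for
# illustration, not a value mandated by this module.
def _storage_headroom_example():
    stored, max_avail = get_ceph_storage_stats("ocs-storagecluster-cephblockpool")
    logger.info(f"cluster stores {stored} B, the pool can still take {max_avail} B")
    return max_avail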
def check_ceph_osd_tree():
    """
    Checks whether the OSD tree is created/modified correctly.
    It is a summary of the previous functions: 'check_osd_tree_1az_vmware',
    'check_osd_tree_3az_aws', 'check_osd_tree_1az_aws'.

    Returns:
        bool: True, if the ceph osd tree is formed correctly. Else False

    """
    osd_pods = pod.get_osd_pods()
    # 'ceph osd tree' should show the new osds under the right nodes/hosts
    # Verification is different for 3 AZ and 1 AZ configs
    ct_pod = pod.get_ceph_tools_pod()
    tree_output = ct_pod.exec_ceph_cmd(ceph_cmd="ceph osd tree")
    if config.ENV_DATA["platform"].lower() == constants.VSPHERE_PLATFORM:
        return check_osd_tree_1az_vmware(tree_output, len(osd_pods))

    aws_number_of_zones = 3
    if config.ENV_DATA["platform"].lower() == constants.AWS_PLATFORM:
        # parse the osd tree. If it contains a 'rack' node then it's an
        # AWS_1AZ cluster. Else, it's an AWS_3AZ cluster
        for i in range(len(tree_output["nodes"])):
            if "rack" in tree_output["nodes"][i]["name"]:
                aws_number_of_zones = 1
        if aws_number_of_zones == 1:
            return check_osd_tree_1az_aws(tree_output, len(osd_pods))
        else:
            return check_osd_tree_3az_aws(tree_output, len(osd_pods))
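# Hedged sketch of the `ceph osd tree` JSON fragment the rack detection above
# relies on: a 1 AZ AWS cluster is expected to include bucket nodes named
# rack0/rack1/... The sample below is illustrative, not real cluster output.
def _has_rack_bucket_example():
    tree_output = {"nodes": [
        {"id": -1, "name": "default", "type": "root"},
        {"id": -5, "name": "rack0", "type": "rack"},
    ]}
    return any("rack" in node["name"] for node in tree_output["nodes"])  # -> True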
def silence_ceph_osd_crash_warning(osd_pod_name):
    """
    Silence the osd crash warning of a specific osd pod

    Args:
        osd_pod_name (str): The name of the osd pod which we need to
            silence the crash warning

    Returns:
        bool: True if it found the osd crash with name 'osd_pod_name'.
            False otherwise

    """
    ct_pod = pod.get_ceph_tools_pod()
    new_crash_objects_list = ct_pod.exec_ceph_cmd(ceph_cmd="ceph crash ls-new")
    for crash_obj in new_crash_objects_list:
        if crash_obj.get("utsname_hostname") == osd_pod_name:
            logger.info(f"Found osd crash with name {osd_pod_name}")
            obj_crash_id = crash_obj.get("crash_id")
            crash_info = ct_pod.exec_ceph_cmd(
                ceph_cmd=f"ceph crash info {obj_crash_id}"
            )
            logger.info(f"ceph crash info: {crash_info}")

            logger.info("silence the osd crash warning")
            ct_pod.exec_ceph_cmd(ceph_cmd=f"ceph crash archive {obj_crash_id}")
            return True

    logger.info(
        f"Didn't find osd crash with name {osd_pod_name} in ceph crash warnings"
    )
    return False
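# Hedged sketch of the `ceph crash ls-new` entries the function above iterates
# over; 'crash_id' and 'utsname_hostname' are fields of Ceph crash metadata,
# while the values below are made up for illustration.
def _match_crash_to_osd_example(osd_pod_name="rook-ceph-osd-0-example"):
    new_crash_objects_list = [
        {"crash_id": "2020-01-01_00:00:00.000000Z_example-uuid",
         "utsname_hostname": "rook-ceph-osd-0-example"},
    ]
    return [crash_obj["crash_id"] for crash_obj in new_crash_objects_list
            if crash_obj.get("utsname_hostname") == osd_pod_name]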
def fetch_used_size(cbp_name, exp_val=None):
    """
    Fetch used size in the pool

    Args:
        cbp_name (str): Name of the ceph block pool
        exp_val (float): Expected size in GB

    Returns:
        float: Used size in GB

    """
    ct_pod = pod.get_ceph_tools_pod()
    rados_status = ct_pod.exec_ceph_cmd(ceph_cmd=f"rados df -p {cbp_name}")
    size_bytes = rados_status['pools'][0]['size_bytes']

    # Convert size to GB
    used_in_gb = float(format(size_bytes / constants.GB, '.4f'))
    if exp_val:
        if not abs(exp_val - used_in_gb) < 1.5:
            raise UnexpectedBehaviour(
                f"Actual {used_in_gb} and expected size {exp_val} are not "
                f"matching. Retrying"
            )
    return used_in_gb
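# Worked example (with made-up numbers) of the bytes -> GB conversion and the
# +/- 1.5 GB tolerance used above when an expected value is passed in.
def _used_size_tolerance_example():
    size_bytes = 5 * constants.GB  # pretend `rados df` reported 5 GB used
    used_in_gb = float(format(size_bytes / constants.GB, '.4f'))
    exp_val = 4.2
    return abs(exp_val - used_in_gb) < 1.5  # True: a 0.8 GB delta is tolerated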
def get_ceph_capacity(self):
    """
    The function gets the total amount of storage capacity of the ocs cluster.
    The calculation is <num of OSDs> * <OSD size> / <replica number>.
    It does not take into account the currently used capacity.

    Returns:
        int: Total storage capacity in GiB (GiB is for development environment)

    """
    storage_cluster_obj = storage_cluster.StorageCluster(
        resource_name=config.ENV_DATA["storage_cluster_name"],
        namespace=config.ENV_DATA["cluster_namespace"],
    )
    replica = int(
        storage_cluster_obj.data["spec"]["storageDeviceSets"][0]["replica"]
    )

    ceph_pod = pod.get_ceph_tools_pod()
    ceph_status = ceph_pod.exec_ceph_cmd(ceph_cmd="ceph df")
    usable_capacity = (
        int(ceph_status["stats"]["total_bytes"]) / replica / constant.GB
    )

    return usable_capacity
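# Worked example of the capacity formula from the docstring, with made-up
# numbers: 3 OSDs of 2 TiB each at replica 3 yield 2048 GiB of usable capacity.
def _usable_capacity_example():
    osd_count, osd_size_bytes, replica = 3, 2 * 1024 ** 4, 3
    return osd_count * osd_size_bytes / replica / constant.GB  # -> 2048.0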
def test_multiple_pvc_concurrent_creation_deletion(self):
    """
    To exercise resource creation and deletion
    """
    # Start deleting 100 PVCs
    command = (
        f'for i in `seq 1 {self.number_of_pvc}`;do oc delete pvc '
        f'{self.pvc_base_name}$i -n {self.namespace};done'
    )
    proc = run_async(command)
    assert proc, (
        f'Failed to execute command for deleting {self.number_of_pvc} PVCs'
    )

    # Create 100 new PVCs
    # Parameters for PVC yaml as dict
    pvc_data = load_yaml_to_dict(constants.CSI_PVC_YAML)
    pvc_data['metadata']['namespace'] = self.namespace
    pvc_data['spec']['storageClassName'] = self.sc_obj.name
    pvc_data['metadata']['name'] = self.pvc_base_name_new

    # Create 100 PVCs
    pvc_objs = create_multiple_pvc(self.number_of_pvc, pvc_data)
    log.info(f'Created {self.number_of_pvc} new PVCs.')
    self.pvc_objs_new = pvc_objs[:]

    # Verify PVCs are Bound
    for pvc in self.pvc_objs_new:
        pvc.reload()
        assert pvc.status == constants.STATUS_BOUND, (
            f'PVC {pvc.name} is not Bound'
        )
    log.info('Verified: Newly created PVCs are in Bound state.')

    # Verify command to delete PVCs
    ret, out, err = proc.async_communicate()
    log.info(
        f'Return values of command: {command}.\nretcode:{ret}\nstdout:'
        f'{out}\nstderr:{err}'
    )
    assert not ret, 'Deletion of PVCs failed'

    # Verify PVCs are deleted
    for pvc in self.pvc_objs_initial:
        try:
            pvc.get()
            # Fail the test instead of silently returning if the PVC
            # unexpectedly still exists
            assert False, f'PVC {pvc.name} still exists after deletion'
        except exceptions.CommandFailed as exp:
            assert "not found" in str(exp), (
                f'Failed to fetch details of PVC {pvc.name}'
            )
            log.info(f'Expected: PVC {pvc.name} does not exist')
    log.info(f'Successfully deleted initial {self.number_of_pvc} PVCs')

    # Verify PVs using ceph toolbox. PVs should be deleted because
    # reclaimPolicy is Delete
    ceph_cmd = f'rbd ls -p {self.cbp_obj.name}'
    ct_pod = get_ceph_tools_pod()
    final_pv_list = ct_pod.exec_ceph_cmd(ceph_cmd=ceph_cmd, format='json')
    assert not any(pv in final_pv_list for pv in self.initial_pvs), (
        'PVs associated with deleted PVCs still exist'
    )
    log.info('Verified: PVs associated with deleted PVCs are also deleted')
def setup(self, request, pod_factory):
    """
    Set values for:
      paxos_service_trim_min=10
      paxos_service_trim_max=100
      osd_op_complaint_time=0.000001
    """
    self.fio_pod_obj = pod_factory(constants.CEPHFILESYSTEM)
    mon_pods = get_mon_pods()
    self.selected_mon_pod_obj = random.choice(mon_pods)
    self.selected_mon_pod = (
        self.selected_mon_pod_obj.get().get("metadata").get("labels").get("mon")
    )
    log.info(f"Selected mon pod is: {self.selected_mon_pod_obj.name}")

    log.info(
        "Setting values: paxos_service_trim_min=10, paxos_service_trim_max=100 "
        "and osd_op_complaint_time=0.000001"
    )
    self.ct_pod = pod.get_ceph_tools_pod()
    # mon in the "tell" command should be mon.a / mon.b / mon.c
    self.ct_pod.exec_ceph_cmd(
        ceph_cmd=f"ceph tell mon.{self.selected_mon_pod} injectargs "
        f"--paxos_service_trim_min=10"
    )
    self.ct_pod.exec_ceph_cmd(
        ceph_cmd=f"ceph tell mon.{self.selected_mon_pod} injectargs "
        f"--paxos_service_trim_max=100"
    )
    self.ct_pod.exec_ceph_cmd(
        ceph_cmd=f"ceph tell mon.{self.selected_mon_pod} injectargs "
        f"--osd_op_complaint_time=0.000001"
    )

    def finalizer():
        """
        Set default values for:
          paxos_service_trim_min=250
          paxos_service_trim_max=500
          osd_op_complaint_time=30.000000
        """
        if not self.stop_checking_mon_db:
            self.stop_checking_mon_db = True
        log.info(
            f"Setting default values for paxos_service_trim_min"
            f"({constants.DEFAULT_PAXOS_SERVICE_TRIM_MIN}), "
            f"paxos_service_trim_max({constants.DEFAULT_PAXOS_SERVICE_TRIM_MAX}) "
            f"and osd_op_complaint_time({constants.DEFAULT_OSD_OP_COMPLAINT_TIME})"
        )
        self.ct_pod.exec_ceph_cmd(
            ceph_cmd=f"ceph tell mon.{self.selected_mon_pod} injectargs "
            f"--paxos_service_trim_min={constants.DEFAULT_PAXOS_SERVICE_TRIM_MIN}"
        )
        self.ct_pod.exec_ceph_cmd(
            ceph_cmd=f"ceph tell mon.{self.selected_mon_pod} injectargs "
            f"--paxos_service_trim_max={constants.DEFAULT_PAXOS_SERVICE_TRIM_MAX}"
        )
        self.ct_pod.exec_ceph_cmd(
            ceph_cmd=f"ceph tell mon.{self.selected_mon_pod} injectargs "
            f"--osd_op_complaint_time={constants.DEFAULT_OSD_OP_COMPLAINT_TIME}"
        )

    request.addfinalizer(finalizer)
def test_run():
    tools_pod = pod.get_ceph_tools_pod()
    tools_pod.add_role(role='client')

    return radosbench.run(
        ceph_pods=[tools_pod],
        config={'time': 10, 'cleanup': False}
    )
def finalizer():
    log.info("Silence the ceph warnings by 'archiving' the crash")
    tool_pod = get_ceph_tools_pod()
    tool_pod.exec_ceph_cmd(ceph_cmd="ceph crash archive-all", format=None)
    log.info(
        "Perform Ceph and cluster health checks after silencing the ceph warnings"
    )
    ceph_health_check()
def test_run():
    tools_pod = pod.get_ceph_tools_pod()
    tools_pod.add_role(role="client")

    return radosbench.run(
        ceph_pods=[tools_pod],
        config={"time": 10, "cleanup": False}
    )
def get_cephfs_data_pool_name():
    """
    Fetches ceph fs datapool name from Ceph

    Returns:
        str: fs datapool name

    """
    ct_pod = pod.get_ceph_tools_pod()
    out = ct_pod.exec_ceph_cmd('ceph fs ls')
    return out[0]['data_pools'][0]
def test_main():
    tools_pod = pod.get_ceph_tools_pod()
    cmd = "ceph osd df"
    out, err, ret = tools_pod.exec_ceph_cmd(ceph_cmd=cmd)
    if out:
        print(out)
    if err:
        print(err)
    print(ret)
def get_admin_key():
    """
    Fetches admin key secret from Ceph

    Returns:
        str: The admin key

    """
    ct_pod = pod.get_ceph_tools_pod()
    out = ct_pod.exec_ceph_cmd('ceph auth get-key client.admin')
    return out['key']
def count_ceph_components():
    """
    Count OSD and MON components (daemons) of a Ceph cluster.

    Returns:
        tuple: (number of OSDs, number of MONs)

    """
    ct_pod = pod.get_ceph_tools_pod()
    ceph_osd_ls_list = ct_pod.exec_ceph_cmd(ceph_cmd="ceph osd ls")
    logger.debug(f"ceph osd ls output: {ceph_osd_ls_list}")
    # the "+ 1" is a WORKAROUND for a bug in exec_ceph_cmd()
    # https://github.com/red-hat-storage/ocs-ci/issues/1152
    osd_num = len(ceph_osd_ls_list) + 1
    mon_num = len(ct_pod.exec_ceph_cmd(ceph_cmd="ceph mon metadata"))
    logger.info(f"There are {osd_num} OSDs, {mon_num} MONs")
    return osd_num, mon_num
def get_ceph_df_detail():
    """
    Get ceph df detail

    Returns:
        dict: 'ceph df detail' command output

    """
    ceph_cmd = "ceph df detail"
    ct_pod = pod.get_ceph_tools_pod()
    return ct_pod.exec_ceph_cmd(ceph_cmd=ceph_cmd)
def get_balancer_eval():
    """
    Function to get ceph pg balancer eval value

    Returns:
        float: Eval output of pg balancer

    """
    ceph_cmd = "ceph balancer eval"
    ct_pod = pod.get_ceph_tools_pod()
    eval_out = ct_pod.exec_ceph_cmd(ceph_cmd=ceph_cmd).split(" ")
    return float(eval_out[3])
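# Hedged sketch of why index 3 is used above: `ceph balancer eval` typically
# prints a single line like the sample below, so splitting on spaces puts the
# score at index 3. The sample line is illustrative, not real cluster output.
def _balancer_eval_parse_example():
    sample = "current cluster score 0.013255 (lower is better)"
    return float(sample.split(" ")[3])  # -> 0.013255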
def test_osd_heap_profile(self):
    """
    Generate a heap profile dump file for OSDs and verify that the file
    is created in '/var/log/ceph/'
    """
    strings_err = ["error", "fail"]
    osd_pods = get_osd_pods()
    osd_id = str(random.randint(0, len(osd_pods) - 1))

    log.info(f"Start heap profiler for osd-{osd_id}")
    pod_tool = get_ceph_tools_pod()
    out = pod_tool.exec_cmd_on_pod(
        command=f"ceph tell osd.{osd_id} heap start_profiler",
        out_yaml_format=False,
    )
    log.info(f"command output: {out}")
    for string_err in strings_err:
        assert string_err not in out.lower(), (
            f"{string_err} found in the output of command: {out}"
        )

    log.info("Sleep 10 sec to let the heap profiler run")
    time.sleep(10)

    log.info("Dump heap profile")
    out = pod_tool.exec_sh_cmd_on_pod(command=f"ceph tell osd.{osd_id} heap dump")
    log.info(out)
    for string_err in strings_err:
        assert string_err not in out.lower(), (
            f"{string_err} found in the output of command: {out}"
        )

    log.info(f"Get osd-{osd_id} pod object")
    # guard against the pod object staying unbound if no osd id matches
    osd_pod_profile = None
    for osd_pod in osd_pods:
        if get_osd_pod_id(osd_pod) == osd_id:
            osd_pod_profile = osd_pod
    assert osd_pod_profile is not None, f"osd-{osd_id} pod object not found"

    osd_profile_str = f"osd.{osd_id}.profile"
    log.info(f"Verify {osd_profile_str} log exists in /var/log/ceph/")
    sample = TimeoutSampler(
        timeout=100,
        sleep=10,
        func=self.verify_output_command_osd_pod,
        command="ls -ltr /var/log/ceph/",
        pod_obj=osd_pod_profile,
        str_to_check=osd_profile_str,
    )
    if not sample.wait_for_func_status(result=True):
        log.error(f"{osd_profile_str} log does not exist in /var/log/ceph")
        raise ValueError(f"{osd_profile_str} log does not exist in /var/log/ceph")

    log.info(f"osd.{osd_id}.profile log exists in /var/log/ceph")
def corrupt_pg(osd_deployment, pool_name, pool_object):
    """
    Rewrite given object in a ceph pool with /etc/shadow file.

    Args:
        osd_deployment (object): OSD deployment object where PG will be corrupted
        pool_name (str): name of ceph pool to be corrupted
        pool_object (str): name of object to be corrupted

    """
    osd_pod = osd_deployment.pods[0]
    osd_data = osd_pod.get()
    osd_containers = osd_data["spec"]["containers"]
    original_osd_cmd = " ".join(osd_containers[0].get("command"))
    original_osd_args = osd_containers[0].get("args")
    original_osd_args = [",".join(arg.split()) for arg in original_osd_args]
    original_osd_args.remove("--foreground")
    original_osd_args = " ".join(original_osd_args)
    logger.info(f"Original args for osd deployment: {original_osd_args}")
    osd_id = osd_data["metadata"]["labels"]["ceph-osd-id"]
    ct_pod = pod.get_ceph_tools_pod()

    logger.info("Setting osd noout flag")
    ct_pod.exec_ceph_cmd("ceph osd set noout")
    logger.info("Setting osd noscrub flag")
    ct_pod.exec_ceph_cmd("ceph osd set noscrub")
    logger.info("Setting osd nodeep-scrub flag")
    ct_pod.exec_ceph_cmd("ceph osd set nodeep-scrub")

    patch_changes = [
        '[{"op": "remove", "path": "/spec/template/spec/containers/0/args"}]',
        '[{"op": "remove", "path": "/spec/template/spec/containers/0/livenessProbe"}]',
        '[{"op": "replace", "path": "/spec/template/spec/containers/0/command", '
        '"value" : ["/bin/bash", "-c", "sleep infinity"]}]',
        '[{"op": "remove", "path": "/spec/template/spec/containers/0/startupProbe"}]',
    ]
    for change in patch_changes:
        osd_deployment.ocp.patch(
            resource_name=osd_deployment.name, params=change, format_type="json"
        )

    logger.info(f"Looking for Placement Group ID with {pool_object} object")
    pgid = ct_pod.exec_ceph_cmd(f"ceph osd map {pool_name} {pool_object}")["pgid"]
    logger.info(f"Found Placement Group ID: {pgid}")

    osd_deployment.wait_for_available_replicas()
    osd_pod = osd_deployment.pods[0]
    osd_pod.exec_sh_cmd_on_pod(
        f"ceph-objectstore-tool --data-path /var/lib/ceph/osd/ceph-"
        f"{osd_id} --pgid {pgid} {pool_object} "
        f"set-bytes /etc/shadow --no-mon-config"
    )
    osd_pod.exec_cmd_on_pod(original_osd_cmd + " " + original_osd_args)
    ct_pod.exec_ceph_cmd(f"ceph pg deep-scrub {pgid}")
def get_ceph_version():
    """
    Gets the ceph version

    Returns:
        str: ceph version

    """
    # importing here to avoid circular imports
    from ocs_ci.ocs.resources import pod

    ct_pod = pod.get_ceph_tools_pod()
    ceph_version = ct_pod.exec_ceph_cmd("ceph version")
    return re.split(r'ceph version ', ceph_version['version'])[1]
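# Hedged sketch of the parsing above; the version string is a made-up example
# in the usual `ceph version` output format.
def _parse_ceph_version_example():
    ceph_version = {
        "version": "ceph version 14.2.8-59.el8cp (example-sha) nautilus (stable)"
    }
    # everything after the "ceph version " prefix is kept
    return re.split(r'ceph version ', ceph_version['version'])[1]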
def get_rook_version():
    """
    Gets the rook version

    Returns:
        str: rook version

    """
    # importing here to avoid circular imports
    from ocs_ci.ocs.resources import pod

    ct_pod = pod.get_ceph_tools_pod()
    rook_versions = ct_pod.exec_ceph_cmd("rook version", format='')
    return rook_versions['rook']
def run_io_on_pool(pool_obj):
    """
    Runs I/O on the given pool

    Args:
        pool_obj (object): The pool object on which the I/O will run

    Returns:
        Thread: A thread running the I/O

    """
    tools_pod = pod.get_ceph_tools_pod()
    tools_pod.add_role(role="client")

    return run_io_with_rados_bench(
        ceph_pods=[tools_pod],
        config={"time": 45, "cleanup": False, "pool": pool_obj.name},
    )
def get_percent_used_capacity():
    """
    Function to calculate the percentage of used capacity in a cluster

    Returns:
        float: The percentage of the used capacity in the cluster

    """
    ct_pod = pod.get_ceph_tools_pod()
    output = ct_pod.exec_ceph_cmd(ceph_cmd="ceph df")
    total_used = output.get("stats").get("total_used_raw_bytes")
    # total_bytes is the overall raw capacity, not the remaining free space
    total_bytes = output.get("stats").get("total_bytes")
    return 100.0 * total_used / total_bytes
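# Worked example of the percentage computed above, with made-up byte counts:
# 30 GiB of raw usage out of 100 GiB of raw capacity gives 30.0 percent.
def _percent_used_example():
    total_used, total_bytes = 30 * 1024 ** 3, 100 * 1024 ** 3
    return 100.0 * total_used / total_bytes  # -> 30.0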
def get_crush_map():
    """
    Get decompiled CRUSH map from ceph toolbox pod.

    Returns:
        str: Multiline string representing current Ceph CRUSH map

    """
    ct_pod = pod.get_ceph_tools_pod()
    file_comp = '/tmp/crush_comp'
    file_decomp = '/tmp/crush_decomp'
    ct_pod.exec_ceph_cmd(f"ceph osd getcrushmap -o {file_comp}")
    ct_pod.exec_ceph_cmd(f"crushtool -d {file_comp} -o {file_decomp}")
    return ct_pod.exec_bash_cmd_on_pod(f"cat {file_decomp}")
def node_replacement_verification_steps_ceph_side(
    old_node_name, new_node_name, new_osd_node_name
):
    """
    Check the verification steps from the Ceph side, after the process
    of node replacement as described in the docs

    Args:
        old_node_name (str): The name of the old node that has been deleted
        new_node_name (str): The name of the new node that has been created
        new_osd_node_name (str): The name of the new node that has been
            added to osd nodes

    Returns:
        bool: True if all the verification steps passed. False otherwise

    """
    if old_node_name == new_node_name:
        log.warning("Hostname didn't change")
        return False

    wait_for_nodes_status([new_node_name, new_osd_node_name])
    # It can take some time until all the ocs pods are up and running
    # after the process of node replacement
    if not pod.wait_for_pods_to_be_running():
        log.warning("Not all the pods are in a running state")
        return False

    ct_pod = pod.get_ceph_tools_pod()
    ceph_osd_status = ct_pod.exec_ceph_cmd(ceph_cmd="ceph osd status")
    if new_osd_node_name not in ceph_osd_status:
        log.warning("new osd node name not found in 'ceph osd status' output")
        return False
    if old_node_name in ceph_osd_status:
        log.warning("old node name found in 'ceph osd status' output")
        return False

    osd_node_names = get_osd_running_nodes()
    if new_osd_node_name not in osd_node_names:
        log.warning("the new osd hostname not found in osd node names")
        return False
    if old_node_name in osd_node_names:
        log.warning("the old hostname found in osd node names")
        return False

    from ocs_ci.ocs.cluster import check_ceph_osd_tree_after_node_replacement

    if not check_ceph_osd_tree_after_node_replacement():
        return False

    log.info("Verification steps from the ceph side finished successfully")
    return True