Exemple #1
0
def test_monitoring_enabled():
    """
    OCS Monitoring is enabled after OCS installation (which is why this test
    has a post deployment marker) by asking for values of one ceph and one
    noobaa related metrics.
    """
    prometheus = PrometheusAPI()

    # Ceph side: the query must return at least one sample and every
    # reported value must be a non-negative integer.
    logger.info("Checking that ceph data are provided in OCS monitoring")
    ceph_result = prometheus.query('ceph_pool_stored')
    assert len(ceph_result) > 0, (
        "check that we actually received some values for a ceph query")
    for sample in ceph_result:
        _, sample_value = sample['value']
        assert int(sample_value) >= 0, (
            "number of bytes in a pool isn't a positive integer or zero")
    # sanity check: expect one ceph_pool_stored sample per ceph pool
    logger.info(
        "Checking that size of ceph_pool_stored result matches number of pools")
    ct_pod = pod.get_ceph_tools_pod()
    ceph_pools = ct_pod.exec_ceph_cmd("ceph osd pool ls")
    assert len(ceph_result) == len(ceph_pools)

    # NooBaa side: same non-empty / non-negative checks for a MCG metric
    logger.info("Checking that MCG/NooBaa data are provided in OCS monitoring")
    noobaa_result = prometheus.query('NooBaa_bucket_status')
    assert len(noobaa_result) > 0, (
        "check that we actually received some values for a MCG/NooBaa query")
    for sample in noobaa_result:
        _, sample_value = sample['value']
        assert int(sample_value) >= 0, (
            "bucket status isn't a positive integer or zero")
Exemple #2
0
    def scan_cluster(self):
        """
        Get accurate info on current state of pods

        Refreshes the cached pod lists (mon/mds/mgr/osd/toolbox), reloads
        the cluster and CephFS resources, and updates the per-daemon counts.
        """
        # re-query every daemon pod list so cached state matches the cluster
        self._ceph_pods = pod.get_all_pods(self._namespace)
        self.mons = pod.get_mon_pods(self.mon_selector, self.namespace)
        self.mdss = pod.get_mds_pods(self.mds_selector, self.namespace)
        self.mgrs = pod.get_mgr_pods(self.mgr_selector, self.namespace)
        self.osds = pod.get_osd_pods(self.osd_selector, self.namespace)
        self.toolbox = pod.get_ceph_tools_pod()

        # set port attrib on mon pods
        self.mons = list(map(self.set_port, self.mons))
        self.cluster.reload()
        if self.cephfs:
            self.cephfs.reload()
        else:
            # CephFS object not cached yet: fetch the first CephFS resource
            # (if any) and wrap it; IndexError means none exists.
            try:
                self.cephfs_config = self.CEPHFS.get().get('items')[0]
                self.cephfs = ocs.OCS(**self.cephfs_config)
                self.cephfs.reload()
            except IndexError as e:
                logging.warning(e)
                logging.warning("No CephFS found")

        # counts are derived from the freshly scanned pod lists above
        self.mon_count = len(self.mons)
        self.mds_count = len(self.mdss)
        self.mgr_count = len(self.mgrs)
        self.osd_count = len(self.osds)
def verify_pv_not_exists(pv_name, cbp_name):
    """
    Ensure that pv does not exists

    Args:
        pv_name (str): Name of the PV / rbd image expected to be gone
        cbp_name (str): Name of the ceph block pool to list images from

    Raises:
        UnexpectedBehaviour: if the pv is still present on the ceph backend
            or still retrievable on the OCP side
    """

    # Validate on ceph side
    # (message fixed: we are verifying the pv does NOT exist)
    logger.info(f"Verifying pv {pv_name} doesn't exist on backend")
    ct_pod = pod.get_ceph_tools_pod()
    pvc_list = ct_pod.exec_ceph_cmd(
        ceph_cmd=f"rbd ls -p {cbp_name}", format='json'
    )
    if pv_name in pvc_list:
        raise UnexpectedBehaviour(f"pv {pv_name} exists on backend")
    logger.info(
        f"Expected: pv {pv_name} doesn't exist on backend after deleting pvc"
    )

    # Validate on oc side
    try:
        PV.get(pv_name)
    except CommandFailed as ecf:
        assert "not found" in str(ecf), (
            f"Unexpected: pv {pv_name} still exists"
        )
    else:
        # PV.get() succeeded, meaning the PV is still present; the original
        # code fell through silently here, hiding the failure.
        raise UnexpectedBehaviour(f"Unexpected: pv {pv_name} still exists")
    logger.info(
        f"Expected: pv should not be found "
        f"after deleting corresponding pvc"
    )
Exemple #4
0
    def test_ceph_default_values_check(self):
        """
        This test checks ceph default values taken from OCS 4.3 with the
        current values in the cluster

        """
        # The default ceph osd full ratio values
        expected_full_ratios = {
            "full_ratio": 0.85,
            "backfillfull_ratio": 0.8,
            "nearfull_ratio": 0.75,
        }
        ct_pod = pod.get_ceph_tools_pod()
        log.info("Checking the values of ceph osd full ratios in osd map")
        osd_dump_dict = ct_pod.exec_ceph_cmd("ceph osd dump")
        # Compare each expected ratio against the (rounded) cluster value
        actual_full_ratios = {}
        for ratio_parm, value in expected_full_ratios.items():
            ratio_value = osd_dump_dict.get(ratio_parm)
            rounded_ratio = float(round(ratio_value, 2))
            actual_full_ratios[ratio_parm] = rounded_ratio
            if rounded_ratio != value:
                log.error(
                    f"Actual {ratio_parm} value is {ratio_value:.2f} NOT "
                    f"matching the expected value {value}"
                )
        assert expected_full_ratios == actual_full_ratios, (
            "Actual full ratio values does not match expected full ratio values"
        )
        log.info(
            f"Actual full ratio {actual_full_ratios} values MATCHES expected "
            f"full ratio values {expected_full_ratios}"
        )

        # Check if the osd full ratios satisfies condition
        #  "nearfull < backfillfull < full"
        nearfull = osd_dump_dict["nearfull_ratio"]
        backfillfull = osd_dump_dict["backfillfull_ratio"]
        full = osd_dump_dict["full_ratio"]
        ratio_chain = f"{nearfull:.2f} < {backfillfull:.2f} < {full:.2f}"
        assert nearfull < backfillfull < full, (
            "osd full ratio values does not satisfy condition " + ratio_chain
        )
        log.info("osd full ratio values satisfies condition " + ratio_chain)

        # Check if PG balancer is active
        assert get_pg_balancer_status(), "PG balancer is not active"

        # Validates the default value of mon_max_pg_per_osd, BZ1908414.
        if float(config.ENV_DATA["ocs_version"]) >= 4.7:
            max_pg_per_osd = get_mon_config_value(key="mon_max_pg_per_osd")
            assert max_pg_per_osd == 600, (
                f"Failed, actual value:{max_pg_per_osd} not matching expected value: 600"
            )
Exemple #5
0
def drain_nodes(node_names):
    """
    Drain nodes

    Args:
        node_names (list): The names of the nodes

    Raises:
        TimeoutExpired: in case drain command fails to complete in time

    """
    ocp = OCP(kind="node")
    joined_names = " ".join(node_names)
    log.info(f"Draining nodes {joined_names}")
    drain_cmd = (
        f"adm drain {joined_names} --force=true --ignore-daemonsets "
        f"--delete-local-data"
    )
    try:
        ocp.exec_oc_cmd(drain_cmd, timeout=1800)
    except TimeoutExpired:
        # On timeout, capture ceph status for debugging before re-raising
        ct_pod = pod.get_ceph_tools_pod()
        ceph_status = ct_pod.exec_cmd_on_pod(
            "ceph status", out_yaml_format=False
        )
        log.error(
            f"Drain command failed to complete. Ceph status: {ceph_status}")
        # TODO: Add re-balance status once pull/1679 is merged
        raise
Exemple #6
0
    def scan_cluster(self):
        """
        Get accurate info on current state of pods

        Refreshes the cached pod lists (mon/mds/mgr/osd/toolbox), reloads
        the cluster and CephFS resources, and updates the per-daemon counts.
        Only mon pods in Running state are kept (workaround for BZ1748325).
        """
        self._ceph_pods = pod.get_all_pods(self._namespace)
        # TODO: Workaround for BZ1748325:
        # filter out non-running mon pods instead of trusting the selector
        mons = pod.get_mon_pods(self.mon_selector, self.namespace)
        for mon in mons:
            if mon.ocp.get_resource_status(
                    mon.name) == constant.STATUS_RUNNING:
                self.mons.append(mon)
        # TODO: End of workaround for BZ1748325
        self.mdss = pod.get_mds_pods(self.mds_selector, self.namespace)
        self.mgrs = pod.get_mgr_pods(self.mgr_selector, self.namespace)
        self.osds = pod.get_osd_pods(self.osd_selector, self.namespace)
        self.toolbox = pod.get_ceph_tools_pod()

        # set port attrib on mon pods
        self.mons = list(map(self.set_port, self.mons))
        self.cluster.reload()
        if self.cephfs:
            self.cephfs.reload()
        else:
            # CephFS object not cached yet: fetch the first CephFS resource
            # (if any) and wrap it; IndexError means none exists.
            try:
                self.cephfs_config = self.CEPHFS.get().get('items')[0]
                self.cephfs = ocs.OCS(**self.cephfs_config)
                self.cephfs.reload()
            except IndexError as e:
                logging.warning(e)
                logging.warning("No CephFS found")

        # counts are derived from the freshly scanned pod lists above
        self.mon_count = len(self.mons)
        self.mds_count = len(self.mdss)
        self.mgr_count = len(self.mgrs)
        self.osd_count = len(self.osds)
Exemple #7
0
def get_ceph_storage_stats(ceph_pool_name):
    """
    Get ceph storage utilization values from ``ceph df``: total STORED value
    and MAX AVAIL of given ceph pool, which are important for understanding
    how much space is already consumed and how much is still available.

    Args:
        ceph_pool_name (str): name of ceph pool where you want to write data

    Returns:
        tuple:
            int: sum of all ceph pool STORED values (Bytes)
            int: value of MAX AVAIL value of given ceph pool (Bytes)

    """
    ct_pod = pod.get_ceph_tools_pod()
    ceph_df_dict = ct_pod.exec_ceph_cmd(ceph_cmd="ceph df")
    # Accumulate STORED across every pool while searching for the target pool
    ceph_total_stored = 0
    ceph_pool = None
    for pool_entry in ceph_df_dict["pools"]:
        ceph_total_stored += pool_entry["stats"]["stored"]
        if pool_entry["name"] == ceph_pool_name:
            ceph_pool = pool_entry
    if ceph_pool is None:
        logger.error(
            f"pool {ceph_pool_name} was not found "
            f"in output of `ceph df`: {ceph_df_dict}")
    # If the following assert fail, the problem is either:
    #  - name of the pool has changed (when this happens before GA, it's
    #    likely ocs-ci bug, after the release it's a product bug),
    #  - pool is missing (likely a product bug)
    # either way, the fixture can't continue ...
    assert ceph_pool is not None, f"Pool: {ceph_pool_name} doesn't exist!"
    return ceph_total_stored, ceph_pool['stats']['max_avail']
Exemple #8
0
def check_ceph_osd_tree():
    """
    Checks whether an OSD tree is created/modified correctly.
    It is a summary of the previous functions: 'check_osd_tree_1az_vmware',
    'check_osd_tree_3az_aws', 'check_osd_tree_1az_aws'.

    Returns:
         bool: True, if the ceph osd tree is formed correctly. Else False
             (None for platforms other than vSphere/AWS, as before)

    """
    osd_pods = pod.get_osd_pods()
    # 'ceph osd tree' should show the new osds under right nodes/hosts
    #  Verification is different for 3 AZ and 1 AZ configs
    ct_pod = pod.get_ceph_tools_pod()
    tree_output = ct_pod.exec_ceph_cmd(ceph_cmd="ceph osd tree")
    if config.ENV_DATA["platform"].lower() == constants.VSPHERE_PLATFORM:
        return check_osd_tree_1az_vmware(tree_output, len(osd_pods))

    aws_number_of_zones = 3
    if config.ENV_DATA["platform"].lower() == constants.AWS_PLATFORM:
        # parse the osd tree. if it contains a node 'rack' then it's a
        # AWS_1AZ cluster. Else, 3 AWS_3AZ cluster
        # Fix: the original test `name in "rack0"` was a reversed substring
        # check — it matched only substrings of "rack0" (including the empty
        # string) and missed rack1/rack2. Check for a "rack" prefix instead.
        for node in tree_output["nodes"]:
            if node["name"].startswith("rack"):
                aws_number_of_zones = 1
                break
        if aws_number_of_zones == 1:
            return check_osd_tree_1az_aws(tree_output, len(osd_pods))
        return check_osd_tree_3az_aws(tree_output, len(osd_pods))
Exemple #9
0
def silence_ceph_osd_crash_warning(osd_pod_name):
    """
    Silence the osd crash warning of a specific osd pod

    Args:
        osd_pod_name (str): The name of the osd pod which we need to
            silence the crash warning

    Returns:
        bool: True if it found the osd crash with name 'osd_pod_name'. False otherwise

    """
    ct_pod = pod.get_ceph_tools_pod()
    crash_entries = ct_pod.exec_ceph_cmd(ceph_cmd="ceph crash ls-new")
    for crash_entry in crash_entries:
        # skip crashes reported for other hosts
        if crash_entry.get("utsname_hostname") != osd_pod_name:
            continue
        logger.info(f"Found osd crash with name {osd_pod_name}")
        crash_id = crash_entry.get("crash_id")
        crash_info = ct_pod.exec_ceph_cmd(
            ceph_cmd=f"ceph crash info {crash_id}"
        )
        logger.info(f"ceph crash info: {crash_info}")

        logger.info("silence the osd crash warning")
        ct_pod.exec_ceph_cmd(ceph_cmd=f"ceph crash archive {crash_id}")
        return True

    logger.info(
        f"Didn't find osd crash with name {osd_pod_name} in ceph crash warnings"
    )
    return False
Exemple #10
0
def fetch_used_size(cbp_name, exp_val=None):
    """
    Fetch used size in the pool

    Args:
        cbp_name (str): Name of the ceph block pool queried via ``rados df``
        exp_val(float): Expected size in GB

    Returns:
        float: Used size in GB

    Raises:
        UnexpectedBehaviour: if ``exp_val`` is given and the measured size
            differs from it by 1.5 GB or more
    """

    ct_pod = pod.get_ceph_tools_pod()
    rados_status = ct_pod.exec_ceph_cmd(
        ceph_cmd=f"rados df -p {cbp_name}"
    )
    size_bytes = rados_status['pools'][0]['size_bytes']

    # Convert size to GB
    used_in_gb = float(
        format(size_bytes / constants.GB, '.4f')
    )
    # "is not None" rather than truthiness, so an expected value of 0 GB
    # is validated instead of being silently skipped
    if exp_val is not None:
        if not abs(exp_val - used_in_gb) < 1.5:
            raise UnexpectedBehaviour(
                f"Actual {used_in_gb} and expected size {exp_val} not "
                f"matching. Retrying"
            )
    return used_in_gb
Exemple #11
0
    def get_ceph_capacity(self):
        """
        The function gets the total mount of storage capacity of the ocs cluster.
        the calculation is <Num of OSD> * <OSD size> / <replica number>
        it will not take into account the current used capacity.

        Returns:
            int : Total storage capacity in GiB (GiB is for development environment)

        """
        sc_obj = storage_cluster.StorageCluster(
            resource_name=config.ENV_DATA["storage_cluster_name"],
            namespace=config.ENV_DATA["cluster_namespace"],
        )
        # replica count comes from the first storage device set spec
        replica = int(
            sc_obj.data["spec"]["storageDeviceSets"][0]["replica"]
        )

        toolbox = pod.get_ceph_tools_pod()
        ceph_df = toolbox.exec_ceph_cmd(ceph_cmd="ceph df")
        total_bytes = int(ceph_df["stats"]["total_bytes"])
        # raw cluster bytes divided by replication factor, expressed in GiB
        return total_bytes / replica / constant.GB
    def test_multiple_pvc_concurrent_creation_deletion(self):
        """
        To exercise resource creation and deletion

        Deletes the initial batch of PVCs asynchronously while creating a new
        batch, then verifies the new PVCs are Bound, the old PVCs are gone,
        and the backing PVs were removed from ceph (reclaimPolicy Delete).
        """
        # Start deleting 100 PVCs
        command = (f'for i in `seq 1 {self.number_of_pvc}`;do oc delete pvc '
                   f'{self.pvc_base_name}$i -n {self.namespace};done')
        proc = run_async(command)
        assert proc, (
            f'Failed to execute command for deleting {self.number_of_pvc} PVCs'
        )

        # Create 100 new PVCs
        # Parameters for PVC yaml as dict
        pvc_data = load_yaml_to_dict(constants.CSI_PVC_YAML)
        pvc_data['metadata']['namespace'] = self.namespace
        pvc_data['spec']['storageClassName'] = self.sc_obj.name
        pvc_data['metadata']['name'] = self.pvc_base_name_new

        # Create 100 PVCs
        pvc_objs = create_multiple_pvc(self.number_of_pvc, pvc_data)

        log.info(f'Created {self.number_of_pvc} new PVCs.')
        self.pvc_objs_new = pvc_objs[:]

        # Verify PVCs are Bound
        for pvc in self.pvc_objs_new:
            pvc.reload()
            assert pvc.status == constants.STATUS_BOUND, (
                f'PVC {pvc.name} is not Bound')
        log.info('Verified: Newly created PVCs are in Bound state.')

        # Verify command to delete PVCs
        ret, out, err = proc.async_communicate()
        log.info(
            f'Return values of command: {command}.\nretcode:{ret}\nstdout:'
            f'{out}\nstderr:{err}')
        assert not ret, 'Deletion of PVCs failed'

        # Verify PVCs are deleted
        for pvc in self.pvc_objs_initial:
            try:
                pvc.get()
            except exceptions.CommandFailed as exp:
                assert "not found" in str(exp), (
                    f'Failed to fetch details of PVC {pvc.name}')
                log.info(f'Expected: PVC {pvc.name} does not exists ')
            else:
                # pvc.get() succeeded, so the PVC was not deleted. The
                # original `return False` here silently PASSED the test,
                # since pytest ignores return values — fail explicitly.
                assert False, f'PVC {pvc.name} still exists'
        log.info(f'Successfully deleted initial {self.number_of_pvc} PVCs')

        # Verify PVs using ceph toolbox. PVs should be deleted because
        # reclaimPolicy is Delete
        ceph_cmd = f'rbd ls -p {self.cbp_obj.name}'
        ct_pod = get_ceph_tools_pod()
        final_pv_list = ct_pod.exec_ceph_cmd(ceph_cmd=ceph_cmd, format='json')
        assert not any(pv in final_pv_list for pv in self.initial_pvs), (
            'PVs associated with deleted PVCs still exists')
        log.info('Verified: PVs associated with deleted PVCs are also deleted')
    def setup(self, request, pod_factory):
        """
        Set values for:
          paxos_service_trim_min=10
          paxos_service_trim_max=100
          osd_op_complaint_time=0.000001
        """
        self.fio_pod_obj = pod_factory(constants.CEPHFILESYSTEM)
        mon_pods = get_mon_pods()
        self.selected_mon_pod_obj = random.choice(mon_pods)
        self.selected_mon_pod = (
            self.selected_mon_pod_obj.get().get("metadata").get("labels").get("mon")
        )
        log.info(f"Selected mon pod is: {self.selected_mon_pod_obj.name}")
        log.info(
            "Setting values: paxos_service_trim_min=10, paxos_service_trim_max=100 "
            "and osd_op_complaint_time=0.000001"
        )
        self.ct_pod = pod.get_ceph_tools_pod()
        # mon in the "tell" command should be mon.a / mon.b / mon.c
        # inject each test value into the selected mon, one arg per call
        for inject_arg in (
            "--paxos_service_trim_min=10",
            "--paxos_service_trim_max=100",
            "--osd_op_complaint_time=0.000001",
        ):
            self.ct_pod.exec_ceph_cmd(
                ceph_cmd=f"ceph tell mon.{self.selected_mon_pod} injectargs {inject_arg}"
            )

        def finalizer():
            """
            Set default values for:
              paxos_service_trim_min=250
              paxos_service_trim_max=500
              osd_op_complaint_time=30.000000
            """
            if not self.stop_checking_mon_db:
                self.stop_checking_mon_db = True
            log.info(
                f"Setting default values for paxos_service_trim_min({constants.DEFAULT_PAXOS_SERVICE_TRIM_MIN}), "
                f"paxos_service_trim_max({constants.DEFAULT_PAXOS_SERVICE_TRIM_MAX}) "
                f"and osd_op_complaint_time({constants.DEFAULT_OSD_OP_COMPLAINT_TIME})"
            )
            # restore each default via the same injectargs mechanism
            for default_arg in (
                f"--paxos_service_trim_min={constants.DEFAULT_PAXOS_SERVICE_TRIM_MIN}",
                f"--paxos_service_trim_max={constants.DEFAULT_PAXOS_SERVICE_TRIM_MAX}",
                f"--osd_op_complaint_time={constants.DEFAULT_OSD_OP_COMPLAINT_TIME}",
            ):
                self.ct_pod.exec_ceph_cmd(
                    ceph_cmd=f"ceph tell mon.{self.selected_mon_pod} injectargs {default_arg}"
                )

        request.addfinalizer(finalizer)
Exemple #14
0
def test_run():
    # Run radosbench from the toolbox pod, tagged with the client role.
    tools_pod = pod.get_ceph_tools_pod()
    tools_pod.add_role(role='client')
    bench_config = {'time': 10, 'cleanup': False}
    return radosbench.run(ceph_pods=[tools_pod], config=bench_config)
Exemple #15
0
 def finalizer():
     """Archive all ceph crash reports, then re-check cluster health."""
     log.info("Silence the ceph warnings by “archiving” the crash")
     tool_pod = get_ceph_tools_pod()
     tool_pod.exec_ceph_cmd(
         ceph_cmd="ceph crash archive-all", format=None
     )
     log.info(
         "Perform Ceph and cluster health checks after silencing the ceph warnings"
     )
     ceph_health_check()
Exemple #16
0
def test_run():
    # Run radosbench from the toolbox pod, tagged with the client role.
    tools_pod = pod.get_ceph_tools_pod()
    tools_pod.add_role(role="client")
    bench_config = {"time": 10, "cleanup": False}
    return radosbench.run(ceph_pods=[tools_pod], config=bench_config)
Exemple #17
0
def get_cephfs_data_pool_name():
    """
    Fetches ceph fs datapool name from Ceph

    Returns:
        str: fs datapool name
    """
    fs_listing = pod.get_ceph_tools_pod().exec_ceph_cmd('ceph fs ls')
    # first filesystem entry, first of its data pools
    first_fs = fs_listing[0]
    return first_fs['data_pools'][0]
Exemple #18
0
def test_main():
    # Run "ceph osd df" on the ceph toolbox pod and print the results.
    tools_pod = pod.get_ceph_tools_pod()
    cmd = "ceph osd df"

    # NOTE(review): every other exec_ceph_cmd() caller in this codebase uses
    # its single parsed return value directly; confirm this version really
    # returns an (out, err, ret) triple before relying on this unpacking.
    out, err, ret = tools_pod.exec_ceph_cmd(ceph_cmd=cmd)
    if out:
        print(out)
    if err:
        print(err)
    print(ret)
Exemple #19
0
def get_admin_key():
    """
    Fetches admin key secret from Ceph

    Returns:
        str: The admin key
    """
    auth_info = pod.get_ceph_tools_pod().exec_ceph_cmd(
        'ceph auth get-key client.admin'
    )
    return auth_info['key']
Exemple #20
0
 def count_ceph_components():
     """Return (osd_num, mon_num) counts reported by the ceph toolbox pod."""
     ct_pod = pod.get_ceph_tools_pod()
     ceph_osd_ls_list = ct_pod.exec_ceph_cmd(ceph_cmd="ceph osd ls")
     logger.debug(f"ceph osd ls output: {ceph_osd_ls_list}")
     # the "+ 1" is a WORKAROUND for a bug in exec_ceph_cmd()
     # https://github.com/red-hat-storage/ocs-ci/issues/1152
     osd_num = len(ceph_osd_ls_list) + 1
     mon_metadata = ct_pod.exec_ceph_cmd(ceph_cmd="ceph mon metadata")
     mon_num = len(mon_metadata)
     logger.info(f"There are {osd_num} OSDs, {mon_num} MONs")
     return osd_num, mon_num
Exemple #21
0
def get_ceph_df_detail():
    """
    Get ceph osd df detail

    Returns:
         dict: 'ceph df details' command output

    """
    toolbox = pod.get_ceph_tools_pod()
    return toolbox.exec_ceph_cmd(ceph_cmd="ceph df detail")
Exemple #22
0
def get_balancer_eval():
    """
    Function to get ceph pg balancer eval value

    Returns:
        eval_out (float): Eval output of pg balancer

    """
    ct_pod = pod.get_ceph_tools_pod()
    # the score is taken from the fourth space-separated token of the output
    eval_tokens = ct_pod.exec_ceph_cmd(ceph_cmd="ceph balancer eval").split(" ")
    return float(eval_tokens[3])
Exemple #23
0
    def test_osd_heap_profile(self):
        """
        Generate heap profile dump file for OSDs and verify whether the file
        is created on '/var/log/ceph/'

        """
        strings_err = ["error", "fail"]
        osd_pods = get_osd_pods()
        osd_id = str(random.randint(0, len(osd_pods) - 1))

        log.info(f"Start heap profiler for osd-{osd_id}")
        pod_tool = get_ceph_tools_pod()
        out = pod_tool.exec_cmd_on_pod(
            command=f"ceph tell osd.{osd_id} heap start_profiler",
            out_yaml_format=False)
        # use the module logger consistently (the original mixed `log` with
        # root-logger `logging.*` calls, bypassing the module's log config)
        log.info(f"command output:{out}")
        for string_err in strings_err:
            assert (string_err not in out.lower()
                    ), f"{string_err} on the output command {out}"

        log.info("Sleep 10 sec, for running heap profiler")
        time.sleep(10)

        log.info("Dump heap profile")
        out = pod_tool.exec_sh_cmd_on_pod(
            command=f"ceph tell osd.{osd_id} heap dump")
        log.info(out)
        for string_err in strings_err:
            assert (string_err not in out.lower()
                    ), f"{string_err} on the output command {out}"

        log.info(f"Get osd-{osd_id} pod object")
        # initialize to avoid NameError if no pod matches the chosen id
        osd_pod_profile = None
        for osd_pod in osd_pods:
            if get_osd_pod_id(osd_pod) == osd_id:
                osd_pod_profile = osd_pod
        assert osd_pod_profile is not None, (
            f"osd pod with id {osd_id} not found")

        osd_profile_str = f"osd.{osd_id}.profile"
        log.info(f"Verify {osd_profile_str} log exist on /var/log/ceph/")
        sample = TimeoutSampler(
            timeout=100,
            sleep=10,
            func=self.verify_output_command_osd_pod,
            command="ls -ltr /var/log/ceph/",
            pod_obj=osd_pod_profile,
            str_to_check=osd_profile_str,
        )
        if not sample.wait_for_func_status(result=True):
            log.error(f"{osd_profile_str} log does not exist on /var/log/ceph")
            raise ValueError(
                f"{osd_profile_str} log does not exist on /var/log/ceph")

        log.info(f"osd.{osd_id}.profile log exist on /var/log/ceph")
Exemple #24
0
def corrupt_pg(osd_deployment, pool_name, pool_object):
    """
    Rewrite given object in a ceph pool with /etc/shadow file.

    Args:
        osd_deployment (object): OSD deployment object where PG will be corrupted
        pool_name (str): name of ceph pool to be corrupted
        pool_object (str): name of object to be corrupted
    """
    # Capture the original command/args of the OSD container so the OSD
    # daemon can be restarted manually after the deployment is patched.
    osd_pod = osd_deployment.pods[0]
    osd_data = osd_pod.get()
    osd_containers = osd_data["spec"]["containers"]
    original_osd_cmd = " ".join(osd_containers[0].get("command"))
    original_osd_args = osd_containers[0].get("args")
    # collapse internal whitespace in each arg before re-joining
    original_osd_args = [",".join(arg.split()) for arg in original_osd_args]
    original_osd_args.remove("--foreground")
    original_osd_args = " ".join(original_osd_args)
    logger.info(f"Original args for osd deployment: {original_osd_args}")
    osd_id = osd_data["metadata"]["labels"]["ceph-osd-id"]

    ct_pod = pod.get_ceph_tools_pod()
    # Prevent ceph from marking the OSD out or scrubbing while we tamper
    # with the on-disk object.
    logger.info("Setting osd noout flag")
    ct_pod.exec_ceph_cmd("ceph osd set noout")
    logger.info("Setting osd noscrub flag")
    ct_pod.exec_ceph_cmd("ceph osd set noscrub")
    logger.info("Setting osd nodeep-scrub flag")
    ct_pod.exec_ceph_cmd("ceph osd set nodeep-scrub")
    # Patch the deployment so the container just sleeps instead of running
    # the OSD daemon (and drop its probes), freeing the object store for
    # direct access with ceph-objectstore-tool.
    patch_changes = [
        '[{"op": "remove", "path": "/spec/template/spec/containers/0/args"}]',
        '[{"op": "remove", "path": "/spec/template/spec/containers/0/livenessProbe"}]',
        '[{"op": "replace", "path": "/spec/template/spec/containers/0/command", '
        '"value" : ["/bin/bash", "-c", "sleep infinity"]}]',
        '[{"op": "remove", "path": "/spec/template/spec/containers/0/startupProbe"}]',
    ]
    for change in patch_changes:
        osd_deployment.ocp.patch(
            resource_name=osd_deployment.name, params=change, format_type="json"
        )

    logger.info(f"Looking for Placement Group ID with {pool_object} object")
    pgid = ct_pod.exec_ceph_cmd(f"ceph osd map {pool_name} {pool_object}")["pgid"]
    logger.info(f"Found Placement Group ID: {pgid}")

    # Wait for the patched (sleeping) pod, then overwrite the object's bytes
    # with /etc/shadow directly in the object store.
    osd_deployment.wait_for_available_replicas()
    osd_pod = osd_deployment.pods[0]
    osd_pod.exec_sh_cmd_on_pod(
        f"ceph-objectstore-tool --data-path /var/lib/ceph/osd/ceph-"
        f"{osd_id} --pgid {pgid} {pool_object} "
        f"set-bytes /etc/shadow --no-mon-config"
    )
    # Relaunch the OSD daemon with its original command, then deep-scrub the
    # PG so ceph detects the corruption.
    osd_pod.exec_cmd_on_pod(original_osd_cmd + " " + original_osd_args)
    ct_pod.exec_ceph_cmd(f"ceph pg deep-scrub {pgid}")
Exemple #25
0
def get_ceph_version():
    """
    Gets the ceph version

    Returns:
         str: ceph version

    """
    # importing here to avoid circular imports
    from ocs_ci.ocs.resources import pod
    version_info = pod.get_ceph_tools_pod().exec_ceph_cmd("ceph version")
    # drop the leading "ceph version " prefix, keep the remainder
    return re.split(r'ceph version ', version_info['version'])[1]
Exemple #26
0
def get_rook_version():
    """
    Gets the rook version

    Returns:
        str: rook version

    """
    # importing here to avoid circular imports
    from ocs_ci.ocs.resources import pod
    toolbox = pod.get_ceph_tools_pod()
    version_map = toolbox.exec_ceph_cmd("rook version", format='')
    return version_map['rook']
def run_io_on_pool(pool_obj):
    """
    Runs rados bench I/O against the given ceph pool from the toolbox pod.

    Args:
        pool_obj (object): ceph pool object; its ``name`` attribute is passed
            as the target pool for rados bench

    Returns: A thread of I/O
    """
    tools_pod = pod.get_ceph_tools_pod()
    tools_pod.add_role(role="client")

    # 45s write workload, no cleanup, targeting the given pool
    return run_io_with_rados_bench(
        ceph_pods=[tools_pod],
        config={"time": 45, "cleanup": False, "pool": pool_obj.name},
    )
Exemple #28
0
def get_percent_used_capacity():
    """
    Function to calculate the percentage of used capacity in a cluster

    Returns:
        float: The percentage of the used capacity in the cluster

    """
    ceph_df = pod.get_ceph_tools_pod().exec_ceph_cmd(ceph_cmd="ceph df")
    used_bytes = ceph_df.get("stats").get("total_used_raw_bytes")
    total_bytes = ceph_df.get("stats").get("total_bytes")
    return 100.0 * used_bytes / total_bytes
Exemple #29
0
def get_crush_map():
    """
    Get decompiled CRUSH map from ceph toolbox pod.

    Returns:
        str: Multiline string representing current Ceph CRUSH map
    """
    toolbox = pod.get_ceph_tools_pod()
    compiled_path = '/tmp/crush_comp'
    decompiled_path = '/tmp/crush_decomp'
    # dump the binary map, decompile it, then read the text form back
    toolbox.exec_ceph_cmd(f"ceph osd getcrushmap -o {compiled_path}")
    toolbox.exec_ceph_cmd(f"crushtool -d {compiled_path} -o {decompiled_path}")
    return toolbox.exec_bash_cmd_on_pod(f"cat {decompiled_path}")
Exemple #30
0
def node_replacement_verification_steps_ceph_side(
    old_node_name, new_node_name, new_osd_node_name
):
    """
    Check the verification steps from the Ceph side, after the process
    of node replacement as described in the docs

    Args:
        old_node_name (str): The name of the old node that has been deleted
        new_node_name (str): The name of the new node that has been created
        new_osd_node_name (str): The name of the new node that has been added to osd nodes

    Returns:
        bool: True if all the verification steps passed. False otherwise

    """
    # guard clause: replacement must produce a different hostname
    if old_node_name == new_node_name:
        log.warning("Hostname didn't change")
        return False

    wait_for_nodes_status([new_node_name, new_osd_node_name])
    # It can take some time until all the ocs pods are up and running
    # after the process of node replacement
    if not pod.wait_for_pods_to_be_running():
        log.warning("Not all the pods in running state")
        return False

    # 'ceph osd status' must mention the new osd node and not the old one
    toolbox = pod.get_ceph_tools_pod()
    osd_status_output = toolbox.exec_ceph_cmd(ceph_cmd="ceph osd status")
    if new_osd_node_name not in osd_status_output:
        log.warning("new osd node name not found in 'ceph osd status' output")
        return False
    if old_node_name in osd_status_output:
        log.warning("old node name found in 'ceph osd status' output")
        return False

    # the osd node list must likewise contain only the new hostname
    running_osd_nodes = get_osd_running_nodes()
    if new_osd_node_name not in running_osd_nodes:
        log.warning("the new osd hostname not found in osd node names")
        return False
    if old_node_name in running_osd_nodes:
        log.warning("the old hostname found in osd node names")
        return False

    # imported here to avoid a circular import at module load time
    from ocs_ci.ocs.cluster import check_ceph_osd_tree_after_node_replacement

    if not check_ceph_osd_tree_after_node_replacement():
        return False

    log.info("Verification steps from the ceph side finish successfully")
    return True