コード例 #1
0
ファイル: test_add_capacity.py プロジェクト: nbalacha/ocs-ci
def add_capacity_test():
    """Expand the cluster by one deviceset of OSDs and validate the result.

    Adds capacity at the current OSD size, waits for all OSD pods (three
    per deviceset) to reach Running, verifies OSD encryption when
    encryption-at-rest is enabled, and finally checks Ceph health and the
    completion of the data re-balance.
    """
    device_sets = storage_cluster.add_capacity(storage_cluster.get_osd_size())
    osd_pod_ocp = OCP(
        kind=constants.POD,
        namespace=config.ENV_DATA["cluster_namespace"],
    )
    # Each deviceset contributes 3 OSD pods.
    osd_pod_ocp.wait_for_resource(
        timeout=300,
        condition=constants.STATUS_RUNNING,
        selector="app=rook-ceph-osd",
        resource_count=device_sets * 3,
    )

    # NOTE: verification of rook-ceph-osd-prepare pods (bug 1769061) is
    # intentionally omitted here as a workaround for bug 1842500.

    # OSDs must be encrypted when encryption-at-rest is configured.
    if config.ENV_DATA.get("encryption_at_rest"):
        osd_encryption_verification()

    ceph_health_check(namespace=config.ENV_DATA["cluster_namespace"], tries=80)
    assert CephCluster().wait_for_rebalance(
        timeout=5400
    ), "Data re-balance failed to complete"
コード例 #2
0
    def test_add_ocs_node(self, add_nodes):
        """
        Add OCS nodes to the cluster and wait until the resulting data
        re-balance has completed.

        """
        add_nodes(ocs_nodes=True)
        assert CephCluster().wait_for_rebalance(
            timeout=3600
        ), "Data re-balance failed to complete"
コード例 #3
0
    def test_nodereplacement_proactive(self):
        """
        Knip-894: proactive node replacement (without IO running).

        Replaces one OSD node, then confirms the cluster recovered: all
        resources are healthy and the data re-balance triggered by the
        replacement completes in time.
        """
        delete_and_create_osd_node(select_osd_node_name())

        # Verify everything running fine
        log.info(
            "Verifying All resources are Running and matches expected result")
        self.sanity_helpers.health_check(tries=90)
        assert CephCluster().wait_for_rebalance(
            timeout=1800
        ), "Data re-balance failed to complete"
コード例 #4
0
def add_capacity_test():
    """Add one deviceset worth of OSD capacity and validate the expansion.

    Steps:
      1. Record the names of the OSD pods that exist before expansion.
      2. Trigger capacity addition via ``storage_cluster.add_capacity``.
      3. Verify no pre-existing OSD pod was restarted (bug 1931601).
      4. Wait for all OSD pods (3 per deviceset) to reach Running.
      5. Verify OSD encryption when encryption-at-rest is enabled.
      6. Confirm Ceph health and completion of the data re-balance.
    """
    osd_size = storage_cluster.get_osd_size()
    existing_osd_pod_names = [osd_pod.name for osd_pod in get_osd_pods()]
    result = storage_cluster.add_capacity(osd_size)
    osd_pod_names_post_expansion = [
        osd_pod.name for osd_pod in get_osd_pods()
    ]
    logger.info(
        "Checking if existing OSD pods were restarted (deleted) post add capacity (bug 1931601)"
    )

    # A pre-expansion OSD pod name missing after expansion means that pod
    # was deleted/restarted by the capacity addition — a regression.
    restarted_osds = [
        pod_name
        for pod_name in existing_osd_pod_names
        if pod_name not in osd_pod_names_post_expansion
    ]
    assert (
        not restarted_osds
    ), f"The following OSD pods were restarted (deleted) post add capacity: {restarted_osds}"

    pod = OCP(kind=constants.POD,
              namespace=config.ENV_DATA["cluster_namespace"])
    # Each deviceset contributes 3 OSD pods.
    pod.wait_for_resource(
        timeout=300,
        condition=constants.STATUS_RUNNING,
        selector="app=rook-ceph-osd",
        resource_count=result * 3,
    )

    # NOTE: verification of rook-ceph-osd-prepare pods (bug 1769061) is
    # intentionally omitted here as a workaround for bug 1842500.

    # Verify OSDs are encrypted.
    if config.ENV_DATA.get("encryption_at_rest"):
        osd_encryption_verification()

    ceph_health_check(namespace=config.ENV_DATA["cluster_namespace"], tries=80)
    ceph_cluster_obj = CephCluster()
    assert ceph_cluster_obj.wait_for_rebalance(
        timeout=5400), "Data re-balance failed to complete"
コード例 #5
0
    def test_add_node(self):
        """
        Add worker nodes to the cluster while IOs are running, then wait
        for the resulting data re-balance to complete.
        """
        new_nodes = 3
        platform = config.ENV_DATA["platform"].lower()
        if platform in constants.CLOUD_PLATFORMS:
            if config.ENV_DATA["deployment_type"] == "ipi":
                # IPI: scale every machineset by one node.
                machinesets = machine_utils.get_machinesets()
                logger.info(
                    f"The worker nodes number before expansion {len(node.get_worker_nodes())}"
                )
                for machineset in machinesets:
                    add_new_node_and_label_it(machineset)
                logger.info(
                    f"The worker nodes number after expansion {len(node.get_worker_nodes())}"
                )
            else:
                # UPI: add nodes of the OS type matching the deployment.
                logger.info(
                    f"The worker nodes number before expansion {len(node.get_worker_nodes())}"
                )
                node_type = (
                    constants.RHEL_OS
                    if config.ENV_DATA.get("rhel_workers")
                    else constants.RHCOS
                )
                assert add_new_node_and_label_upi(
                    node_type, new_nodes
                ), "Add node failed"
                logger.info(
                    f"The worker nodes number after expansion {len(node.get_worker_nodes())}"
                )

        elif platform == constants.VSPHERE_PLATFORM:
            # Skip tracked by https://github.com/red-hat-storage/ocs-ci/issues/2403
            pytest.skip("Skipping add node in Vmware platform due to "
                        "https://bugzilla.redhat.com/show_bug.cgi?id=1844521")
        assert CephCluster().wait_for_rebalance(
            timeout=3600
        ), "Data re-balance failed to complete"
コード例 #6
0
    def test_add_capacity_internal(self, setup_ui):
        """
        Test Add Capacity on Internal cluster via UI

        Triggers capacity addition through the UI, waits for the three new
        OSD pods to appear and reach Running, verifies pod status via the
        UI, waits for the data re-balance, and checks OSD encryption when
        encryption-at-rest is configured.
        """
        logger.info("Get osd pods before add capacity")
        osd_pods_before_add_capacity = get_osd_pods()
        osd_count = len(osd_pods_before_add_capacity)

        logger.info("Add capacity via UI")
        infra_ui_obj = AddReplaceDeviceUI(setup_ui)
        infra_ui_obj.add_capacity_ui()

        # Fix: previously used ``logging.info`` (root logger) here,
        # inconsistent with the module-level ``logger`` used elsewhere.
        logger.info("Wait for osd pods to be in Running state")
        for osd_pods in TimeoutSampler(
            timeout=600,
            sleep=10,
            func=get_osd_pods,
        ):
            # One new deviceset adds exactly 3 OSD pods.
            if len(osd_pods) == (osd_count + 3):
                break

        osd_pod_names = []
        for osd_pod in osd_pods:
            wait_for_resource_state(
                resource=osd_pod, state=constants.STATUS_RUNNING, timeout=300
            )
            osd_pod_names.append(osd_pod.name)

        logger.info("Verify via ui, all osd pods in Running state")
        infra_ui_obj.verify_pod_status(pod_names=osd_pod_names)

        logger.info("Wait data re-balance to complete")
        ceph_cluster_obj = CephCluster()
        assert ceph_cluster_obj.wait_for_rebalance(
            timeout=5400
        ), "Data re-balance failed to complete"

        # Verify OSDs are encrypted when encryption-at-rest is enabled.
        if config.ENV_DATA.get("encryption_at_rest"):
            osd_encryption_verification()
コード例 #7
0
    def test_upgrade_ocp(self, reduce_and_resume_cluster_load):
        """
        Tests OCS stability when upgrading OCP

        Resolves the target OCP image, patches the upgrade channel,
        triggers the upgrade, then waits for every cluster operator and
        the cluster version to report healthy on the new version, while a
        Ceph health monitor watches the cluster throughout. Finally waits
        for data re-balance and a clean Ceph health check.
        """

        ceph_cluster = CephCluster()
        with CephHealthMonitor(ceph_cluster):

            ocp_channel = config.UPGRADE.get("ocp_channel",
                                             ocp.get_ocp_upgrade_channel())
            ocp_upgrade_version = config.UPGRADE.get("ocp_upgrade_version")
            if not ocp_upgrade_version:
                # No explicit version: take the latest from the channel and
                # qualify it with the configured architecture.
                ocp_upgrade_version = get_latest_ocp_version(
                    channel=ocp_channel)
                ocp_arch = config.UPGRADE["ocp_arch"]
                target_image = f"{ocp_upgrade_version}-{ocp_arch}"
            elif ocp_upgrade_version.endswith(".nightly"):
                target_image = expose_ocp_version(ocp_upgrade_version)
            else:
                # Fix: an explicitly configured GA version (not ending in
                # ".nightly") previously left ``target_image`` unassigned,
                # raising UnboundLocalError below. Use the version as given.
                target_image = ocp_upgrade_version

            logger.info(f"Target image; {target_image}")

            image_path = config.UPGRADE["ocp_upgrade_path"]
            cluster_operators = ocp.get_all_cluster_operators()
            logger.info(f" oc version: {ocp.get_current_oc_version()}")
            # Verify Upgrade subscription channel:
            ocp.patch_ocp_upgrade_channel(ocp_channel)
            for sampler in TimeoutSampler(
                    timeout=250,
                    sleep=15,
                    func=ocp.verify_ocp_upgrade_channel,
                    channel_variable=ocp_channel,
            ):
                if sampler:
                    logger.info(f"OCP Channel:{ocp_channel}")
                    break

            # Upgrade OCP
            logger.info(f"full upgrade path: {image_path}:{target_image}")
            ocp.upgrade_ocp(image=target_image, image_path=image_path)

            # Wait for upgrade of every cluster operator.
            for ocp_operator in cluster_operators:
                logger.info(f"Checking upgrade status of {ocp_operator}:")
                # ############ Workaround for issue 2624 #######
                # These operators were renamed between versions, so their
                # upgrade cannot be tracked by the old name.
                name_changed_between_versions = (
                    "service-catalog-apiserver",
                    "service-catalog-controller-manager",
                )
                if ocp_operator in name_changed_between_versions:
                    logger.info(f"{ocp_operator} upgrade will not be verified")
                    continue
                # ############ End of Workaround ###############
                ver = ocp.get_cluster_operator_version(ocp_operator)
                logger.info(f"current {ocp_operator} version: {ver}")
                for sampler in TimeoutSampler(
                        timeout=2700,
                        sleep=60,
                        func=ocp.confirm_cluster_operator_version,
                        target_version=target_image,
                        cluster_operator=ocp_operator,
                ):
                    if sampler:
                        logger.info(f"{ocp_operator} upgrade completed!")
                        break
                    else:
                        logger.info(
                            f"{ocp_operator} upgrade did not completed yet!")

            # post upgrade validation: check cluster operator status
            cluster_operators = ocp.get_all_cluster_operators()
            for ocp_operator in cluster_operators:
                logger.info(f"Checking cluster status of {ocp_operator}")
                for sampler in TimeoutSampler(
                        timeout=2700,
                        sleep=60,
                        func=ocp.verify_cluster_operator_status,
                        cluster_operator=ocp_operator,
                ):
                    if sampler:
                        break
                    else:
                        logger.info(f"{ocp_operator} status is not valid")
            # Post upgrade validation: check cluster version status
            logger.info("Checking clusterversion status")
            for sampler in TimeoutSampler(
                    timeout=900,
                    sleep=15,
                    func=ocp.validate_cluster_version_status):
                if sampler:
                    logger.info("Upgrade Completed Successfully!")
                    break

        new_ceph_cluster = CephCluster()
        new_ceph_cluster.wait_for_rebalance(timeout=1800)
        ceph_health_check(tries=90, delay=30)
コード例 #8
0
ファイル: test_delete_pod.py プロジェクト: xenolinux/ocs-ci
    def test_add_capacity_with_resource_delete(self,
                                               workload_storageutilization_rbd,
                                               resource_name, resource_id,
                                               is_kill_resource_repeatedly):
        """
        The function get the resource name, and id.
        The function adds capacity to the cluster, and then delete the resource while
        storage capacity is getting increased.

        Args:
            resource_name (str): the name of the resource to delete
            resource_id (int): the id of the resource to delete
            is_kill_resource_repeatedly (bool): If True then kill the resource repeatedly. Else, if False
                delete the resource only once.

        """
        # The workload_storageutilization_rbd fixture has already filled the
        # cluster; log how much capacity is in use before expansion.
        used_percentage = get_percent_used_capacity()
        logging.info(
            f"storageutilization is completed. used capacity = {used_percentage}"
        )

        osd_pods_before = pod_helpers.get_osd_pods()
        number_of_osd_pods_before = len(osd_pods_before)
        # No room to add another deviceset once the OSD limit is reached.
        if number_of_osd_pods_before >= constants.MAX_OSDS:
            pytest.skip("We have maximum of OSDs in the cluster")

        d = Disruptions()
        d.set_resource(resource_name)

        # Flag tracking expansion progress; set True once the new OSD pods
        # reach Running. NOTE(review): presumably polled by
        # self.kill_resource_repeatedly to stop the kill loop — confirm.
        self.new_pods_in_status_running = False

        osd_size = storage_cluster.get_osd_size()
        logging.info(f"Adding one new set of OSDs. osd size = {osd_size}")
        storagedeviceset_count = storage_cluster.add_capacity(osd_size)
        logging.info("Adding one new set of OSDs was issued without problems")

        # Wait for new osd's to come up. After the first new osd in status Init - delete the resource.
        # After deleting the resource we expect that all the new osd's will be in status running,
        # and the delete resource will be also in status running.
        pod_helpers.wait_for_new_osd_pods_to_come_up(number_of_osd_pods_before)
        logging.info(
            f"Delete a {resource_name} pod while storage capacity is getting increased"
        )
        if is_kill_resource_repeatedly:
            # Kill the resource in a background thread while the main thread
            # waits for the new OSD pods to reach Running.
            with ThreadPoolExecutor() as executor:
                executor.submit(self.kill_resource_repeatedly, resource_name,
                                resource_id)
                self.wait_for_osd_pods_to_be_running(storagedeviceset_count)
        else:
            # Single disruption: delete once, then wait for the OSDs.
            d.delete_resource(resource_id)
            self.wait_for_osd_pods_to_be_running(storagedeviceset_count)

        self.new_pods_in_status_running = True
        logging.info(
            "Finished verifying add capacity when one of the pods gets deleted"
        )
        logging.info("Waiting for ceph health check to finished...")
        ceph_health_check(namespace=config.ENV_DATA['cluster_namespace'],
                          tries=90)
        ceph_cluster_obj = CephCluster()
        assert ceph_cluster_obj.wait_for_rebalance(
            timeout=1800), ("Data re-balance failed to complete")