예제 #1
0
 def create_config(self):
     """
     Create the OCP deploy config.

     The install-config template for the current deployment platform and
     deployment type is rendered with config.ENV_DATA, the pull secret
     (and the ssh key, when one is available) is added, and the result is
     written to install-config.yaml inside the cluster path.

     If something needs to be changed for a specific platform, overload
     this method in the child class.
     """
     logger.info("Generating install-config")
     renderer = templating.Templating()
     template_name = "install-config-{}-{}.yaml.j2".format(
         self.deployment_platform, self.deployment_type
     )
     rendered = renderer.render_template(
         os.path.join("ocp-deployment", template_name), config.ENV_DATA
     )
     # The rendered text is logged while it is still free of the pull
     # secret, so that no sensitive data reaches the log.
     logger.info(f"Install config: \n{rendered}")
     # Work on the parsed YAML object instead of the raw text
     install_cfg = yaml.safe_load(rendered)
     install_cfg['pullSecret'] = self.get_pull_secret()
     public_key = self.get_ssh_key()
     if public_key:
         install_cfg['sshKey'] = public_key
     serialized = yaml.safe_dump(install_cfg)
     target = os.path.join(self.cluster_path, "install-config.yaml")
     with open(target, "w") as out:
         out.write(serialized)
예제 #2
0
    def build_ansible_inventory(self, hosts):
        """
        Render the ansible inventory jinja template and write it to
        /tmp/hosts

        Args:
            hosts (list): list of private host names

        Returns:
            str: path of the ansible file created

        """
        renderer = templating.Templating()
        # Variables handed to the inventory template
        inventory_vars = {
            "ansible_user": "******",
            "ansible_become": "True",
            "pod_kubeconfig": "/kubeconfig",
            "pod_pull_secret": "/tmp/pull-secret",
            "rhel_worker_nodes": hosts,
        }

        logging.info(inventory_vars)
        rendered = renderer.render_template(
            constants.ANSIBLE_INVENTORY_YAML, inventory_vars
        )
        logging.debug("Ansible hosts file:%s", rendered)

        host_file_path = "/tmp/hosts"
        with open(host_file_path, "w") as inventory:
            inventory.write(rendered)
        return host_file_path
예제 #3
0
    def deploy_ocp_prereq(self):
        """
        Perform generic prereq before calling openshift-installer

        The deploy/teardown flags and the state of the cluster path are
        checked first; an unusable combination ends the run through
        pytest.fail. Afterwards the install-config template is rendered,
        the pull secret is added and the result is written to
        install-config.yaml in the cluster path.
        """
        if self.teardown and not self.deploy:
            pytest.fail(
                f"Attempting teardown of non-accessible cluster: "
                f"{self.cluster_path}"
            )
        elif not self.deploy and not self.teardown:
            pytest.fail(
                f"The given cluster can not be connected to: "
                f"{self.cluster_path}. "
                "Provide a valid --cluster-path or use --deploy to "
                "deploy a new cluster"
            )
        elif not system.is_path_empty(self.cluster_path) and self.deploy:
            pytest.fail(
                f"The given cluster path is not empty: "
                f"{self.cluster_path}. "
                "Provide an empty --cluster-path and --deploy to deploy "
                "a new cluster"
            )
        else:
            logger.info(
                "A testing cluster will be deployed and cluster information "
                "stored at: %s",
                self.cluster_path,
            )

        logger.info("Generating install-config")
        secret_file = os.path.join(constants.TOP_DIR, "data", "pull-secret")

        renderer = templating.Templating()
        rendered = renderer.render_template(
            "install-config.yaml.j2", config.ENV_DATA
        )
        # The rendered text is logged while it is still free of the pull
        # secret, so that no sensitive data reaches the log.
        logger.info(f"Install config: \n{rendered}")
        # Work on the parsed YAML object instead of the raw text
        install_cfg = yaml.safe_load(rendered)
        with open(secret_file, "r") as secret:
            # Round-trip the JSON: this checks that the file is well-formed
            # and collapses the secret onto a single line.
            install_cfg['pullSecret'] = json.dumps(json.load(secret))
        serialized = yaml.safe_dump(install_cfg)
        target = os.path.join(self.cluster_path, "install-config.yaml")
        with open(target, "w") as out:
            out.write(serialized)
예제 #4
0
def cleanup(cluster_name, cluster_id):
    """
    Cleanup existing cluster in AWS

    A metadata.json rendered from the cleanup template is written to a
    new temporary directory, and openshift-install is then asked to
    destroy the cluster described by that directory.

    Args:
        cluster_name (str): Name of the cluster
        cluster_id (str): Cluster id to cleanup

    """
    template_vars = dict(cluster_name=cluster_name, cluster_id=cluster_id)
    renderer = templating.Templating(base_path=TEMPLATE_CLEANUP_DIR)
    metadata = renderer.render_template(CLEANUP_YAML, template_vars)

    workdir = tempfile.mkdtemp(prefix='cleanup_')
    with open(os.path.join(workdir, 'metadata.json'), "w") as metadata_file:
        metadata_file.write(metadata)

    installer = os.path.join(
        os.path.expanduser(config.RUN['bin_dir']), "openshift-install"
    )
    logger.info(f"cleaning up {cluster_id}")
    run_cmd(f"{installer} destroy cluster --dir {workdir} --log-level=debug")
예제 #5
0
    def deploy_ocs(self):
        """
        Handle OCS deployment, since OCS deployment steps are common to any
        platform, implementing OCS deployment here in base class.

        Returns early (after a warning) when a CephCluster resource is
        already present in self.namespace. Otherwise the deployment is done
        either from the yaml templates or, when
        self.ocs_operator_deployment is set, through
        self.deploy_ocs_via_operator(). Afterwards the method waits for the
        ceph pods, sets up the toolbox, optionally configures persistent
        monitoring, switches the registry backend, asserts the ceph health
        check and patches the default storage class to non-default.

        Raises:
            AssertionError: when one of the asserted waits or the ceph
                health check returns a falsy value

        """
        _templating = templating.Templating()

        ceph_cluster = ocp.OCP(
            kind='CephCluster', namespace=self.namespace
        )
        # Probe for an existing CephCluster; IndexError (no items) and
        # CommandFailed both mean that the installation has to be done.
        try:
            ceph_cluster.get().get('items')[0]
            logger.warning("OCS cluster already exists")
            return
        except (IndexError, CommandFailed):
            logger.info("Running OCS basic installation")

        if not self.ocs_operator_deployment:
            create_oc_resource(
                'common.yaml', self.cluster_path, _templating, config.ENV_DATA
            )
            run_cmd(
                f'oc label namespace {config.ENV_DATA["cluster_namespace"]} '
                f'"openshift.io/cluster-monitoring=true"'
            )
            run_cmd(
                f"oc policy add-role-to-user view "
                f"system:serviceaccount:openshift-monitoring:prometheus-k8s "
                f"-n {self.namespace}"
            )
            # HACK: If you would like to drop this hack, make sure that you
            # also updated docs and write appropriate unit/integration tests
            # for config processing.
            if config.ENV_DATA.get('monitoring_enabled') in (
                "true", "True", True
            ):
                # RBAC rules for monitoring, based on documentation change in
                # rook:
                # https://github.com/rook/rook/commit/1b6fe840f6ae7372a9675ba727ecc65326708aa8
                # HACK: This should be dropped when OCS is managed by OLM
                apply_oc_resource(
                    'rbac.yaml',
                    self.cluster_path,
                    _templating,
                    config.ENV_DATA,
                    template_dir="monitoring"
                )
            # Increased to 15 seconds as 10 is not enough
            # TODO: do the sampler function and check if resource exist
            # NOTE: wait_time is only defined on this (non-operator) path; its
            # later use is guarded by the same condition.
            wait_time = 15
            logger.info(f"Waiting {wait_time} seconds...")
            time.sleep(wait_time)
            create_oc_resource(
                'operator-openshift.yaml', self.cluster_path,
                _templating, config.ENV_DATA
            )
            logger.info(f"Waiting {wait_time} seconds...")
            time.sleep(wait_time)
            run_cmd(
                f"oc wait --for condition=ready pod "
                f"-l app=rook-ceph-operator "
                f"-n {self.namespace} "
                f"--timeout=120s"
            )
            run_cmd(
                f"oc wait --for condition=ready pod "
                f"-l app=rook-discover "
                f"-n {self.namespace} "
                f"--timeout=120s"
            )
            create_oc_resource(
                'cluster.yaml', self.cluster_path, _templating, config.ENV_DATA
            )
        else:
            self.deploy_ocs_via_operator()

        pod = ocp.OCP(
            kind=constants.POD, namespace=self.namespace
        )
        cfs = ocp.OCP(
            kind=constants.CEPHFILESYSTEM,
            namespace=self.namespace
        )
        # Check for Ceph pods
        assert pod.wait_for_resource(
            condition='Running', selector='app=rook-ceph-mon',
            resource_count=3, timeout=600
        )
        assert pod.wait_for_resource(
            condition='Running', selector='app=rook-ceph-mgr',
            timeout=600
        )
        assert pod.wait_for_resource(
            condition='Running', selector='app=rook-ceph-osd',
            resource_count=3, timeout=600
        )

        # validate ceph mon/osd volumes are backed by pvc
        validate_cluster_on_pvc()

        # Creating toolbox pod
        setup_ceph_toolbox()

        assert pod.wait_for_resource(
            condition=constants.STATUS_RUNNING,
            selector='app=rook-ceph-tools', resource_count=1, timeout=600
        )

        if not self.ocs_operator_deployment:
            logger.info(f"Waiting {wait_time} seconds...")
            time.sleep(wait_time)
            # HACK: This should be dropped (including service-monitor.yaml and
            # prometheus-rules.yaml files) when OCS is managed by OLM
            if config.ENV_DATA.get('monitoring_enabled') not in (
                "true", "True", True
            ):
                # HACK: skip creation of rook-ceph-mgr service monitor when
                # monitoring is enabled (if this were not skipped, the step
                # would fail because rook would create the service monitor at
                # this point already)
                create_oc_resource(
                    "service-monitor.yaml", self.cluster_path, _templating,
                    config.ENV_DATA
                )
                # HACK: skip creation of prometheus-rules, rook-ceph is
                # concerned with it's setup now, based on clarification from
                # Umanga Chapagain
                # NOTE(review): in this branch (monitoring not enabled) the
                # call below does create prometheus-rules.yaml; "skip"
                # presumably refers to the monitoring-enabled case - confirm.
                create_oc_resource(
                    "prometheus-rules.yaml", self.cluster_path, _templating,
                    config.ENV_DATA
                )
            logger.info(f"Waiting {wait_time} seconds...")
            time.sleep(wait_time)

            # Create MDS pods for CephFileSystem
            fs_data = templating.load_yaml(constants.CEPHFILESYSTEM_YAML)
            fs_data['metadata']['namespace'] = self.namespace

            ceph_obj = OCS(**fs_data)
            ceph_obj.create()
            assert pod.wait_for_resource(
                condition=constants.STATUS_RUNNING, selector='app=rook-ceph-mds',
                resource_count=2, timeout=600
            )

        # Check for CephFilesystem creation in ocp
        cfs_data = cfs.get()
        cfs_name = cfs_data['items'][0]['metadata']['name']

        # A failed validation is only logged here; it does not stop the
        # deployment.
        if helpers.validate_cephfilesystem(cfs_name):
            logger.info(f"MDS deployment is successful!")
            defaults.CEPHFILESYSTEM_NAME = cfs_name
        else:
            logger.error(
                f"MDS deployment Failed! Please check logs!"
            )

        # NOTE(review): this check relies on plain truthiness, whereas the
        # checks above compare 'monitoring_enabled' against
        # ("true", "True", True); a string such as "false" would pass here -
        # confirm that this is intended.
        if config.ENV_DATA.get('monitoring_enabled') and config.ENV_DATA.get('persistent-monitoring'):
            # Create a pool, secrets and sc
            secret_obj = helpers.create_secret(interface_type=constants.CEPHBLOCKPOOL)
            cbj_obj = helpers.create_ceph_block_pool()
            sc_obj = helpers.create_storage_class(
                interface_type=constants.CEPHBLOCKPOOL,
                interface_name=cbj_obj.name,
                secret_name=secret_obj.name
            )

            # Get the list of monitoring pods
            pods_list = get_all_pods(
                namespace=defaults.OCS_MONITORING_NAMESPACE,
                selector=['prometheus', 'alertmanager']
            )

            # Create configmap cluster-monitoring-config
            create_configmap_cluster_monitoring_pod(sc_obj.name)

            # Take some time to respin the pod
            waiting_time = 30
            logger.info(f"Waiting {waiting_time} seconds...")
            time.sleep(waiting_time)

            # Validate the pods are respinned and in running state
            validate_pods_are_respinned_and_running_state(
                pods_list
            )

            # Validate the pvc is created on monitoring pods
            validate_pvc_created_and_bound_on_monitoring_pods()

            # Validate the pvc are mounted on pods
            validate_pvc_are_mounted_on_monitoring_pods(pods_list)

        # Change registry backend to OCS CEPHFS RWX PVC
        registry.change_registry_backend_to_ocs()

        # Verify health of ceph cluster
        # TODO: move destroy cluster logic to new CLI usage pattern?
        logger.info("Done creating rook resources, waiting for HEALTH_OK")
        assert ceph_health_check(
            namespace=self.namespace
        )
        # patch gp2/thin storage class as 'non-default'
        self.patch_default_sc_to_non_default()
예제 #6
0
size is returned to backend pool
"""
import logging

import pytest

from ocs_ci.ocs import constants
from ocs_ci.ocs.exceptions import UnexpectedBehaviour
from tests import helpers
from ocs_ci.framework.testlib import tier1, acceptance, ManageTest
from ocs_ci.utility import templating
from ocs_ci.utility.retry import retry
from ocs_ci.ocs.resources import pod

# Module-level logger named after this module
logger = logging.getLogger(__name__)
# Templating instance created once, at import time
_templating = templating.Templating()


@retry(UnexpectedBehaviour, tries=5, delay=3, backoff=1)
def verify_pv_not_exists(pvc_obj, cbp_name, rbd_image_id):
    """
    Ensure that the PV does not exist

    The function is wrapped with the retry decorator for
    UnexpectedBehaviour (tries=5, delay=3, backoff=1).

    Args:
        pvc_obj: PVC object; its backed_pv attribute is used for logging
        cbp_name: passed as pool_name to the backend check
        rbd_image_id: passed as image_uuid to the backend check

    """
    # NOTE(review): the visible part of this function assigns ``status`` but
    # never uses it - the snippet appears to be truncated; confirm against
    # the full source.

    # Validate on ceph side
    logger.info(f"Verifying PV {pvc_obj.backed_pv} exists on backend")

    status = helpers.verify_volume_deleted_in_backend(
        interface=constants.CEPHBLOCKPOOL,
        image_uuid=rbd_image_id,
        pool_name=cbp_name)
예제 #7
0
파일: cleanup.py 프로젝트: wusui/ocs-ci
def cleanup(cluster_name, cluster_id, upi=False, failed_deletions=None):
    """
    Cleanup existing cluster in AWS

    A metadata.json rendered from the cleanup template is written to a new
    temporary directory, which is the cluster path handed to
    destroy_cluster. For UPI clusters the RHEL workers, the extra volumes
    and the CloudFormation stacks are removed as well.

    Args:
        cluster_name (str): Name of the cluster
        cluster_id (str): Cluster id to cleanup
        upi (bool): True for UPI cluster, False otherwise
        failed_deletions (list): list of clusters we failed to delete, used
            for reporting purposes. The cluster name is appended on failure
            whenever a list is passed, including an empty one.

    Raises:
        StackStatusError: re-raised when the final stack deletion of a UPI
            cluster fails
        CommandFailed: re-raised when destroying a non-UPI cluster fails

    """
    data = {'cluster_name': cluster_name, 'cluster_id': cluster_id}
    template = templating.Templating(base_path=TEMPLATE_CLEANUP_DIR)
    cleanup_template = template.render_template(CLEANUP_YAML, data)
    cleanup_path = tempfile.mkdtemp(prefix='cleanup_')
    cleanup_file = os.path.join(cleanup_path, 'metadata.json')
    with open(cleanup_file, "w") as temp:
        temp.write(cleanup_template)
    bin_dir = os.path.expanduser(config.RUN['bin_dir'])
    oc_bin = os.path.join(bin_dir, "openshift-install")

    if upi:
        aws = AWS()
        rhel_workers = get_rhel_worker_instances(cleanup_path)
        logger.info(f"{cluster_name}'s RHEL workers: {rhel_workers}")
        if rhel_workers:
            terminate_rhel_workers(rhel_workers)
        # Destroy extra volumes
        destroy_volumes(cluster_name)

        stack_names = list()
        # Get master, bootstrap and security group stacks
        for stack_type in ['ma', 'bs', 'sg']:
            try:
                stack_names.append(
                    aws.get_cloudformation_stacks(
                        pattern=f"{cluster_name}-{stack_type}")[0]
                    ['StackName'])
            except ClientError:
                continue

        # Get the worker stacks; worker indexes are probed one by one until
        # the lookup raises ClientError.
        worker_index = 0
        worker_stack_exists = True
        while worker_stack_exists:
            try:
                stack_names.append(
                    aws.get_cloudformation_stacks(
                        pattern=f"{cluster_name}-no{worker_index}")[0]
                    ['StackName'])
                worker_index += 1
            except ClientError:
                worker_stack_exists = False

        logger.info(f"Deleting stacks: {stack_names}")
        aws.delete_cloudformation_stacks(stack_names)

        # Destroy the cluster
        logger.info(f"cleaning up {cluster_id}")
        destroy_cluster(installer=oc_bin, cluster_path=cleanup_path)

        # The infra and vpc stacks are looked up only after the cluster has
        # been destroyed and are added to the list collected above.
        for stack_type in ['inf', 'vpc']:
            try:
                stack_names.append(
                    aws.get_cloudformation_stacks(
                        pattern=f"{cluster_name}-{stack_type}")[0]
                    ['StackName'])
            except ClientError:
                continue
        try:
            aws.delete_cloudformation_stacks(stack_names)
        except StackStatusError:
            logger.error('Failed to fully destroy cluster %s', cluster_name)
            # Compare against None: an empty list passed by the caller is
            # falsy, yet it is exactly the list the failure has to go into.
            if failed_deletions is not None:
                failed_deletions.append(cluster_name)
            raise
    else:
        logger.info(f"cleaning up {cluster_id}")
        try:
            destroy_cluster(installer=oc_bin, cluster_path=cleanup_path)
        except CommandFailed:
            logger.error('Failed to fully destroy cluster %s', cluster_name)
            # Same None check as above, so that an empty list is filled too
            if failed_deletions is not None:
                failed_deletions.append(cluster_name)
            raise
예제 #8
0
def cluster(request):
    """
    Deploy an OCP cluster with OCS (rook-ceph) on top of it, unless a
    running cluster is found at the configured cluster path.

    The function returns early when the cluster is already running, and
    also when only teardown (without deploy) was requested. When the
    teardown CLI parameter is set, cluster_teardown is registered as a
    finalizer before anything else happens.

    Args:
        request: object providing addfinalizer(), used to register the
            teardown (presumably the pytest fixture request - confirm)

    Raises:
        AssertionError: when one of the asserted waits or the ceph health
            check returns a falsy value

    """
    log.info(f"All logs located at {log_path}")
    log.info("Running OCS basic installation")
    cluster_path = config.ENV_DATA['cluster_path']
    deploy = config.RUN['cli_params']['deploy']
    teardown = config.RUN['cli_params']['teardown']
    # Add a finalizer to teardown the cluster after test execution is finished
    if teardown:
        request.addfinalizer(cluster_teardown)
        log.info("Will teardown cluster because --teardown was provided")
    # Test cluster access and if exist just skip the deployment.
    if is_cluster_running(cluster_path):
        log.info("The installation is skipped because the cluster is running")
        return
    elif teardown and not deploy:
        log.info("Attempting teardown of non-accessible cluster: %s",
                 cluster_path)
        return
    elif not deploy and not teardown:
        msg = "The given cluster can not be connected to: {}. ".format(
            cluster_path)
        msg += "Provide a valid --cluster-path or use --deploy to deploy a new cluster"
        pytest.fail(msg)
    elif not system.is_path_empty(cluster_path) and deploy:
        msg = "The given cluster path is not empty: {}. ".format(cluster_path)
        msg += "Provide an empty --cluster-path and --deploy to deploy a new cluster"
        pytest.fail(msg)
    else:
        log.info(
            "A testing cluster will be deployed and cluster information stored at: %s",
            cluster_path)

    # Generate install-config from template
    log.info("Generating install-config")
    pull_secret_path = os.path.join(constants.TOP_DIR, "data", "pull-secret")

    # TODO: check for supported platform and raise the exception if not
    # supported. Currently we support just AWS.

    _templating = templating.Templating()
    install_config_str = _templating.render_template("install-config.yaml.j2",
                                                     config.ENV_DATA)
    # Log the install config *before* adding the pull secret, so we don't leak
    # sensitive data.
    log.info(f"Install config: \n{install_config_str}")
    # Parse the rendered YAML so that we can manipulate the object directly
    install_config_obj = yaml.safe_load(install_config_str)
    with open(pull_secret_path, "r") as f:
        # Parse, then unparse, the JSON file.
        # We do this for two reasons: to ensure it is well-formatted, and
        # also to ensure it ends up as a single line.
        install_config_obj['pullSecret'] = json.dumps(json.loads(f.read()))
    install_config_str = yaml.safe_dump(install_config_obj)
    install_config = os.path.join(cluster_path, "install-config.yaml")
    with open(install_config, "w") as f:
        f.write(install_config_str)

    # Download installer
    installer = get_openshift_installer(config.DEPLOYMENT['installer_version'])
    # Download client
    get_openshift_client()

    # Deploy cluster
    log.info("Deploying cluster")
    run_cmd(f"{installer} create cluster "
            f"--dir {cluster_path} "
            f"--log-level debug")

    # Test cluster access
    if not OCP.set_kubeconfig(
            os.path.join(cluster_path, config.RUN.get('kubeconfig_location'))):
        pytest.fail("Cluster is not available!")

    # TODO: Create cluster object, add to config.ENV_DATA for other tests to
    # utilize.
    # Determine worker pattern and create ebs volumes
    # terraform.tfvars is read as JSON to obtain the cluster_id
    with open(os.path.join(cluster_path, "terraform.tfvars")) as f:
        tfvars = json.load(f)

    cluster_id = tfvars['cluster_id']
    worker_pattern = f'{cluster_id}-worker*'
    log.info(f'Worker pattern: {worker_pattern}')
    create_ebs_volumes(worker_pattern, region_name=config.ENV_DATA['region'])

    # render templates and create resources
    create_oc_resource('common.yaml', cluster_path, _templating,
                       config.ENV_DATA)
    run_cmd(f'oc label namespace {config.ENV_DATA["cluster_namespace"]} '
            f'"openshift.io/cluster-monitoring=true"')
    run_cmd(f"oc policy add-role-to-user view "
            f"system:serviceaccount:openshift-monitoring:prometheus-k8s "
            f"-n {config.ENV_DATA['cluster_namespace']}")
    # RBAC resources for the rbd and cephfs CSI node plugins and provisioners
    apply_oc_resource('csi-nodeplugin-rbac_rbd.yaml',
                      cluster_path,
                      _templating,
                      config.ENV_DATA,
                      template_dir="ocs-deployment/csi/rbd/")
    apply_oc_resource('csi-provisioner-rbac_rbd.yaml',
                      cluster_path,
                      _templating,
                      config.ENV_DATA,
                      template_dir="ocs-deployment/csi/rbd/")
    apply_oc_resource('csi-nodeplugin-rbac_cephfs.yaml',
                      cluster_path,
                      _templating,
                      config.ENV_DATA,
                      template_dir="ocs-deployment/csi/cephfs/")
    apply_oc_resource('csi-provisioner-rbac_cephfs.yaml',
                      cluster_path,
                      _templating,
                      config.ENV_DATA,
                      template_dir="ocs-deployment/csi/cephfs/")
    # Increased to 15 seconds as 10 is not enough
    # TODO: do the sampler function and check if resource exist
    wait_time = 15
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource('operator-openshift-with-csi.yaml', cluster_path,
                       _templating, config.ENV_DATA)
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    run_cmd(f"oc wait --for condition=ready pod "
            f"-l app=rook-ceph-operator "
            f"-n {config.ENV_DATA['cluster_namespace']} "
            f"--timeout=120s")
    run_cmd(f"oc wait --for condition=ready pod "
            f"-l app=rook-discover "
            f"-n {config.ENV_DATA['cluster_namespace']} "
            f"--timeout=120s")
    create_oc_resource('cluster.yaml', cluster_path, _templating,
                       config.ENV_DATA)

    POD = ocp.OCP(kind=constants.POD,
                  namespace=config.ENV_DATA['cluster_namespace'])
    CFS = ocp.OCP(kind=constants.CEPHFILESYSTEM,
                  namespace=config.ENV_DATA['cluster_namespace'])

    # Check for the Running status of Ceph Pods
    run_cmd(f"oc wait --for condition=ready pod "
            f"-l app=rook-ceph-agent "
            f"-n {config.ENV_DATA['cluster_namespace']} "
            f"--timeout=120s")
    assert POD.wait_for_resource(condition='Running',
                                 selector='app=rook-ceph-mon',
                                 resource_count=3,
                                 timeout=600)
    assert POD.wait_for_resource(condition='Running',
                                 selector='app=rook-ceph-mgr',
                                 timeout=600)
    assert POD.wait_for_resource(condition='Running',
                                 selector='app=rook-ceph-osd',
                                 resource_count=3,
                                 timeout=600)

    create_oc_resource('toolbox.yaml', cluster_path, _templating,
                       config.ENV_DATA)
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource('storage-manifest.yaml', cluster_path, _templating,
                       config.ENV_DATA)
    create_oc_resource("service-monitor.yaml", cluster_path, _templating,
                       config.ENV_DATA)
    create_oc_resource("prometheus-rules.yaml", cluster_path, _templating,
                       config.ENV_DATA)
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)

    # Create MDS pods for CephFileSystem
    fs_data = templating.load_yaml_to_dict(constants.CEPHFILESYSTEM_YAML)
    fs_data['metadata']['namespace'] = config.ENV_DATA['cluster_namespace']

    ceph_obj = OCS(**fs_data)
    ceph_obj.create()
    assert POD.wait_for_resource(condition=constants.STATUS_RUNNING,
                                 selector='app=rook-ceph-mds',
                                 resource_count=2,
                                 timeout=600)

    # Check for CephFilesystem creation in ocp
    cfs_data = CFS.get()
    cfs_name = cfs_data['items'][0]['metadata']['name']

    # A failed validation is only logged here; it does not stop the
    # deployment.
    if helpers.validate_cephfilesystem(cfs_name):
        log.info(f"MDS deployment is successful!")
        defaults.CEPHFILESYSTEM_NAME = cfs_name
    else:
        log.error(f"MDS deployment Failed! Please check logs!")

    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    log.info("Done creating rook resources, waiting for HEALTH_OK")
    assert ceph_health_check(namespace=config.ENV_DATA['cluster_namespace'])
예제 #9
0
    def deploy_ocs(self):
        """
        Handle OCS deployment, since OCS deployment steps are common to any
        platform, implementing OCS deployment here in base class.

        Returns early (after a warning) when a CephCluster resource is
        already present in the configured cluster namespace. Otherwise the
        rook-ceph resources are created from the yaml templates, the ceph
        pods are waited for, the ceph health check is asserted and the gp2
        storage class is patched to be non-default.

        Raises:
            AssertionError: when one of the asserted waits or the ceph
                health check returns a falsy value

        """
        _templating = templating.Templating()

        ceph_cluster = ocp.OCP(kind='CephCluster',
                               namespace=config.ENV_DATA['cluster_namespace'])
        # Probe for an existing CephCluster; IndexError (no items) and
        # CommandFailed both mean that the installation has to be done.
        try:
            ceph_cluster.get().get('items')[0]
            logger.warning("OCS cluster already exists")
            return
        except (IndexError, CommandFailed):
            logger.info("Running OCS basic installation")

        create_oc_resource('common.yaml', self.cluster_path, _templating,
                           config.ENV_DATA)

        run_cmd(f'oc label namespace {config.ENV_DATA["cluster_namespace"]} '
                f'"openshift.io/cluster-monitoring=true"')
        run_cmd(f"oc policy add-role-to-user view "
                f"system:serviceaccount:openshift-monitoring:prometheus-k8s "
                f"-n {config.ENV_DATA['cluster_namespace']}")
        # RBAC resources for the rbd and cephfs CSI node plugins and
        # provisioners
        apply_oc_resource('csi-nodeplugin-rbac_rbd.yaml',
                          self.cluster_path,
                          _templating,
                          config.ENV_DATA,
                          template_dir="ocs-deployment/csi/rbd/")
        apply_oc_resource('csi-provisioner-rbac_rbd.yaml',
                          self.cluster_path,
                          _templating,
                          config.ENV_DATA,
                          template_dir="ocs-deployment/csi/rbd/")
        apply_oc_resource('csi-nodeplugin-rbac_cephfs.yaml',
                          self.cluster_path,
                          _templating,
                          config.ENV_DATA,
                          template_dir="ocs-deployment/csi/cephfs/")
        apply_oc_resource('csi-provisioner-rbac_cephfs.yaml',
                          self.cluster_path,
                          _templating,
                          config.ENV_DATA,
                          template_dir="ocs-deployment/csi/cephfs/")
        # Increased to 15 seconds as 10 is not enough
        # TODO: do the sampler function and check if resource exist
        wait_time = 15
        logger.info(f"Waiting {wait_time} seconds...")
        time.sleep(wait_time)
        create_oc_resource('operator-openshift-with-csi.yaml',
                           self.cluster_path, _templating, config.ENV_DATA)
        logger.info(f"Waiting {wait_time} seconds...")
        time.sleep(wait_time)
        run_cmd(f"oc wait --for condition=ready pod "
                f"-l app=rook-ceph-operator "
                f"-n {config.ENV_DATA['cluster_namespace']} "
                f"--timeout=120s")
        run_cmd(f"oc wait --for condition=ready pod "
                f"-l app=rook-discover "
                f"-n {config.ENV_DATA['cluster_namespace']} "
                f"--timeout=120s")
        create_oc_resource('cluster.yaml', self.cluster_path, _templating,
                           config.ENV_DATA)

        pod = ocp.OCP(kind=constants.POD,
                      namespace=config.ENV_DATA['cluster_namespace'])
        cfs = ocp.OCP(kind=constants.CEPHFILESYSTEM,
                      namespace=config.ENV_DATA['cluster_namespace'])

        # Check for the Running status of Ceph Pods
        run_cmd(f"oc wait --for condition=ready pod "
                f"-l app=rook-ceph-agent "
                f"-n {config.ENV_DATA['cluster_namespace']} "
                f"--timeout=120s")
        assert pod.wait_for_resource(condition='Running',
                                     selector='app=rook-ceph-mon',
                                     resource_count=3,
                                     timeout=600)
        assert pod.wait_for_resource(condition='Running',
                                     selector='app=rook-ceph-mgr',
                                     timeout=600)
        assert pod.wait_for_resource(condition='Running',
                                     selector='app=rook-ceph-osd',
                                     resource_count=3,
                                     timeout=600)

        create_oc_resource('toolbox.yaml', self.cluster_path, _templating,
                           config.ENV_DATA)
        logger.info(f"Waiting {wait_time} seconds...")
        time.sleep(wait_time)
        create_oc_resource('storage-manifest.yaml', self.cluster_path,
                           _templating, config.ENV_DATA)
        create_oc_resource("service-monitor.yaml", self.cluster_path,
                           _templating, config.ENV_DATA)
        create_oc_resource("prometheus-rules.yaml", self.cluster_path,
                           _templating, config.ENV_DATA)
        logger.info(f"Waiting {wait_time} seconds...")
        time.sleep(wait_time)

        # Create MDS pods for CephFileSystem
        fs_data = templating.load_yaml_to_dict(constants.CEPHFILESYSTEM_YAML)
        fs_data['metadata']['namespace'] = config.ENV_DATA['cluster_namespace']

        ceph_obj = OCS(**fs_data)
        ceph_obj.create()
        assert pod.wait_for_resource(condition=constants.STATUS_RUNNING,
                                     selector='app=rook-ceph-mds',
                                     resource_count=2,
                                     timeout=600)

        # Check for CephFilesystem creation in ocp
        cfs_data = cfs.get()
        cfs_name = cfs_data['items'][0]['metadata']['name']

        # A failed validation is only logged here; it does not stop the
        # deployment.
        if helpers.validate_cephfilesystem(cfs_name):
            logger.info(f"MDS deployment is successful!")
            defaults.CEPHFILESYSTEM_NAME = cfs_name
        else:
            logger.error(f"MDS deployment Failed! Please check logs!")

        # Verify health of ceph cluster
        # TODO: move destroy cluster logic to new CLI usage pattern?
        logger.info("Done creating rook resources, waiting for HEALTH_OK")
        assert ceph_health_check(
            namespace=config.ENV_DATA['cluster_namespace'])
        # patch gp2 (EBS) storage class as 'non-default'
        logger.info("Patch gp2 storageclass as non-default")
        # The JSON patch is wrapped in single quotes (with surrounding
        # spaces) because it is interpolated into the oc command line below.
        patch = " '{\"metadata\": {\"annotations\":{\"storageclass.kubernetes.io/is-default-class\":\"false\"}}}' "
        run_cmd(f"oc patch storageclass gp2 "
                f"-p {patch} "
                f"--request-timeout=120s")
예제 #10
0
    def deploy_ocs(self):
        """
        Handle OCS deployment, since OCS deployment steps are common to any
        platform, implementing OCS deployment here in base class.

        When a CephCluster resource is already present in the cluster
        namespace, only a warning is logged and the method returns without
        touching the cluster. Otherwise the rook/ceph resources are created
        from templates one after another, the operator, discover, mon, mgr,
        osd and mds pods are waited for, the ceph health is checked and the
        gp2 storage class is patched as non-default.

        A failed CephFilesystem validation is only logged as an error, it
        does not abort the deployment.

        Raises:
            AssertionError: when one of the pod waits or the ceph health
                check returns a falsy result

        """
        _templating = templating.Templating()
        namespace = config.ENV_DATA['cluster_namespace']
        # HACK: If you would like to drop this hack, make sure that you also
        # updated docs and write appropriate unit/integration tests for config
        # processing.
        # The value may be configured either as a string or as a real boolean,
        # it is evaluated once here so both places using it stay in sync.
        monitoring_enabled = (
            config.ENV_DATA.get('monitoring_enabled') in ("true", "True", True)
        )

        ceph_cluster = ocp.OCP(kind='CephCluster', namespace=namespace)
        try:
            existing_clusters = ceph_cluster.get().get('items')
        except CommandFailed:
            # The query itself failed, handle it the same way as an empty
            # result and go on with the installation.
            existing_clusters = None
        if existing_clusters:
            logger.warning("OCS cluster already exists")
            return
        logger.info("Running OCS basic installation")

        create_oc_resource(
            'common.yaml', self.cluster_path, _templating, config.ENV_DATA
        )

        run_cmd(
            f'oc label namespace {namespace} '
            '"openshift.io/cluster-monitoring=true"'
        )
        run_cmd(
            "oc policy add-role-to-user view "
            "system:serviceaccount:openshift-monitoring:prometheus-k8s "
            f"-n {namespace}"
        )
        if monitoring_enabled:
            # RBAC rules for monitoring, based on documentation change in rook:
            # https://github.com/rook/rook/commit/1b6fe840f6ae7372a9675ba727ecc65326708aa8
            # HACK: This should be dropped when OCS is managed by OLM
            apply_oc_resource(
                'rbac.yaml',
                self.cluster_path,
                _templating,
                config.ENV_DATA,
                template_dir="monitoring"
            )
        # Increased to 15 seconds as 10 is not enough
        # TODO: do the sampler function and check if resource exist
        wait_time = 15
        logger.info(f"Waiting {wait_time} seconds...")
        time.sleep(wait_time)
        create_oc_resource(
            'operator-openshift.yaml', self.cluster_path,
            _templating, config.ENV_DATA
        )
        logger.info(f"Waiting {wait_time} seconds...")
        time.sleep(wait_time)
        # The operator and discover pods have to be ready before the cluster
        # resource is created.
        for app_label in ("rook-ceph-operator", "rook-discover"):
            run_cmd(
                "oc wait --for condition=ready pod "
                f"-l app={app_label} "
                f"-n {namespace} "
                "--timeout=120s"
            )
        create_oc_resource(
            'cluster.yaml', self.cluster_path, _templating, config.ENV_DATA
        )

        pod = ocp.OCP(kind=constants.POD, namespace=namespace)
        cfs = ocp.OCP(kind=constants.CEPHFILESYSTEM, namespace=namespace)
        # Check for Ceph pods
        assert pod.wait_for_resource(
            condition='Running', selector='app=rook-ceph-mon',
            resource_count=3, timeout=600
        )
        assert pod.wait_for_resource(
            condition='Running', selector='app=rook-ceph-mgr',
            timeout=600
        )
        assert pod.wait_for_resource(
            condition='Running', selector='app=rook-ceph-osd',
            resource_count=3, timeout=600
        )

        create_oc_resource(
            'toolbox.yaml', self.cluster_path, _templating, config.ENV_DATA
        )
        logger.info(f"Waiting {wait_time} seconds...")
        time.sleep(wait_time)
        # HACK: This should be dropped (including service-monitor.yaml and
        # prometheus-rules.yaml files) when OCS is managed by OLM
        if not monitoring_enabled:
            # HACK: both resources are created only when monitoring is NOT
            # enabled. With monitoring enabled rook would have created the
            # rook-ceph-mgr service monitor at this point already (so this
            # step would fail), and rook-ceph is concerned with the setup of
            # prometheus rules itself, based on clarification from Umanga
            # Chapagain.
            create_oc_resource(
                "service-monitor.yaml", self.cluster_path, _templating,
                config.ENV_DATA
            )
            create_oc_resource(
                "prometheus-rules.yaml", self.cluster_path, _templating,
                config.ENV_DATA
            )
        logger.info(f"Waiting {wait_time} seconds...")
        time.sleep(wait_time)

        # Create MDS pods for CephFileSystem
        fs_data = templating.load_yaml_to_dict(constants.CEPHFILESYSTEM_YAML)
        fs_data['metadata']['namespace'] = namespace

        ceph_obj = OCS(**fs_data)
        ceph_obj.create()
        assert pod.wait_for_resource(
            condition=constants.STATUS_RUNNING, selector='app=rook-ceph-mds',
            resource_count=2, timeout=600
        )

        # Check for CephFilesystem creation in ocp
        cfs_data = cfs.get()
        cfs_name = cfs_data['items'][0]['metadata']['name']

        if helpers.validate_cephfilesystem(cfs_name):
            logger.info("MDS deployment is successful!")
            # Remember the name of the validated filesystem in defaults
            defaults.CEPHFILESYSTEM_NAME = cfs_name
        else:
            logger.error("MDS deployment Failed! Please check logs!")

        # Verify health of ceph cluster
        # TODO: move destroy cluster logic to new CLI usage pattern?
        logger.info("Done creating rook resources, waiting for HEALTH_OK")
        assert ceph_health_check(namespace=namespace)
        # patch gp2 (EBS) storage class as 'non-default'
        logger.info("Patch gp2 storageclass as non-default")
        # The JSON patch is wrapped in single quotes (and padded with spaces)
        # because it is interpolated into the oc command line below.
        patch = (
            " '{\"metadata\": {\"annotations\":"
            "{\"storageclass.kubernetes.io/is-default-class\":\"false\"}}}' "
        )
        run_cmd(
            "oc patch storageclass gp2 "
            f"-p {patch} "
            "--request-timeout=120s"
        )