Example 1
    def set_kubeconfig(kubeconfig_path):
        """
        Export the KUBECONFIG environment variable for subsequent OC commands
        or other API calls.

        Args:
            kubeconfig_path (str): path to kubeconfig file to be exported

        Returns:
            boolean: True if successfully connected to cluster, False otherwise
        """
        # Test cluster access
        log.info("Testing access to cluster with %s", kubeconfig_path)
        if not os.path.isfile(kubeconfig_path):
            log.warning("The kubeconfig file %s doesn't exist!",
                        kubeconfig_path)
            return False
        os.environ['KUBECONFIG'] = kubeconfig_path
        try:
            run_cmd("oc cluster-info")
        except CommandFailed as ex:
            log.error("Cluster is not ready to use: %s", ex)
            return False
        log.info("Access to cluster is OK!")
        return True
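A minimal usage sketch, assuming set_kubeconfig is exposed on the OCP class as in the later examples; the kubeconfig path below is illustrative, not taken from the code above:

# Illustrative kubeconfig path; the later examples build it from
# default.KUBECONFIG_LOCATION instead.
kubeconfig = os.path.join("/tmp/my-cluster", "auth", "kubeconfig")
if not OCP.set_kubeconfig(kubeconfig):
    raise RuntimeError("Cluster is not reachable with the given kubeconfig")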
Example 2
def create_oc_resource(
    template_name,
    cluster_path,
    _templating,
    template_data={},
    template_dir="ocs-deployment",
):
    """
    Create an oc resource after rendering the specified template with
    the provided template data.

    Args:
        template_name (str): Name of the ocs-deployment config template
        cluster_path (str): Path to cluster directory, where files will be
            written
        _templating (Templating): Object of Templating class used for
            templating
        template_data (dict): Data used to render the template (default: {})
        template_dir (str): Directory under templates dir where template
            exists (default: ocs-deployment)
    """
    template_path = os.path.join(template_dir, template_name)
    template = _templating.render_template(template_path, template_data)
    cfg_file = os.path.join(cluster_path, template_name)
    with open(cfg_file, "w") as f:
        f.write(template)
    log.info(f"Creating rook resource from {template_name}")
    run_cmd(f"oc create -f {cfg_file}")
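A hedged usage sketch; the template name matches the later deployment examples, while the cluster path and template data are illustrative:

_templating = templating.Templating()
# Renders <templates>/ocs-deployment/common.yaml and runs 'oc create' on it;
# the cluster path here is illustrative only.
create_oc_resource(
    template_name="common.yaml",
    cluster_path="/tmp/my-cluster",
    _templating=_templating,
    template_data={},
)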
Example 3
def ceph_health_check():
    """
    Exec `ceph health` cmd on tools pod to determine health of cluster.

    Raises:
        CephHealthException: If the ceph health returned is not HEALTH_OK
        CommandFailed: If the command to retrieve the tools pod name or the
            command to get ceph health returns a non-zero exit code
    Returns:
        0 if HEALTH_OK

    """
    # TODO: grab namespace-name from rook data, default to openshift-storage
    namespace = "openshift-storage"
    run_cmd(f"oc wait --for condition=ready pod "
            f"-l app=rook-ceph-tools "
            f"-n {namespace} "
            f"--timeout=120s")
    tools_pod = run_cmd(f"oc -n {namespace} get pod -l 'app=rook-ceph-tools' "
                        f"-o jsonpath='{{.items[0].metadata.name}}'")
    health = run_cmd(f"oc -n {namespace} exec {tools_pod} ceph health")
    if health.strip() == "HEALTH_OK":
        log.info("HEALTH_OK, install successful.")
        return 0
    else:
        raise CephHealthException(
            f"Ceph cluster health is not OK. Health: {health}")
Example 4
def ceph_health_check(namespace=default.ROOK_CLUSTER_NAMESPACE):
    """
    Exec `ceph health` cmd on tools pod to determine health of cluster.

    Args:
        namespace (str): Namespace of OCS (default:
            default.ROOK_CLUSTER_NAMESPACE)

    Raises:
        CephHealthException: If the ceph health returned is not HEALTH_OK
        CommandFailed: If the command to retrieve the tools pod name or the
            command to get ceph health returns a non-zero exit code
    Returns:
        0 if HEALTH_OK

    """
    run_cmd(f"oc wait --for condition=ready pod "
            f"-l app=rook-ceph-tools "
            f"-n {namespace} "
            f"--timeout=120s")
    tools_pod = run_cmd(f"oc -n {namespace} get pod -l 'app=rook-ceph-tools' "
                        f"-o jsonpath='{{.items[0].metadata.name}}'")
    health = run_cmd(f"oc -n {namespace} exec {tools_pod} ceph health")
    if health.strip() == "HEALTH_OK":
        log.info("HEALTH_OK, install successful.")
        return 0
    else:
        raise CephHealthException(
            f"Ceph cluster health is not OK. Health: {health}")
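A hedged sketch of how a caller might consume this variant, handling the exceptions listed in the docstring (the namespace value is illustrative):

try:
    ceph_health_check(namespace="openshift-storage")
except CephHealthException as ex:
    log.error("Ceph cluster never reached HEALTH_OK: %s", ex)
    raise
except CommandFailed as ex:
    log.error("Could not query the rook-ceph-tools pod: %s", ex)
    raise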
Example 5
    def exec_oc_cmd(self, command):
        """
        Execute an 'oc' command.

        Args:
            command (str): The command to execute (e.g. create -f file.yaml)
                without the leading 'oc'

        Returns:
            dict: Dictionary representing the returned YAML output
        """
        oc_cmd = "oc "
        kubeconfig = os.getenv('KUBECONFIG')
        if self.namespace:
            oc_cmd += f"-n {self.namespace} "

        if kubeconfig:
            oc_cmd += f"--kubeconfig {kubeconfig} "

        oc_cmd += command
        out = run_cmd(cmd=oc_cmd)

        try:
            if out.startswith('hints = '):
                out = out[out.index('{'):]
        except ValueError:
            pass

        return yaml.safe_load(out)
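A hedged usage sketch; 'ocp' stands for an instance of the (unshown) class that defines exec_oc_cmd, and the command is illustrative:

# The parsed YAML comes back as a plain dict, so values are read by key.
nodes = ocp.exec_oc_cmd("get nodes -o yaml")
for item in nodes['items']:
    log.info("Node: %s", item['metadata']['name'])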
Example 6
    def new_project(self, project_name):
        """
        Creates a new project

        Args:
            project_name (str): Name of the project to be created

        Returns:
            bool: True in case project creation succeeded, False otherwise
        """
        command = f"oc new-project {project_name}"
        if f'Now using project "{project_name}"' in run_cmd(command):
            return True
        return False
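A hedged usage sketch; the project name is illustrative and 'ocp' is an instance of the enclosing (unshown) class:

# new_project reports failure via its return value rather than raising.
if not ocp.new_project("ocs-test-project"):
    log.error("Failed to create project ocs-test-project")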
Example 7
def switch_to_project(project_name):
    """
    Switch to another project

    Args:
        project_name (str): Name of the project to be switched to

    Returns:
        bool: True on success, False otherwise
    """
    log.info(f'Switching to project {project_name}')
    cmd = f'oc project {project_name}'
    success_msgs = [
        f'Now using project "{project_name}"',
        f'Already on project "{project_name}"'
    ]
    ret = run_cmd(cmd)
    if any(msg in ret for msg in success_msgs):
        return True
    return False
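A hedged usage sketch with an illustrative project name:

# Both "Now using project ..." and "Already on project ..." count as success,
# so switching to the currently active project still returns True.
assert switch_to_project("openshift-storage")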
Example 8
    def exec_oc_cmd(self, command):
        """
        Execute an 'oc' command.

        Args:
            command (str): The command to execute (e.g. create -f file.yaml)
                without the leading 'oc'

        Returns:
            Munch obj: Munch object representing the returned YAML output
        """
        oc_cmd = "oc "
        kubeconfig = os.getenv('KUBECONFIG')
        if self.namespace:
            oc_cmd += f"-n {self.namespace} "

        if kubeconfig:
            oc_cmd += f"--kubeconfig {kubeconfig} "

        oc_cmd += command
        out = run_cmd(cmd=oc_cmd)
        return munchify(yaml.safe_load(out))
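A hedged usage sketch contrasting this variant with Example 5: the munchified result supports attribute access on nested mappings ('ocp' is an instance of the unshown class, and the command is illustrative):

nodes = ocp.exec_oc_cmd("get nodes -o yaml")
# Nested mappings become Munch objects, so attribute access works; the
# top-level 'items' key is still read with [] because it collides with
# the built-in dict.items() method.
for item in nodes['items']:
    log.info("Node: %s", item.metadata.name)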
Example 9
def run(**kwargs):
    log.info("Running OCS basic installation")
    test_data = kwargs.get('test_data')
    cluster_path = test_data.get('cluster-path')
    # Test cluster access; if reachable, skip the deployment.
    if cluster_path and OCP.set_kubeconfig(
            os.path.join(cluster_path, default.KUBECONFIG_LOCATION)):
        return TestStatus.SKIPPED
    config = kwargs.get('config')
    cluster_conf = kwargs.get('cluster_conf')
    workers = masters = aws_region = None
    if cluster_conf:
        cluster_details = cluster_conf.get('aws', {}).get('cluster', {})
        workers = cluster_details.get('workers')
        masters = cluster_details.get('masters')
        aws_region = cluster_details.get('region', default.AWS_REGION)

    # Generate install-config from template
    log.info("Generating install-config")
    # TODO: determine better place to create cluster directories - (log dir?)
    cluster_dir_parent = "/tmp"
    cluster_name = test_data.get('cluster-name')
    base_cluster_name = test_data.get('cluster-name', default.CLUSTER_NAME)
    cid = random.randint(10000, 99999)
    if not (cluster_name and cluster_path):
        cluster_name = f"{base_cluster_name}-{cid}"
    if not cluster_path:
        cluster_path = os.path.join(cluster_dir_parent, cluster_name)
    run_cmd(f"mkdir -p {cluster_path}")
    pull_secret_path = os.path.join(templating.TOP_DIR, "data", "pull-secret")
    with open(pull_secret_path, "r") as f:
        pull_secret = f.readline()

    data = {
        "cluster_name": cluster_name,
        "pull_secret": pull_secret,
    }
    if workers:
        data.update({'worker_replicas': workers})
    if masters:
        data.update({'master_replicas': masters})
    if aws_region:
        data.update({'region': aws_region})

    _templating = templating.Templating()
    template = _templating.render_template("install-config.yaml.j2", data)
    log.info(f"Install config: \n{template}")
    install_config = os.path.join(cluster_path, "install-config.yaml")
    with open(install_config, "w") as f:
        f.write(template)

    # Download installer
    installer_filename = "openshift-install"
    tarball = f"{installer_filename}.tar.gz"
    if os.path.isfile(installer_filename):
        log.info("Installer exists, skipping download")
    else:
        log.info("Downloading openshift installer")
        ver = config.get('installer-version', default.INSTALLER_VERSION)
        if platform.system() == "Darwin":
            os_type = "mac"
        elif platform.system() == "Linux":
            os_type = "linux"
        else:
            raise UnsupportedOSType
        url = (f"https://mirror.openshift.com/pub/openshift-v4/clients/ocp/"
               f"{ver}/openshift-install-{os_type}-{ver}.tar.gz")
        download_file(url, tarball)
        run_cmd(f"tar xzvf {tarball}")

    # Deploy cluster
    log.info("Deploying cluster")
    run_cmd(f"./openshift-install create cluster "
            f"--dir {cluster_path} "
            f"--log-level debug")

    # Test cluster access
    if not OCP.set_kubeconfig(
            os.path.join(cluster_path, default.KUBECONFIG_LOCATION)):
        return TestStatus.FAILED

    # TODO: Create cluster object, add to test_data for other tests to utilize
    # Determine worker pattern and create ebs volumes
    with open(os.path.join(cluster_path, "terraform.tfvars")) as f:
        tfvars = json.load(f)

    cluster_id = tfvars['cluster_id']
    worker_pattern = f'{cluster_id}-worker*'
    log.info(f'Worker pattern: {worker_pattern}')
    region_name = aws_region if aws_region else default.AWS_REGION
    create_ebs_volumes(worker_pattern, region_name=region_name)

    # Use Rook to install Ceph cluster
    # retrieve rook config from cluster_conf
    rook_data = {}
    if cluster_conf:
        rook_data = cluster_conf.get('rook', {})

    # render templates and create resources
    create_oc_resource('common.yaml', rook_data, cluster_path, _templating)
    run_cmd('oc label namespace openshift-storage '
            '"openshift.io/cluster-monitoring=true"')
    run_cmd("oc policy add-role-to-user view "
            "system:serviceaccount:openshift-monitoring:prometheus-k8s "
            "-n openshift-storage")
    create_oc_resource('operator-openshift.yaml', rook_data, cluster_path,
                       _templating)
    wait_time = 5
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    run_cmd("oc wait --for condition=ready pod "
            "-l app=rook-ceph-operator "
            "-n openshift-storage "
            "--timeout=120s")
    run_cmd("oc wait --for condition=ready pod "
            "-l app=rook-ceph-agent "
            "-n openshift-storage "
            "--timeout=120s")
    run_cmd("oc wait --for condition=ready pod "
            "-l app=rook-discover "
            "-n openshift-storage "
            "--timeout=120s")
    create_oc_resource('cluster.yaml', rook_data, cluster_path, _templating)
    create_oc_resource('toolbox.yaml', rook_data, cluster_path, _templating)
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource('storage-manifest.yaml', rook_data, cluster_path,
                       _templating)
    create_oc_resource("service-monitor.yaml", rook_data, cluster_path,
                       _templating)
    create_oc_resource("prometheus-rules.yaml", rook_data, cluster_path,
                       _templating)

    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    log.info("Done creating rook resources, waiting for HEALTH_OK")
    rc = ceph_health_check()

    # Destroy cluster (if configured)
    destroy_cmd = (f"./openshift-install destroy cluster "
                   f"--dir {cluster_path} "
                   f"--log-level debug")
    if config.get("destroy-cluster"):
        log.info("Destroying cluster")
        run_cmd(destroy_cmd)
        # TODO: destroy volumes created
        os.remove(installer_filename)
        os.remove(tarball)
    else:
        log.info(f"Cluster directory is located here: {cluster_path}")
        log.info(
            f"Skipping cluster destroy. "
            f"To manually destroy the cluster execute the following cmd:\n"
            f"{destroy_cmd}")

    return rc
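For reference, a hedged sketch of the kwargs shape this run() reads, reconstructed from the .get() calls above; every concrete value is illustrative:

run(
    test_data={'cluster-name': 'my-ocs', 'cluster-path': None},
    config={'installer-version': '4.1.0', 'destroy-cluster': False},
    cluster_conf={
        'aws': {'cluster': {'workers': 3, 'masters': 3, 'region': 'us-east-2'}},
        'rook': {},
    },
)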
Example 10
def run(**kwargs):
    log.info("Running OCS basic installation")
    test_data = kwargs.get('test_data')
    cluster_path = test_data.get('cluster-path')
    # Test cluster access; if reachable, skip the deployment.
    if cluster_path and OCP.set_kubeconfig(
            os.path.join(cluster_path, default.KUBECONFIG_LOCATION)):
        return TestStatus.SKIPPED
    config = kwargs.get('config')
    cluster_conf = kwargs.get('cluster_conf', {})

    env_data = deepcopy(default.ENV_DATA)
    custom_env_data = cluster_conf.get('env_data', {})
    # Generate install-config from template
    log.info("Generating install-config")
    # TODO: determine better place to create cluster directories - (log dir?)
    cluster_dir_parent = "/tmp"
    cluster_name = test_data.get('cluster-name')
    base_cluster_name = test_data.get('cluster-name', default.CLUSTER_NAME)
    cid = random.randint(10000, 99999)
    if not (cluster_name and cluster_path):
        cluster_name = f"{base_cluster_name}-{cid}"
    if not cluster_path:
        cluster_path = os.path.join(cluster_dir_parent, cluster_name)
    run_cmd(f"mkdir -p {cluster_path}")
    pull_secret_path = os.path.join(templating.TOP_DIR, "data", "pull-secret")
    with open(pull_secret_path, "r") as f:
        pull_secret = f.readline()
    custom_env_data.update({
        'pull_secret': pull_secret,
        'cluster_name': cluster_name,
    })
    if custom_env_data:
        env_data.update(custom_env_data)

    # TODO: check for supported platform and raise the exception if not
    # supported. Currently we support just AWS.

    _templating = templating.Templating()
    template = _templating.render_template("install-config.yaml.j2", env_data)
    log.info(f"Install config: \n{template}")
    install_config = os.path.join(cluster_path, "install-config.yaml")
    with open(install_config, "w") as f:
        f.write(template)

    # Download installer
    version = config.get('installer-version', default.INSTALLER_VERSION)
    installer = download_openshift_installer(version)

    # Deploy cluster
    log.info("Deploying cluster")
    run_cmd(f"./{installer} create cluster "
            f"--dir {cluster_path} "
            f"--log-level debug")

    # Test cluster access
    if not OCP.set_kubeconfig(
            os.path.join(cluster_path, default.KUBECONFIG_LOCATION)):
        return TestStatus.FAILED

    # TODO: Create cluster object, add to test_data for other tests to utilize
    # Determine worker pattern and create ebs volumes
    with open(os.path.join(cluster_path, "terraform.tfvars")) as f:
        tfvars = json.load(f)

    cluster_id = tfvars['cluster_id']
    worker_pattern = f'{cluster_id}-worker*'
    log.info(f'Worker pattern: {worker_pattern}')
    create_ebs_volumes(worker_pattern, region_name=env_data['region'])

    # render templates and create resources
    create_oc_resource('common.yaml', cluster_path, _templating, env_data)
    run_cmd(f'oc label namespace {env_data["cluster_namespace"]} '
            f'"openshift.io/cluster-monitoring=true"')
    run_cmd(f"oc policy add-role-to-user view "
            f"system:serviceaccount:openshift-monitoring:prometheus-k8s "
            f"-n {env_data['cluster_namespace']}")
    create_oc_resource('operator-openshift.yaml', cluster_path, _templating,
                       env_data)
    # Increased to 10 seconds as 5 is not enough
    # TODO: do the sampler function and check if resource exist
    wait_time = 10
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    run_cmd(f"oc wait --for condition=ready pod "
            f"-l app=rook-ceph-operator "
            f"-n {env_data['cluster_namespace']} "
            f"--timeout=120s")
    run_cmd(f"oc wait --for condition=ready pod "
            f"-l app=rook-ceph-agent "
            f"-n {env_data['cluster_namespace']} "
            f"--timeout=120s")
    run_cmd(f"oc wait --for condition=ready pod "
            f"-l app=rook-discover "
            f"-n {env_data['cluster_namespace']} "
            f"--timeout=120s")
    create_oc_resource('cluster.yaml', cluster_path, _templating, env_data)
    create_oc_resource('toolbox.yaml', cluster_path, _templating, env_data)
    log.info(f"Waiting {wait_time} seconds...")
    time.sleep(wait_time)
    create_oc_resource('storage-manifest.yaml', cluster_path, _templating,
                       env_data)
    create_oc_resource("service-monitor.yaml", cluster_path, _templating,
                       env_data)
    create_oc_resource("prometheus-rules.yaml", cluster_path, _templating,
                       env_data)

    # Verify health of ceph cluster
    # TODO: move destroy cluster logic to new CLI usage pattern?
    log.info("Done creating rook resources, waiting for HEALTH_OK")
    rc = ceph_health_check(namespace=env_data['cluster_namespace'])

    return rc
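In this revision the per-cluster knobs move into a single env_data mapping; a hedged sketch of a cluster_conf override, with illustrative values for the keys the code actually reads (region, cluster_namespace):

cluster_conf = {
    'env_data': {
        'region': 'us-east-2',
        'cluster_namespace': 'openshift-storage',
    }
}
# Overrides are merged into a copy of default.ENV_DATA before the
# install-config.yaml.j2 template is rendered.
rc = run(
    test_data={'cluster-name': 'my-ocs', 'cluster-path': None},
    config={},
    cluster_conf=cluster_conf,
)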
Example 11
    def test_deployment(self):
        log.info("Running OCS basic installation")
        cluster_path = ENV_DATA['cluster_path']
        # Test cluster access; if reachable, skip the deployment.
        if RUN['cli_params'].get('cluster_path') and OCP.set_kubeconfig(
            os.path.join(cluster_path, RUN.get('kubeconfig_location'))
        ):
            pytest.skip(
                "The installation is skipped because the cluster is running"
            )

        # Generate install-config from template
        log.info("Generating install-config")
        run_cmd(f"mkdir -p {cluster_path}")
        pull_secret_path = os.path.join(
            TOP_DIR,
            "data",
            "pull-secret"
        )

        # TODO: check for supported platform and raise the exception if not
        # supported. Currently we support just AWS.

        _templating = templating.Templating()
        install_config_str = _templating.render_template(
            "install-config.yaml.j2", ENV_DATA
        )
        # Parse the rendered YAML so that we can manipulate the object directly
        install_config_obj = yaml.safe_load(install_config_str)
        with open(pull_secret_path, "r") as f:
            # Parse, then unparse, the JSON file.
            # We do this for two reasons: to ensure it is well-formatted, and
            # also to ensure it ends up as a single line.
            install_config_obj['pullSecret'] = json.dumps(json.loads(f.read()))
        install_config_str = yaml.safe_dump(install_config_obj)
        log.info(f"Install config: \n{install_config_str}")
        install_config = os.path.join(cluster_path, "install-config.yaml")
        with open(install_config, "w") as f:
            f.write(install_config_str)

        # Download installer
        installer = get_openshift_installer(
            DEPLOYMENT['installer_version']
        )
        # Download client
        get_openshift_client()

        # Deploy cluster
        log.info("Deploying cluster")
        run_cmd(
            f"{installer} create cluster "
            f"--dir {cluster_path} "
            f"--log-level debug"
        )

        # Test cluster access
        if not OCP.set_kubeconfig(
            os.path.join(cluster_path, RUN.get('kubeconfig_location'))
        ):
            pytest.fail("Cluster is not available!")

        # TODO: Create cluster object, add to ENV_DATA for other tests to
        # utilize.
        # Determine worker pattern and create ebs volumes
        with open(os.path.join(cluster_path, "terraform.tfvars")) as f:
            tfvars = json.load(f)

        cluster_id = tfvars['cluster_id']
        worker_pattern = f'{cluster_id}-worker*'
        log.info(f'Worker pattern: {worker_pattern}')
        create_ebs_volumes(worker_pattern, region_name=ENV_DATA['region'])

        # render templates and create resources
        create_oc_resource('common.yaml', cluster_path, _templating, ENV_DATA)
        run_cmd(
            f'oc label namespace {ENV_DATA["cluster_namespace"]} '
            f'"openshift.io/cluster-monitoring=true"'
        )
        run_cmd(
            f"oc policy add-role-to-user view "
            f"system:serviceaccount:openshift-monitoring:prometheus-k8s "
            f"-n {ENV_DATA['cluster_namespace']}"
        )
        apply_oc_resource(
            'csi-nodeplugin-rbac_rbd.yaml',
            cluster_path,
            _templating,
            ENV_DATA,
            template_dir="ocs-deployment/csi/rbd/"
        )
        apply_oc_resource(
            'csi-provisioner-rbac_rbd.yaml',
            cluster_path,
            _templating,
            ENV_DATA,
            template_dir="ocs-deployment/csi/rbd/"
        )
        apply_oc_resource(
            'csi-nodeplugin-rbac_cephfs.yaml',
            cluster_path,
            _templating,
            ENV_DATA,
            template_dir="ocs-deployment/csi/cephfs/"
        )
        apply_oc_resource(
            'csi-provisioner-rbac_cephfs.yaml',
            cluster_path,
            _templating,
            ENV_DATA,
            template_dir="ocs-deployment/csi/cephfs/"
        )
        # Increased to 15 seconds as 10 is not enough
        # TODO: do the sampler function and check if resource exist
        wait_time = 15
        log.info(f"Waiting {wait_time} seconds...")
        time.sleep(wait_time)
        create_oc_resource(
            'operator-openshift-with-csi.yaml', cluster_path, _templating, ENV_DATA
        )
        log.info(f"Waiting {wait_time} seconds...")
        time.sleep(wait_time)
        run_cmd(
            f"oc wait --for condition=ready pod "
            f"-l app=rook-ceph-operator "
            f"-n {ENV_DATA['cluster_namespace']} "
            f"--timeout=120s"
        )
        run_cmd(
            f"oc wait --for condition=ready pod "
            f"-l app=rook-ceph-agent "
            f"-n {ENV_DATA['cluster_namespace']} "
            f"--timeout=120s"
        )
        run_cmd(
            f"oc wait --for condition=ready pod "
            f"-l app=rook-discover "
            f"-n {ENV_DATA['cluster_namespace']} "
            f"--timeout=120s"
        )
        create_oc_resource('cluster.yaml', cluster_path, _templating, ENV_DATA)
        create_oc_resource('toolbox.yaml', cluster_path, _templating, ENV_DATA)
        log.info(f"Waiting {wait_time} seconds...")
        time.sleep(wait_time)
        create_oc_resource(
            'storage-manifest.yaml', cluster_path, _templating, ENV_DATA
        )
        create_oc_resource(
            "service-monitor.yaml", cluster_path, _templating, ENV_DATA
        )
        create_oc_resource(
            "prometheus-rules.yaml", cluster_path, _templating, ENV_DATA
        )
        log.info(f"Waiting {wait_time} seconds...")
        time.sleep(wait_time)

        # Verify health of ceph cluster
        # TODO: move destroy cluster logic to new CLI usage pattern?
        log.info("Done creating rook resources, waiting for HEALTH_OK")
        assert ceph_health_check(namespace=ENV_DATA['cluster_namespace'])
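Example 11 also calls apply_oc_resource, which is not shown in this listing. A hedged sketch under the assumption that it mirrors create_oc_resource from Example 2 but uses 'oc apply', so repeated runs are idempotent:

def apply_oc_resource(
    template_name,
    cluster_path,
    _templating,
    template_data=None,
    template_dir="ocs-deployment",
):
    """
    Sketch only: assumed to mirror create_oc_resource (Example 2), but using
    'oc apply' so that re-applying an existing resource does not fail.
    """
    template_data = template_data or {}
    template_path = os.path.join(template_dir, template_name)
    template = _templating.render_template(template_path, template_data)
    cfg_file = os.path.join(cluster_path, template_name)
    with open(cfg_file, "w") as f:
        f.write(template)
    log.info(f"Applying resource from {template_name}")
    run_cmd(f"oc apply -f {cfg_file}")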