Ejemplo n.º 1
0
        def deploy(self, log_cli_level="DEBUG"):
            """
            Run ``<installer> create cluster`` and then ``self.test_cluster()``.

            When the installer command fails and the error text contains
            ``constants.GATHER_BOOTSTRAP_PATTERN``, ``gather_bootstrap()`` is
            attempted (its own errors are only logged) before the installer
            failure is re-raised.

            Args:
                log_cli_level (str): openshift installer's log level
                    (default: "DEBUG")

            Raises:
                CommandFailed: re-raised when ``run_cmd`` raises it

            """
            logger.info("Deploying OCP cluster")
            logger.info(
                f"Openshift-installer will be using loglevel:{log_cli_level}"
            )
            create_cluster_cmd = (
                f"{self.installer} create cluster "
                f"--dir {self.cluster_path} "
                f"--log-level {log_cli_level}"
            )
            try:
                run_cmd(create_cluster_cmd, timeout=3600)
            except CommandFailed as install_error:
                bootstrap_failed = (
                    constants.GATHER_BOOTSTRAP_PATTERN in str(install_error)
                )
                if bootstrap_failed:
                    # Collecting bootstrap data is best effort: a failure
                    # here is logged and must not hide the installer error.
                    try:
                        gather_bootstrap()
                    except Exception as gather_error:
                        logger.error(gather_error)
                raise install_error
            self.test_cluster()
Ejemplo n.º 2
0
    def deploy(self, log_cli_level="DEBUG"):
        """
        Run ``<installer> create cluster`` and then ``self.test_cluster()``.

        The command timeout is read from
        ``config.DEPLOYMENT["openshift_install_timeout"]`` (``None`` when the
        key is absent). When the command fails or times out and the error
        text contains ``constants.GATHER_BOOTSTRAP_PATTERN``,
        ``gather_bootstrap()`` is attempted (its own errors are only logged)
        before the original error is re-raised.

        Args:
            log_cli_level (str): openshift installer's log level
                (default: "DEBUG")

        Raises:
            exceptions.CommandFailed: re-raised when ``run_cmd`` raises it
            subprocess.TimeoutExpired: re-raised when ``run_cmd`` raises it

        """
        logger.info("Deploying OCP cluster")
        timeout_seconds = config.DEPLOYMENT.get("openshift_install_timeout")
        logger.info(
            f"Running openshift-install with '{log_cli_level}' log level "
            f"and {timeout_seconds} second timeout"
        )
        create_cluster_cmd = (
            f"{self.installer} create cluster "
            f"--dir {self.cluster_path} "
            f"--log-level {log_cli_level}"
        )
        try:
            run_cmd(create_cluster_cmd, timeout=timeout_seconds)
        except (
            exceptions.CommandFailed, subprocess.TimeoutExpired
        ) as install_error:
            bootstrap_failed = (
                constants.GATHER_BOOTSTRAP_PATTERN in str(install_error)
            )
            if bootstrap_failed:
                # Collecting bootstrap data is best effort: a failure here
                # is logged and must not hide the installer error.
                try:
                    gather_bootstrap()
                except Exception as gather_error:
                    logger.error(gather_error)
            raise install_error
        self.test_cluster()
Ejemplo n.º 3
0
        def deploy(self, log_cli_level='DEBUG'):
            """
            Patch and run the UPI install script, then check the cluster.

            The script ``constants.UPI_INSTALL_SCRIPT`` in
            ``self.upi_script_path`` is rewritten in place (every
            "openshift-qe-upi-1" becomes "ocs-qe-upi") and executed with the
            process working directory set to ``self.upi_script_path``. The
            previous working directory is always restored afterwards, also
            when patching or launching the script fails. On success
            ``self.test_cluster()`` is called and ``self.upi_repo_path`` is
            removed.

            Args:
                log_cli_level (str): openshift installer's log level
                    (default: "DEBUG"). It is only logged here; it is not
                    passed to the install script.

            Raises:
                OSError: if the working directory cannot be changed to
                    ``self.upi_script_path``
                exceptions.CommandFailed: if the install script exits with a
                    non-zero return code
            """
            logger.info("Deploying OCP cluster")
            logger.info(
                f"Openshift-installer will be using loglevel:{log_cli_level}")

            # Invoke UPI on AWS install script
            cidir = os.getcwd()
            logger.info("Changing CWD")
            try:
                os.chdir(self.upi_script_path)
            except OSError:
                logger.exception(
                    f"Failed to change CWD to {self.upi_script_path} ")
                # Carrying on would patch and execute a relative path from
                # the wrong directory, so surface the failure instead.
                raise
            logger.info(f"CWD changed to {self.upi_script_path}")

            try:
                # Rewrite the script in place before running it
                with open(f"./{constants.UPI_INSTALL_SCRIPT}", "r") as fd:
                    buf = fd.read()
                data = buf.replace("openshift-qe-upi-1", "ocs-qe-upi")
                with open(f"./{constants.UPI_INSTALL_SCRIPT}", "w") as fd:
                    fd.write(data)

                logger.info("Executing UPI install script")
                proc = Popen(
                    [
                        os.path.join(
                            self.upi_script_path,
                            constants.UPI_INSTALL_SCRIPT,
                        )
                    ],
                    stdout=PIPE,
                    stderr=PIPE,
                    encoding='utf-8',
                )
                stdout, stderr = proc.communicate()
            finally:
                # Change dir back to ocs-ci dir, even if the steps above
                # raised, so later code does not run from the script dir
                os.chdir(cidir)

            if proc.returncode:
                logger.error(stderr)
                if constants.GATHER_BOOTSTRAP_PATTERN in stderr:
                    # Best effort: a failing gather is only logged so that
                    # the install failure below is still reported.
                    try:
                        gather_bootstrap()
                    except Exception as ex:
                        logger.error(ex)
                raise exceptions.CommandFailed("upi install script failed")
            logger.info(stdout)

            self.test_cluster()

            # Delete openshift-misc repository
            logger.info("Removing openshift-misc directory located at %s",
                        self.upi_repo_path)
            shutil.rmtree(self.upi_repo_path)
0
        def deploy(self, log_cli_level='DEBUG'):
            """
            Deployment specific to OCP cluster on this platform

            Steps, in order: apply the terraform configuration from
            ``self.terraform_data_dir``, wait for bootstrap to complete,
            re-apply terraform with ``bootstrap_complete=True``, wait for the
            "image-registry" cluster operator, configure storage for the
            image registry, wait for the install to complete and finally call
            ``self.test_cluster()``.

            The working directory is switched to ``self.terraform_data_dir``
            only while terraform runs and is always switched back to
            ``self.previous_dir``, also when a terraform step raises.

            Args:
                log_cli_level (str): openshift installer's log level
                    (default: "DEBUG")

            Raises:
                CommandFailed: re-raised when waiting for bootstrap fails

            """
            logger.info("Deploying OCP cluster for vSphere platform")
            logger.info(
                f"Openshift-installer will be using loglevel:{log_cli_level}"
            )
            os.chdir(self.terraform_data_dir)
            try:
                self.terraform.initialize()
                self.terraform.apply(self.terraform_var)
            finally:
                # Never leave the process inside the terraform directory
                os.chdir(self.previous_dir)

            logger.info("waiting for bootstrap to complete")
            try:
                run_cmd(
                    f"{self.installer} wait-for bootstrap-complete "
                    f"--dir {self.cluster_path} "
                    f"--log-level {log_cli_level}",
                    timeout=3600
                )
            except CommandFailed as e:
                if constants.GATHER_BOOTSTRAP_PATTERN in str(e):
                    # Best effort: a failing gather is only logged so the
                    # installer failure is the error that propagates.
                    try:
                        gather_bootstrap()
                    except Exception as ex:
                        logger.error(ex)
                raise

            logger.info("removing bootstrap node")
            os.chdir(self.terraform_data_dir)
            try:
                self.terraform.apply(
                    self.terraform_var, bootstrap_complete=True
                )
            finally:
                os.chdir(self.previous_dir)

            OCP.set_kubeconfig(self.kubeconfig)
            # wait for image registry to show-up
            co = "image-registry"
            wait_for_co(co)

            # patch image registry to null
            self.configure_storage_for_image_registry(self.kubeconfig)

            # wait for install to complete
            logger.info("waiting for install to complete")
            run_cmd(
                f"{self.installer} wait-for install-complete "
                f"--dir {self.cluster_path} "
                f"--log-level {log_cli_level}",
                timeout=1800
            )

            self.test_cluster()
Ejemplo n.º 5
0
        def deploy(self, log_cli_level='DEBUG'):
            """
            Deployment specific to OCP cluster on this platform

            Steps, in order: apply the terraform configuration from
            ``self.terraform_data_dir``, wait for bootstrap to complete,
            re-apply terraform with ``bootstrap_complete=True`` unless
            ``config.DEPLOYMENT['preserve_bootstrap_node']`` is truthy,
            approve node CSRs for OCP 4.4 and above, wait for the
            "image-registry" cluster operator, configure storage for the
            image registry, wait for the install to complete, approve any
            pending CSRs and finally call ``self.test_cluster()``.

            The working directory is switched to ``self.terraform_data_dir``
            only while terraform runs and is always switched back to
            ``self.previous_dir``, also when a terraform step raises.

            Args:
                log_cli_level (str): openshift installer's log level
                    (default: "DEBUG")

            Raises:
                CommandFailed: re-raised when waiting for bootstrap fails

            """
            logger.info("Deploying OCP cluster for vSphere platform")
            logger.info(
                f"Openshift-installer will be using loglevel:{log_cli_level}"
            )
            os.chdir(self.terraform_data_dir)
            try:
                self.terraform.initialize()
                self.terraform.apply(self.terraform_var)
            finally:
                # Never leave the process inside the terraform directory
                os.chdir(self.previous_dir)

            logger.info("waiting for bootstrap to complete")
            try:
                run_cmd(
                    f"{self.installer} wait-for bootstrap-complete "
                    f"--dir {self.cluster_path} "
                    f"--log-level {log_cli_level}",
                    timeout=3600
                )
            except CommandFailed as e:
                if constants.GATHER_BOOTSTRAP_PATTERN in str(e):
                    # Best effort: a failing gather is only logged so the
                    # installer failure is the error that propagates.
                    try:
                        gather_bootstrap()
                    except Exception as ex:
                        logger.error(ex)
                raise

            if not config.DEPLOYMENT['preserve_bootstrap_node']:
                logger.info("removing bootstrap node")
                os.chdir(self.terraform_data_dir)
                try:
                    self.terraform.apply(
                        self.terraform_var, bootstrap_complete=True
                    )
                finally:
                    os.chdir(self.previous_dir)

            OCP.set_kubeconfig(self.kubeconfig)

            # wait for all nodes to generate CSR
            # From OCP version 4.4 and above, we have to approve CSR manually
            # for all the nodes
            ocp_version = get_ocp_version()
            if Version.coerce(ocp_version) >= Version.coerce('4.4'):
                wait_for_all_nodes_csr_and_approve(timeout=1200, sleep=30)

            # wait for image registry to show-up
            co = "image-registry"
            wait_for_co(co)

            # patch image registry to null
            self.configure_storage_for_image_registry(self.kubeconfig)

            # wait for install to complete
            logger.info("waiting for install to complete")
            run_cmd(
                f"{self.installer} wait-for install-complete "
                f"--dir {self.cluster_path} "
                f"--log-level {log_cli_level}",
                timeout=1800
            )

            # Approving CSRs here in-case if any exists
            approve_pending_csr()

            self.test_cluster()
Ejemplo n.º 6
0
        def deploy(self, log_cli_level="DEBUG"):
            """
            Deploy the OCP cluster on the Bare Metal platform.

            Steps, in order: upload the common dnsmasq configuration and one
            PXE file per selected machine to the helper host, fix permissions
            there and restart dnsmasq, switch every machine of the default
            cluster to PXE boot and power-cycle it through ipmitool, wait for
            bootstrap to complete, approve node CSRs, wait for the
            "image-registry" cluster operator, configure storage for the
            image registry, wait for the install to complete, approve pending
            CSRs, call ``self.test_cluster()``, clean the disks and configure
            chrony.

            Args:
                log_cli_level (str): openshift installer's log level
                    (default: "DEBUG")

            Raises:
                AssertionError: if restarting dnsmasq on the helper node
                    returns a falsy result
                CommandFailed: re-raised when waiting for bootstrap fails

            """
            # Uploading pxe files
            logger.info("Deploying OCP cluster for Bare Metal platform")
            logger.info(
                f"Openshift-installer will be using log level:{log_cli_level}")
            upload_file(
                self.host,
                constants.COMMON_CONF_FILE,
                os.path.join(self.helper_node_details["bm_dnsmasq_dir"],
                             "dnsmasq.common.conf"),
                self.user,
                key_file=self.private_key,
            )
            logger.info("Uploading PXE files")
            ocp_version = get_ocp_version()
            # NOTE(review): if ocp_version is a string such as "4.10",
            # float() yields 4.1 - confirm create_pxe_files copes with that.
            float_ocp_version = float(ocp_version)
            for machine in self.mgmt_details:
                details = self.mgmt_details[machine]
                # Only machines tied to a cluster or flagged as extra nodes
                # get a PXE file
                if not (details.get("cluster_name")
                        or details.get("extra_node")):
                    continue
                pxe_file_path = self.create_pxe_files(
                    ocp_version=float_ocp_version,
                    role=details.get("role"),
                )
                # The remote file is named after the MAC address with ':'
                # replaced by '-' and a "01-" prefix
                upload_file(
                    server=self.host,
                    localpath=pxe_file_path,
                    remotepath=f"{self.helper_node_details['bm_tftp_dir']}"
                    f"/pxelinux.cfg/01-{details['mac'].replace(':', '-')}",
                    user=self.user,
                    key_file=self.private_key,
                )

            # Applying Permission
            for dir_key in ("bm_tftp_dir", "bm_path_to_upload"):
                cmd = f"chmod 755 -R {self.helper_node_details[dir_key]}"
                self.helper_node_handler.exec_cmd(cmd=cmd)

            # Restarting dnsmasq service.
            # The command runs outside of an ``assert`` so that it is still
            # executed when Python runs with -O; AssertionError is raised
            # explicitly to keep the exception type callers already see.
            cmd = "systemctl restart dnsmasq"
            if not self.helper_node_handler.exec_cmd(cmd=cmd):
                raise AssertionError("Failed to restart dnsmasq service")

            # Rebooting Machine with pxe boot
            for machine in self.mgmt_details:
                details = self.mgmt_details[machine]
                if (details.get("cluster_name")
                        != constants.BM_DEFAULT_CLUSTER_NAME):
                    continue
                username = details["mgmt_username"]
                password = details["mgmt_password"]
                # Handed to run_cmd together with the command that embeds
                # the credentials
                secrets = [username, password]
                ipmi_base = (
                    f"ipmitool -I lanplus -U {username} "
                    f"-P {password} "
                    f"-H {details['mgmt_console']}"
                )
                # Changes boot priority to pxe
                run_cmd(
                    cmd=f"{ipmi_base} chassis bootdev pxe", secrets=secrets
                )
                sleep(2)
                # Power On Machine: power cycle, with "power on" after `||`
                # NOTE(review): the `||` fallback only works if run_cmd
                # executes through a shell - confirm.
                run_cmd(
                    cmd=(
                        f"{ipmi_base} chassis power cycle || "
                        f"{ipmi_base} chassis power on"
                    ),
                    secrets=secrets,
                )

            logger.info("waiting for bootstrap to complete")
            try:
                run_cmd(
                    f"{self.installer} wait-for bootstrap-complete "
                    f"--dir {self.cluster_path} "
                    f"--log-level {log_cli_level}",
                    timeout=3600,
                )
            except CommandFailed as e:
                if constants.GATHER_BOOTSTRAP_PATTERN in str(e):
                    # Best effort: a failing gather is only logged so the
                    # installer failure is the error that propagates.
                    try:
                        gather_bootstrap()
                    except Exception as ex:
                        logger.error(ex)
                raise

            OCP.set_kubeconfig(self.kubeconfig)
            wait_for_all_nodes_csr_and_approve()
            # wait for image registry to show-up
            co = "image-registry"
            wait_for_co(co)

            # patch image registry to null
            self.configure_storage_for_image_registry(self.kubeconfig)

            # wait for install to complete
            logger.info("waiting for install to complete")
            run_cmd(
                f"{self.installer} wait-for install-complete "
                f"--dir {self.cluster_path} "
                f"--log-level {log_cli_level}",
                timeout=1800,
            )

            # Approving CSRs here in-case if any exists
            approve_pending_csr()

            self.test_cluster()
            logger.info("Performing Disk cleanup")
            clean_disk()
            # We need NTP for OCS cluster to become clean
            configure_chrony_and_wait_for_machineconfig_status(node_type="all")
Ejemplo n.º 7
0
        def deploy(self, log_cli_level="DEBUG"):
            """
            Deploy the OCP cluster on the Bare Metal platform.

            Steps, in order: upload the common dnsmasq configuration and one
            PXE file per selected machine to the helper host, fix permissions
            there and restart dnsmasq, PXE-boot the bootstrap machine plus up
            to ``config.ENV_DATA["master_replicas"]`` masters and
            ``config.ENV_DATA["worker_replicas"]`` workers of the default
            cluster, add "api-int", "api" and "*.apps" A records through
            ``self.aws``, wait for bootstrap to complete, approve node CSRs,
            wait for the "image-registry" cluster operator, configure storage
            for the image registry, wait for the install to complete, delete
            the bootstrap IP from the "api-int"/"api" records, approve
            pending CSRs, call ``self.test_cluster()``, clean the disks and
            configure chrony.

            Args:
                log_cli_level (str): openshift installer's log level
                    (default: "DEBUG")

            Raises:
                AssertionError: if restarting dnsmasq on the helper node
                    returns a falsy result
                CommandFailed: re-raised when waiting for bootstrap fails

            """
            # Uploading pxe files
            master_count = 0
            worker_count = 0
            logger.info("Deploying OCP cluster for Bare Metal platform")
            logger.info(
                f"Openshift-installer will be using log level:{log_cli_level}")
            upload_file(
                self.host,
                constants.COMMON_CONF_FILE,
                os.path.join(self.helper_node_details["bm_dnsmasq_dir"],
                             "dnsmasq.common.conf"),
                self.user,
                key_file=self.private_key,
            )
            logger.info("Uploading PXE files")
            ocp_version = get_ocp_version()
            # NOTE(review): if ocp_version is a string such as "4.10",
            # float() yields 4.1 - confirm create_pxe_files copes with that.
            float_ocp_version = float(ocp_version)
            for machine in self.mgmt_details:
                details = self.mgmt_details[machine]
                # Only machines tied to a cluster or flagged as extra nodes
                # get a PXE file
                if not (details.get("cluster_name")
                        or details.get("extra_node")):
                    continue
                pxe_file_path = self.create_pxe_files(
                    ocp_version=float_ocp_version,
                    role=details.get("role"),
                )
                # The remote file is named after the MAC address with ':'
                # replaced by '-' and a "01-" prefix
                upload_file(
                    server=self.host,
                    localpath=pxe_file_path,
                    remotepath=f"{self.helper_node_details['bm_tftp_dir']}"
                    f"/pxelinux.cfg/01-{details['mac'].replace(':', '-')}",
                    user=self.user,
                    key_file=self.private_key,
                )

            # Applying Permission
            for dir_key in ("bm_tftp_dir", "bm_path_to_upload"):
                cmd = f"chmod 755 -R {self.helper_node_details[dir_key]}"
                self.helper_node_handler.exec_cmd(cmd=cmd)

            # Restarting dnsmasq service.
            # The command runs outside of an ``assert`` so that it is still
            # executed when Python runs with -O; AssertionError is raised
            # explicitly to keep the exception type callers already see.
            cmd = "systemctl restart dnsmasq"
            if not self.helper_node_handler.exec_cmd(cmd=cmd):
                raise AssertionError("Failed to restart dnsmasq service")

            # Rebooting Machine with pxe boot
            api_record_ip_list = []
            apps_record_ip_list = []
            response_list = []
            # Stays None when no bootstrap machine belongs to the cluster
            bootstrap_ip = None
            cluster_name = f"{constants.BM_DEFAULT_CLUSTER_NAME}"
            self.aws.delete_hosted_zone(cluster_name=cluster_name,
                                        delete_zone=False)
            for machine in self.mgmt_details:
                details = self.mgmt_details[machine]
                if (details.get("cluster_name")
                        != constants.BM_DEFAULT_CLUSTER_NAME):
                    continue
                role = details["role"]
                if role == constants.BOOTSTRAP_MACHINE:
                    self.set_pxe_boot_and_reboot(machine)
                    bootstrap_ip = details["ip"]
                    api_record_ip_list.append(bootstrap_ip)

                elif (role == constants.MASTER_MACHINE and
                      master_count < config.ENV_DATA["master_replicas"]):
                    self.set_pxe_boot_and_reboot(machine)
                    api_record_ip_list.append(details["ip"])
                    master_count += 1

                elif (role == constants.WORKER_MACHINE and
                      worker_count < config.ENV_DATA["worker_replicas"]):
                    self.set_pxe_boot_and_reboot(machine)
                    apps_record_ip_list.append(details["ip"])
                    worker_count += 1

            logger.info("Configuring DNS records")
            zone_id = self.aws.get_hosted_zone_id(cluster_name=cluster_name)

            # With no workers requested, the apps records reuse the API IPs
            if config.ENV_DATA["worker_replicas"] == 0:
                apps_record_ip_list = api_record_ip_list
            for ip in api_record_ip_list:
                for record_prefix in ("api-int", "api"):
                    response_list.append(
                        self.aws.update_hosted_zone_record(
                            zone_id=zone_id,
                            record_name=f"{record_prefix}.{cluster_name}",
                            data=ip,
                            type="A",
                            operation_type="Add",
                        ))
            for ip in apps_record_ip_list:
                response_list.append(
                    self.aws.update_hosted_zone_record(
                        zone_id=zone_id,
                        record_name=f"*.apps.{cluster_name}",
                        data=ip,
                        type="A",
                        operation_type="Add",
                    ))

            logger.info("Waiting for Record Response")
            self.aws.wait_for_record_set(response_list=response_list)
            logger.info("Records Created Successfully")
            logger.info("waiting for bootstrap to complete")
            try:
                run_cmd(
                    f"{self.installer} wait-for bootstrap-complete "
                    f"--dir {self.cluster_path} "
                    f"--log-level {log_cli_level}",
                    timeout=3600,
                )
            except CommandFailed as e:
                if constants.GATHER_BOOTSTRAP_PATTERN in str(e):
                    # Best effort: a failing gather is only logged so the
                    # installer failure is the error that propagates.
                    try:
                        gather_bootstrap()
                    except Exception as ex:
                        logger.error(ex)
                raise

            OCP.set_kubeconfig(self.kubeconfig)
            wait_for_all_nodes_csr_and_approve()
            # wait for image registry to show-up
            co = "image-registry"
            wait_for_co(co)

            # patch image registry to null
            self.configure_storage_for_image_registry(self.kubeconfig)

            # wait for install to complete
            logger.info("waiting for install to complete")
            run_cmd(
                f"{self.installer} wait-for install-complete "
                f"--dir {self.cluster_path} "
                f"--log-level {log_cli_level}",
                timeout=1800,
            )
            if bootstrap_ip is None:
                # Nothing was added for a bootstrap machine, so there is
                # nothing to delete
                logger.warning(
                    "No bootstrap machine found for cluster %s, "
                    "skipping removal of bootstrap DNS records",
                    cluster_name,
                )
            else:
                logger.info("Removing Bootstrap Ip for DNS Records")
                for record_prefix in ("api-int", "api"):
                    self.aws.update_hosted_zone_record(
                        zone_id=zone_id,
                        record_name=f"{record_prefix}.{cluster_name}",
                        data=bootstrap_ip,
                        type="A",
                        operation_type="Delete",
                    )
            # Approving CSRs here in-case if any exists
            approve_pending_csr()

            self.test_cluster()
            logger.info("Performing Disk cleanup")
            clean_disk()
            # We need NTP for OCS cluster to become clean
            # Timeout is 400 per node of the larger of the two node groups
            worker_timeout = 400 * config.ENV_DATA["worker_replicas"]
            master_timeout = 400 * config.ENV_DATA["master_replicas"]
            chrony_timeout = max(worker_timeout, master_timeout)
            configure_chrony_and_wait_for_machineconfig_status(
                node_type="all", timeout=chrony_timeout)