Exemple #1
0
    def approve_csrs(kubeconfig_path: str, done: threading.Event):
        """Approve pending CSRs in a loop until ``done`` is set.

        Every pass lists the unapproved CSR names with
        ``get_unapproved_csr_names`` and approves each one with
        ``approve_csr``.  Failures are only logged, so the next pass retries.

        Args:
            kubeconfig_path: Passed through to the listing/approval helpers.
            done: Event that ends the worker once it is set.
        """
        log.info(
            "Started background worker to approve CSRs when they appear...")
        while not done.is_set():
            unapproved_csrs = []
            try:
                unapproved_csrs = get_unapproved_csr_names(kubeconfig_path)
            except subprocess.SubprocessError:
                log.debug(
                    "Failed to list csrs. This is usually due to API downtime. Retrying"
                )
            except Exception:
                # We're in a thread so it's a bit awkward to stop everything else...
                # Just continue after logging the unexpected exception
                log.exception("Unknown exception while listing csrs")

            for csr_name in unapproved_csrs:
                log.info(f"Found unapproved CSR {csr_name}, approving...")

                try:
                    approve_csr(kubeconfig_path, csr_name)
                except subprocess.SubprocessError:
                    log.warning(
                        "Failed attempt to approve CSR, this may be due to API downtime. Will retry later"
                    )
                except Exception:
                    # We're in a thread so it's a bit awkward to stop everything else...
                    # Just continue after logging the unexpected exception
                    log.exception(
                        f"Unknown exception while approving the {csr_name} CSR"
                    )

            # Wait on the event instead of sleeping: the pause between passes
            # is still at most 10 seconds, but setting ``done`` wakes the
            # worker immediately so it can exit without finishing the pause.
            done.wait(timeout=10)
    def _get_domain_ips_and_macs(
            domain: libvirt.virDomain) -> Tuple[List[str], List[str]]:
        """Collect the IP and MAC addresses libvirt reports for ``domain``.

        Interface addresses are requested from the DHCP-lease source and then
        from the ARP source.  The results are merged with ``dict.update``, so
        a later source replaces an earlier entry stored under the same key.
        A source that raises ``libvirt.libvirtError`` is logged and skipped.

        Returns:
            ``(ips, macs)``.  One MAC is appended for every address found, so
            both lists have the same length and ``macs`` may contain repeats.
        """
        merged = {}
        for source in (
                # getting all DHCP leases IPs
                libvirt.VIR_DOMAIN_INTERFACE_ADDRESSES_SRC_LEASE,
                # getting static IPs via ARP
                libvirt.VIR_DOMAIN_INTERFACE_ADDRESSES_SRC_ARP,
        ):
            try:
                merged.update(**domain.interfaceAddresses(source))
            except libvirt.libvirtError:
                log.exception(
                    "Got an error while updating domain's network addresses")

        ips, macs = [], []
        log.debug(f"Host {domain.name()} interfaces are {merged}")

        for iface in merged.values():
            # An interface whose "addrs" entry is empty/None contributes nothing.
            for entry in iface["addrs"] or ():
                ips.append(entry["addr"])
                macs.append(iface["hwaddr"])

        if ips:
            log.info("Host %s ips are %s", domain.name(), ips)
        if macs:
            log.info("Host %s macs are %s", domain.name(), macs)
        return ips, macs
Exemple #3
0
    def cluster(
        self,
        api_client: InventoryClient,
        request: FixtureRequest,
        infra_env_configuration: InfraEnvConfig,
        proxy_server,
        prepare_nodes_network: Nodes,
        cluster_configuration: ClusterConfig,
        ipxe_server,
    ):
        """Build a ``Cluster`` for the requesting test, yield it, then clean up.

        Setup: optionally configures a proxy server and, when
        ``global_variables.ipxe_boot`` is set, starts an iPXE server for the
        generated infra-env and points the cluster network at its URL.

        Teardown (after the ``yield``): collects logs if the test failed,
        cancels an in-flight installation, and - only when
        ``global_variables.test_teardown`` is set - deregisters the infra-env
        and deletes the cluster.

        NOTE(review): the ``request`` argument and the ``yield`` suggest a
        pytest fixture, but the decorator is outside this view - confirm.
        """
        log.debug(
            f"--- SETUP --- Creating cluster for test: {request.node.name}\n")
        cluster = Cluster(api_client=api_client,
                          config=cluster_configuration,
                          infra_env_config=infra_env_configuration,
                          nodes=prepare_nodes_network)

        needs_proxy = self._does_need_proxy_server(prepare_nodes_network)
        if needs_proxy:
            self.__set_up_proxy_server(cluster, cluster_configuration,
                                       proxy_server)

        if global_variables.ipxe_boot:
            generated_env = cluster.generate_infra_env()
            ipxe_controller = ipxe_server(name="ipxe_controller",
                                          api_client=cluster.api_client)
            ipxe_controller.start(infra_env_id=generated_env.id,
                                  cluster_name=cluster.name)

            ipxe_server_url = f"http://{consts.DEFAULT_IPXE_SERVER_IP}:{consts.DEFAULT_IPXE_SERVER_PORT}/{cluster.name}"
            cluster_network = cluster.nodes.get_cluster_network()
            controller = LibvirtController(config=cluster.nodes.controller,
                                           entity_config=cluster_configuration)
            controller.set_ipxe_url(network_name=cluster_network,
                                    ipxe_url=ipxe_server_url)

        yield cluster

        if self._is_test_failed(request):
            log.info(
                f"--- TEARDOWN --- Collecting Logs for test: {request.node.name}\n"
            )
            self.collect_test_logs(cluster, api_client, request, cluster.nodes)

            # Only abort a running installation when teardown is enabled.
            if global_variables.test_teardown and (cluster.is_installing()
                                                   or cluster.is_finalizing()):
                cluster.cancel_install()

        if not global_variables.test_teardown:
            return

        with SuppressAndLog(ApiException):
            cluster.deregister_infraenv()

        with suppress(ApiException):
            log.info(
                f"--- TEARDOWN --- deleting created cluster {cluster.id}\n"
            )
            cluster.delete()
Exemple #4
0
    def cluster(
        self,
        api_client: InventoryClient,
        request: FixtureRequest,
        infra_env_configuration: InfraEnvConfig,
        proxy_server,
        prepare_nodes_network: Nodes,
        cluster_configuration: ClusterConfig,
        ipxe_server: Callable,
        tang_server: Callable,
    ):
        """Build a ``Cluster`` for the requesting test, yield it, then clean up.

        Setup: starts a tang server first when the configured disk encryption
        mode is TANG, creates the cluster, optionally configures a proxy
        server and, when ``global_variables.ipxe_boot`` is set, runs an iPXE
        server and refreshes ``cluster_configuration.iso_download_path``.

        Teardown (after the ``yield``): collects logs if the test failed,
        cancels an in-flight installation, and - only when
        ``global_variables.test_teardown`` is set - deregisters the infra-env
        and deletes the cluster.

        NOTE(review): the ``request`` argument and the ``yield`` suggest a
        pytest fixture, but the decorator is outside this view - confirm.
        """
        log.debug(
            f"--- SETUP --- Creating cluster for test: {request.node.name}\n")

        uses_tang = (cluster_configuration.disk_encryption_mode ==
                     consts.DiskEncryptionMode.TANG)
        if uses_tang:
            self._start_tang_server(tang_server, cluster_configuration)

        cluster = Cluster(api_client=api_client,
                          config=cluster_configuration,
                          infra_env_config=infra_env_configuration,
                          nodes=prepare_nodes_network)

        needs_proxy = self._does_need_proxy_server(prepare_nodes_network)
        if needs_proxy:
            self.__set_up_proxy_server(cluster, cluster_configuration,
                                       proxy_server)

        if global_variables.ipxe_boot:
            generated_env = cluster.generate_infra_env()
            ipxe_controller = ipxe_server(name="ipxe_controller",
                                          api_client=cluster.api_client)
            ipxe_controller.run(infra_env_id=generated_env.id,
                                cluster_name=cluster.name)
            entity_name = infra_env_configuration.entity_name.get()
            cluster_configuration.iso_download_path = utils.get_iso_download_path(
                entity_name)

        yield cluster

        if self._is_test_failed(request):
            log.info(
                f"--- TEARDOWN --- Collecting Logs for test: {request.node.name}\n"
            )
            self.collect_test_logs(cluster, api_client, request, cluster.nodes)

            # Only abort a running installation when teardown is enabled.
            if global_variables.test_teardown and (cluster.is_installing()
                                                   or cluster.is_finalizing()):
                cluster.cancel_install()

        if not global_variables.test_teardown:
            return

        with SuppressAndLog(ApiException):
            cluster.deregister_infraenv()

        with suppress(ApiException):
            log.info(
                f"--- TEARDOWN --- deleting created cluster {cluster.id}\n"
            )
            cluster.delete()
Exemple #5
0
def _safe_run(job, job_id: int, done_handler: Callable[[int], None]):
    call = None
    try:
        call, call_args = job[0], job[1:]
        return call(*call_args)
    except BaseException:
        log.debug("When concurrently running '%(call)s'", dict(call=str(call)))
        raise
    finally:
        if done_handler:
            done_handler(job_id)
    def _remove_taken_assets_from_all_assets_in_use(self,
                                                    assets_in_use: List[Dict]):
        """Remove this object's taken assets from ``assets_in_use`` in place.

        An entry is dropped when its ``str()`` form is found in
        ``self._taken_assets``.  The caller's list object itself is mutated,
        and ``self._taken_assets`` is emptied afterwards.

        Args:
            assets_in_use: List to prune; modified in place.
        """
        log.info("Returning %d assets", len(self._taken_assets))
        log.debug("Assets to return: %s", self._taken_assets)

        # Slice assignment keeps the same list object for the caller while
        # filtering in a single pass, with no index bookkeeping and no
        # repeated ``pop(i)`` shifting of the remaining elements.
        assets_in_use[:] = [
            asset for asset in assets_in_use
            if str(asset) not in self._taken_assets
        ]

        self._taken_assets.clear()
Exemple #7
0
    def worker_ready() -> bool:
        """Report whether master-0 and worker-0 are listed and every node is ready.

        Returns ``False`` when the node listing fails, when either the
        ``-master-0`` or ``-worker-0`` node is missing from the readiness map,
        or when any listed node has a falsy readiness value.
        """
        try:
            readiness_by_node = get_nodes_readiness(KUBE_CONFIG)
        except subprocess.SubprocessError:
            log.debug("Failed to list nodes. This is usually due to API downtime. Retrying")
            return False

        master_listed = f"{CLUSTER_PREFIX}-master-0" in readiness_by_node
        if not master_listed:
            log.warning("Couldn't find master in node status list, this should not happen")
            return False

        worker_listed = f"{CLUSTER_PREFIX}-worker-0" in readiness_by_node
        if not worker_listed:
            return False

        return all(readiness_by_node.values())
Exemple #8
0
    def day2_cluster(
        self,
        api_client: InventoryClient,
        request: FixtureRequest,
        day2_cluster_configuration: Day2ClusterConfig,
    ):
        """Yield a ``Day2Cluster`` built from the given client and configuration.

        Nothing runs after the ``yield``, so no teardown happens here.
        """
        log.debug(
            f"--- SETUP --- Creating Day2 cluster for test: {request.node.name}\n"
        )
        yield Day2Cluster(api_client=api_client,
                          config=day2_cluster_configuration)
    def handle_trigger(self, conditions_string: List[str],
                       values: Dict[str, Any]) -> None:
        """Apply trigger-supplied values without overriding user-set ones.

        Args:
            conditions_string: Only used in the debug message describing why
                the trigger fired.
            values: Attribute name -> value to apply.  Names this object does
                not have are ignored; names ``is_user_set`` reports as set by
                the user are skipped with a warning.
        """
        for key, value in values.items():
            if not hasattr(self, key):
                continue

            if self.is_user_set(key):
                log.warning(
                    f"Skipping setting {key} to value {value} due that it already been set by the user"
                )
                continue

            log.debug(
                f"{self.__class__.__name__} - Trigger set `{key}` to `{value}`, Condition: {conditions_string}"
            )
            self._set(key, value)
Exemple #10
0
        def get_cluster_func(nodes: Nodes,
                             cluster_config: ClusterConfig) -> Cluster:
            """Create a ``Cluster`` for ``nodes`` and record it in ``clusters``.

            Uses ``api_client``, ``infra_env_configuration``, ``proxy_server``,
            ``request`` and ``clusters`` from the enclosing scope.  A proxy
            server is set up first when the nodes require one.
            """
            log.debug(
                f"--- SETUP --- Creating cluster for test: {request.node.name}\n"
            )
            new_cluster = Cluster(
                api_client=api_client,
                config=cluster_config,
                nodes=nodes,
                infra_env_config=infra_env_configuration,
            )

            needs_proxy = self._does_need_proxy_server(nodes)
            if needs_proxy:
                self.__set_up_proxy_server(new_cluster, cluster_config,
                                           proxy_server)

            clusters.append(new_cluster)
            return new_cluster
Exemple #11
0
    def _get_disk_encryption_appliance(self):
        """Return which node roles get a vTPM2 device.

        Returns:
            ``{}`` when the entity config is a ``BaseInfraEnvConfig``;
            otherwise a dict with boolean ``master_vtpm2`` / ``worker_vtpm2``
            entries chosen from the configured disk-encryption roles (both
            ``False`` when the roles match none of ALL/MASTERS/WORKERS).

        Raises:
            AssertionError: if the disk encryption mode is not TPMv2.
        """
        entity_config = self._entity_config
        if isinstance(entity_config, BaseInfraEnvConfig):
            log.debug("Infra-env is not associated with any disk-encryption configuration")
            return {}

        encryption_mode = entity_config.disk_encryption_mode
        assert (
            encryption_mode == consts.DiskEncryptionMode.TPM_VERSION_2
        ), "Currently only supporting TPMv2"

        roles = entity_config.disk_encryption_roles
        if roles == consts.DiskEncryptionRoles.ALL:
            return {"master_vtpm2": True, "worker_vtpm2": True}
        if roles == consts.DiskEncryptionRoles.MASTERS:
            return {"master_vtpm2": True, "worker_vtpm2": False}
        if roles == consts.DiskEncryptionRoles.WORKERS:
            return {"master_vtpm2": False, "worker_vtpm2": True}
        return {"master_vtpm2": False, "worker_vtpm2": False}
Exemple #12
0
    def cluster(
        self,
        api_client: InventoryClient,
        request: FixtureRequest,
        infra_env_configuration: InfraEnvConfig,
        proxy_server,
        prepare_nodes_network: Nodes,
        cluster_configuration: ClusterConfig,
    ):
        """Build a ``Cluster`` for the requesting test, yield it, then clean up.

        Setup creates the cluster and configures a proxy server when the
        nodes need one.

        Teardown (after the ``yield``): collects logs if the test failed,
        cancels an in-flight installation, and - only when
        ``global_variables.test_teardown`` is set - deregisters the infra-env
        and deletes the cluster.

        NOTE(review): the ``request`` argument and the ``yield`` suggest a
        pytest fixture, but the decorator is outside this view - confirm.
        """
        log.debug(
            f"--- SETUP --- Creating cluster for test: {request.node.name}\n")
        cluster = Cluster(api_client=api_client,
                          config=cluster_configuration,
                          infra_env_config=infra_env_configuration,
                          nodes=prepare_nodes_network)

        needs_proxy = self._does_need_proxy_server(prepare_nodes_network)
        if needs_proxy:
            self._set_up_proxy_server(cluster, cluster_configuration,
                                      proxy_server)

        yield cluster

        if self._is_test_failed(request):
            log.info(
                f"--- TEARDOWN --- Collecting Logs for test: {request.node.name}\n"
            )
            self.collect_test_logs(cluster, api_client, request, cluster.nodes)

            # Only abort a running installation when teardown is enabled.
            if global_variables.test_teardown and (cluster.is_installing()
                                                   or cluster.is_finalizing()):
                cluster.cancel_install()

        if not global_variables.test_teardown:
            return

        with SuppressAndLog(ApiException):
            cluster.deregister_infraenv()

        with suppress(ApiException):
            log.info(
                f"--- TEARDOWN --- deleting created cluster {cluster.id}\n"
            )
            cluster.delete()
Exemple #13
0
    def update_parameterized(cls, request: pytest.FixtureRequest,
                             config: BaseConfig):
        """Copy parameterized fixture values onto matching attributes of ``config``.

        For every key returned by ``cls._get_parameterized_keys(request)``
        that ``config`` has as an attribute, the fixture value is fetched from
        ``request`` and stored with ``config.set_value``.  Keys without a
        matching attribute, fixture lookup failures and any
        ``AttributeError`` raised along the way are silently skipped.
        """
        config_type = config.__class__.__name__

        for fixture_name in cls._get_parameterized_keys(request):
            with suppress(pytest.FixtureLookupError, AttributeError):
                if not hasattr(config, fixture_name):
                    # Swallowed by the enclosing suppress(): the net effect is
                    # that this key is skipped without any message.
                    raise AttributeError(
                        f"No attribute name {fixture_name} in {config_type} object type"
                    )

                value = request.getfixturevalue(fixture_name)
                config.set_value(fixture_name, value)

                log.debug(
                    f"{config_type}.{fixture_name} value updated from parameterized value to {value}"
                )
Exemple #14
0
    def infra_env(
        self,
        api_client: InventoryClient,
        request: FixtureRequest,
        proxy_server,
        prepare_infraenv_nodes_network: Nodes,
        infra_env_configuration: InfraEnvConfig,
    ):
        """Build an ``InfraEnv`` for the requesting test, yield it, then clean up.

        After the ``yield`` the infra-env is deregistered, but only when
        ``global_variables.test_teardown`` is set; ``ApiException`` raised by
        the deregistration is handled by ``SuppressAndLog``.

        ``proxy_server`` is accepted but not used in the body.
        """
        log.debug(
            f"--- SETUP --- Creating InfraEnv for test: {request.node.name}\n")
        created_env = InfraEnv(
            api_client=api_client,
            config=infra_env_configuration,
            nodes=prepare_infraenv_nodes_network,
        )

        yield created_env
        log.info("--- TEARDOWN --- Infra env\n")

        if not global_variables.test_teardown:
            return

        with SuppressAndLog(ApiException):
            created_env.deregister()
Exemple #15
0
 def execute(self, command, timeout=60, verbose=True):
     """Run ``command`` over the SSH client and return its standard output.

     Connects first if there is no client yet.

     Args:
         command: Command text handed to ``exec_command``.
         timeout: Forwarded to ``exec_command``.
         verbose: When true, log the command and any non-empty output.

     Returns:
         The command's stdout lines joined into one string.

     Raises:
         RuntimeError: if the exit status is non-zero; the exception carries
             the captured stdout in its ``output`` attribute.

     NOTE(review): ``exec_command`` / ``channel.recv_exit_status`` look like
     the paramiko API; if so, waiting for the exit status before draining
     stdout can hang on very large outputs - confirm before relying on it.
     """
     if not self._ssh_client:
         self.connect()

     if verbose:
         name = getattr(self._ssh_client, "name", "")
         log.debug(
             f"Running bash script: {command.strip()} {'on ' + name if name else name}"
         )

     _, stdout, stderr = self._ssh_client.exec_command(command, timeout=timeout)
     status = stdout.channel.recv_exit_status()
     output = "".join(stdout.readlines())

     if verbose and output:
         log.debug(f"SSH Execution output: \n{output}")

     if status == 0:
         return output

     # Non-zero exit: surface both streams and keep stdout on the exception.
     e = RuntimeError(
         f"Failed executing, status '{status}', output was:\n{output} stderr \n{stderr.readlines()}"
     )
     e.output = output
     raise e
Exemple #16
0
def download_must_gather(kubeconfig: str, dest_dir: str):
    """Run ``oc adm must-gather`` and archive its output under ``dest_dir``.

    The data is gathered into ``{dest_dir}/must-gather-dir`` (created here),
    packed into the gzip-compressed tar ``{dest_dir}/must-gather.tar`` and the
    scratch directory is then removed.  A ``RuntimeError`` from the command is
    only logged, so whatever was written (including ``must-gather.log``) is
    still archived.

    Args:
        kubeconfig: Path of the kubeconfig passed to ``oc``.
        dest_dir: Existing directory that receives the archive.

    Raises:
        OSError: if the scratch directory cannot be created (for example
            because it already exists).
    """
    import shlex  # function-scope import: only needed to build the command

    must_gather_dir = f"{dest_dir}/must-gather-dir"
    os.mkdir(must_gather_dir)

    log.info(
        f"Downloading must-gather to {must_gather_dir}, kubeconfig {kubeconfig}"
    )
    # The command runs through a shell (the output redirect needs one), so
    # every path is quoted: spaces or shell metacharacters in a path would
    # otherwise split or rewrite the command.  Quoting also makes the shell
    # use exactly the path Python used for ``os.mkdir`` above.
    quoted_kubeconfig = shlex.quote(kubeconfig)
    quoted_dir = shlex.quote(must_gather_dir)
    quoted_log = shlex.quote(f"{must_gather_dir}/must-gather.log")
    command = (
        f"oc --insecure-skip-tls-verify --kubeconfig={quoted_kubeconfig} adm must-gather"
        f" --dest-dir {quoted_dir} > {quoted_log}")
    try:
        run_command(command, shell=True, raise_errors=True)

    except RuntimeError as ex:
        # Best effort: keep going so the partial output and log get archived.
        log.warning(f"Failed to run must gather: {ex}")

    log.debug("Archiving %s...", must_gather_dir)
    with tarfile.open(f"{dest_dir}/must-gather.tar", "w:gz") as tar:
        tar.add(must_gather_dir, arcname=os.path.sep)

    log.debug("Removing must-gather directory %s after we archived it",
              must_gather_dir)
    shutil.rmtree(must_gather_dir)
Exemple #17
0
    def all_operators_available() -> bool:
        """Report whether every cluster operator is available.

        Returns ``False`` when the status query fails, when no operators are
        listed yet, or when at least one operator has a falsy availability
        value (the names of those operators are logged at debug level).
        """
        try:
            operator_statuses = get_clusteroperators_status(KUBE_CONFIG)
        except subprocess.SubprocessError:
            log.debug("Failed to get cluster operators status. This is usually due to API downtime. Retrying")
            return False

        if len(operator_statuses) == 0:
            log.debug("List of operators seems to still be empty... Retrying")
            return False

        unavailable = [
            name for name, is_available in operator_statuses.items()
            if not is_available
        ]
        if unavailable:
            log.debug(
                "Following operators are still down: %s",
                ", ".join(unavailable),
            )
            return False

        return True