def approve_csrs(kubeconfig_path: str, done: threading.Event):
    log.info("Started background worker to approve CSRs when they appear...")
    while not done.is_set():
        unapproved_csrs = []
        try:
            unapproved_csrs = get_unapproved_csr_names(kubeconfig_path)
        except subprocess.SubprocessError:
            log.debug("Failed to list csrs. This is usually due to API downtime. Retrying")
        except Exception:
            # We're in a thread so it's a bit awkward to stop everything else...
            # Just continue after logging the unexpected exception
            log.exception("Unknown exception while listing csrs")

        for csr_name in unapproved_csrs:
            log.info(f"Found unapproved CSR {csr_name}, approving...")
            try:
                approve_csr(kubeconfig_path, csr_name)
            except subprocess.SubprocessError:
                log.warning("Failed attempt to approve CSR, this may be due to API downtime. Will retry later")
            except Exception:
                # We're in a thread so it's a bit awkward to stop everything else...
                # Just continue after logging the unexpected exception
                log.exception(f"Unknown exception while approving the {csr_name} CSR")

        time.sleep(10)
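# Illustrative only (not from the source): a minimal sketch of how approve_csrs could be
# driven, assuming a kubeconfig_path is already available. The worker runs in a daemon
# thread and is told to stop through the shared threading.Event once the caller is done.
def example_run_csr_approver(kubeconfig_path: str):
    done = threading.Event()
    approver = threading.Thread(target=approve_csrs, args=(kubeconfig_path, done), daemon=True)
    approver.start()
    try:
        pass  # the caller would wait here for installation / node join to complete
    finally:
        done.set()
        approver.join()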
def _get_domain_ips_and_macs(domain: libvirt.virDomain) -> Tuple[List[str], List[str]]:
    interfaces_sources = [
        # getting all DHCP leases IPs
        libvirt.VIR_DOMAIN_INTERFACE_ADDRESSES_SRC_LEASE,
        # getting static IPs via ARP
        libvirt.VIR_DOMAIN_INTERFACE_ADDRESSES_SRC_ARP,
    ]

    interfaces = {}
    for addresses_source in interfaces_sources:
        try:
            interfaces.update(**domain.interfaceAddresses(addresses_source))
        except libvirt.libvirtError:
            log.exception("Got an error while updating domain's network addresses")

    ips = []
    macs = []
    log.debug(f"Host {domain.name()} interfaces are {interfaces}")
    if interfaces:
        for (_, val) in interfaces.items():
            if val["addrs"]:
                for addr in val["addrs"]:
                    ips.append(addr["addr"])
                    macs.append(val["hwaddr"])

    if ips:
        log.info("Host %s ips are %s", domain.name(), ips)
    if macs:
        log.info("Host %s macs are %s", domain.name(), macs)

    return ips, macs
def cluster(
    self,
    api_client: InventoryClient,
    request: FixtureRequest,
    infra_env_configuration: InfraEnvConfig,
    proxy_server,
    prepare_nodes_network: Nodes,
    cluster_configuration: ClusterConfig,
    ipxe_server,
):
    log.debug(f"--- SETUP --- Creating cluster for test: {request.node.name}\n")
    cluster = Cluster(
        api_client=api_client,
        config=cluster_configuration,
        infra_env_config=infra_env_configuration,
        nodes=prepare_nodes_network,
    )

    if self._does_need_proxy_server(prepare_nodes_network):
        self.__set_up_proxy_server(cluster, cluster_configuration, proxy_server)

    if global_variables.ipxe_boot:
        infra_env = cluster.generate_infra_env()
        ipxe_server_controller = ipxe_server(name="ipxe_controller", api_client=cluster.api_client)
        ipxe_server_controller.start(infra_env_id=infra_env.id, cluster_name=cluster.name)

        ipxe_server_url = f"http://{consts.DEFAULT_IPXE_SERVER_IP}:{consts.DEFAULT_IPXE_SERVER_PORT}/{cluster.name}"
        network_name = cluster.nodes.get_cluster_network()
        libvirt_controller = LibvirtController(config=cluster.nodes.controller, entity_config=cluster_configuration)
        libvirt_controller.set_ipxe_url(network_name=network_name, ipxe_url=ipxe_server_url)

    yield cluster

    if self._is_test_failed(request):
        log.info(f"--- TEARDOWN --- Collecting Logs for test: {request.node.name}\n")
        self.collect_test_logs(cluster, api_client, request, cluster.nodes)

        if global_variables.test_teardown:
            if cluster.is_installing() or cluster.is_finalizing():
                cluster.cancel_install()

    if global_variables.test_teardown:
        with SuppressAndLog(ApiException):
            cluster.deregister_infraenv()

        with suppress(ApiException):
            log.info(f"--- TEARDOWN --- deleting created cluster {cluster.id}\n")
            cluster.delete()
def cluster(
    self,
    api_client: InventoryClient,
    request: FixtureRequest,
    infra_env_configuration: InfraEnvConfig,
    proxy_server,
    prepare_nodes_network: Nodes,
    cluster_configuration: ClusterConfig,
    ipxe_server: Callable,
    tang_server: Callable,
):
    log.debug(f"--- SETUP --- Creating cluster for test: {request.node.name}\n")
    if cluster_configuration.disk_encryption_mode == consts.DiskEncryptionMode.TANG:
        self._start_tang_server(tang_server, cluster_configuration)

    cluster = Cluster(
        api_client=api_client,
        config=cluster_configuration,
        infra_env_config=infra_env_configuration,
        nodes=prepare_nodes_network,
    )

    if self._does_need_proxy_server(prepare_nodes_network):
        self.__set_up_proxy_server(cluster, cluster_configuration, proxy_server)

    if global_variables.ipxe_boot:
        infra_env = cluster.generate_infra_env()
        ipxe_server_controller = ipxe_server(name="ipxe_controller", api_client=cluster.api_client)
        ipxe_server_controller.run(infra_env_id=infra_env.id, cluster_name=cluster.name)
        cluster_configuration.iso_download_path = utils.get_iso_download_path(
            infra_env_configuration.entity_name.get()
        )

    yield cluster

    if self._is_test_failed(request):
        log.info(f"--- TEARDOWN --- Collecting Logs for test: {request.node.name}\n")
        self.collect_test_logs(cluster, api_client, request, cluster.nodes)

        if global_variables.test_teardown:
            if cluster.is_installing() or cluster.is_finalizing():
                cluster.cancel_install()

    if global_variables.test_teardown:
        with SuppressAndLog(ApiException):
            cluster.deregister_infraenv()

        with suppress(ApiException):
            log.info(f"--- TEARDOWN --- deleting created cluster {cluster.id}\n")
            cluster.delete()
def _safe_run(job, job_id: int, done_handler: Callable[[int], None]):
    call = None
    try:
        call, call_args = job[0], job[1:]
        return call(*call_args)
    except BaseException:
        log.debug("Exception raised while concurrently running '%(call)s'", dict(call=str(call)))
        raise
    finally:
        if done_handler:
            done_handler(job_id)
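# Illustrative only (not from the source): _safe_run expects a job shaped as
# (callable, arg1, arg2, ...). A minimal sketch of running such jobs concurrently with
# the standard library's ThreadPoolExecutor; here done_handler just records the ids of
# jobs that finished, whether they succeeded or raised.
import concurrent.futures


def example_run_jobs_concurrently(jobs):
    finished_job_ids = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(_safe_run, job, job_id, finished_job_ids.append)
            for job_id, job in enumerate(jobs)
        ]
        # result() re-raises any exception propagated by _safe_run
        return [future.result() for future in futures]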
def _remove_taken_assets_from_all_assets_in_use(self, assets_in_use: List[Dict]):
    log.info("Returning %d assets", len(self._taken_assets))
    log.debug("Assets to return: %s", self._taken_assets)

    indexes_to_pop = []
    for i in range(len(assets_in_use)):
        if str(assets_in_use[i]) in self._taken_assets:
            indexes_to_pop.append(i)

    while indexes_to_pop:
        assets_in_use.pop(indexes_to_pop.pop())

    self._taken_assets.clear()
def worker_ready() -> bool:
    try:
        node_readiness_map = get_nodes_readiness(KUBE_CONFIG)
    except subprocess.SubprocessError:
        log.debug("Failed to list nodes. This is usually due to API downtime. Retrying")
        return False

    if f"{CLUSTER_PREFIX}-master-0" not in node_readiness_map:
        log.warning("Couldn't find master in node status list, this should not happen")
        return False

    if f"{CLUSTER_PREFIX}-worker-0" not in node_readiness_map:
        return False

    return all(node_status for node_status in node_readiness_map.values())
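# Illustrative only (not from the source): worker_ready is written as a predicate, so it
# can be polled until the worker node reports Ready. A plain stdlib polling sketch with
# assumed timeout and interval values; the real flow may use a waiting/retry helper instead.
def example_wait_for_worker(timeout_seconds: int = 3600, interval_seconds: int = 30):
    deadline = time.monotonic() + timeout_seconds
    while time.monotonic() < deadline:
        if worker_ready():
            return
        time.sleep(interval_seconds)
    raise TimeoutError("Timed out waiting for the worker node to become Ready")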
def day2_cluster(
    self,
    api_client: InventoryClient,
    request: FixtureRequest,
    day2_cluster_configuration: Day2ClusterConfig,
):
    log.debug(f"--- SETUP --- Creating Day2 cluster for test: {request.node.name}\n")
    cluster = Day2Cluster(
        api_client=api_client,
        config=day2_cluster_configuration,
    )
    yield cluster
def handle_trigger(self, conditions_string: List[str], values: Dict[str, Any]) -> None:
    for k, v in values.items():
        if not hasattr(self, k):
            continue

        if not self.is_user_set(k):
            log.debug(f"{self.__class__.__name__} - Trigger set `{k}` to `{v}`, Condition: {conditions_string}")
            self._set(k, v)
        else:
            log.warning(f"Skipping setting {k} to value {v} because it has already been set by the user")
def get_cluster_func(nodes: Nodes, cluster_config: ClusterConfig) -> Cluster:
    log.debug(f"--- SETUP --- Creating cluster for test: {request.node.name}\n")
    _cluster = Cluster(
        api_client=api_client,
        config=cluster_config,
        nodes=nodes,
        infra_env_config=infra_env_configuration,
    )

    if self._does_need_proxy_server(nodes):
        self.__set_up_proxy_server(_cluster, cluster_config, proxy_server)

    clusters.append(_cluster)
    return _cluster
def _get_disk_encryption_appliance(self):
    if isinstance(self._entity_config, BaseInfraEnvConfig):
        log.debug("Infra-env is not associated with any disk-encryption configuration")
        return {}

    assert (
        self._entity_config.disk_encryption_mode == consts.DiskEncryptionMode.TPM_VERSION_2
    ), "Currently only supporting TPMv2"

    master_vtpm2 = worker_vtpm2 = False
    if self._entity_config.disk_encryption_roles == consts.DiskEncryptionRoles.ALL:
        master_vtpm2 = worker_vtpm2 = True
    elif self._entity_config.disk_encryption_roles == consts.DiskEncryptionRoles.MASTERS:
        master_vtpm2 = True
    elif self._entity_config.disk_encryption_roles == consts.DiskEncryptionRoles.WORKERS:
        worker_vtpm2 = True

    return {"master_vtpm2": master_vtpm2, "worker_vtpm2": worker_vtpm2}
def cluster(
    self,
    api_client: InventoryClient,
    request: FixtureRequest,
    infra_env_configuration: InfraEnvConfig,
    proxy_server,
    prepare_nodes_network: Nodes,
    cluster_configuration: ClusterConfig,
):
    log.debug(f"--- SETUP --- Creating cluster for test: {request.node.name}\n")
    cluster = Cluster(
        api_client=api_client,
        config=cluster_configuration,
        infra_env_config=infra_env_configuration,
        nodes=prepare_nodes_network,
    )

    if self._does_need_proxy_server(prepare_nodes_network):
        self._set_up_proxy_server(cluster, cluster_configuration, proxy_server)

    yield cluster

    if self._is_test_failed(request):
        log.info(f"--- TEARDOWN --- Collecting Logs for test: {request.node.name}\n")
        self.collect_test_logs(cluster, api_client, request, cluster.nodes)

        if global_variables.test_teardown:
            if cluster.is_installing() or cluster.is_finalizing():
                cluster.cancel_install()

    if global_variables.test_teardown:
        with SuppressAndLog(ApiException):
            cluster.deregister_infraenv()

        with suppress(ApiException):
            log.info(f"--- TEARDOWN --- deleting created cluster {cluster.id}\n")
            cluster.delete()
def update_parameterized(cls, request: pytest.FixtureRequest, config: BaseConfig):
    """Update the given configuration object with parameterized values if the key is present"""
    config_type = config.__class__.__name__
    parameterized_keys = cls._get_parameterized_keys(request)

    for fixture_name in parameterized_keys:
        with suppress(pytest.FixtureLookupError, AttributeError):
            if hasattr(config, fixture_name):
                value = request.getfixturevalue(fixture_name)
                config.set_value(fixture_name, value)
                log.debug(f"{config_type}.{fixture_name} value updated from parameterized value to {value}")
            else:
                raise AttributeError(f"No attribute named {fixture_name} in {config_type} object type")
def infra_env(
    self,
    api_client: InventoryClient,
    request: FixtureRequest,
    proxy_server,
    prepare_infraenv_nodes_network: Nodes,
    infra_env_configuration: InfraEnvConfig,
):
    log.debug(f"--- SETUP --- Creating InfraEnv for test: {request.node.name}\n")
    infra_env = InfraEnv(
        api_client=api_client,
        config=infra_env_configuration,
        nodes=prepare_infraenv_nodes_network,
    )

    yield infra_env

    log.info("--- TEARDOWN --- Infra env\n")
    if global_variables.test_teardown:
        with SuppressAndLog(ApiException):
            infra_env.deregister()
def execute(self, command, timeout=60, verbose=True):
    if not self._ssh_client:
        self.connect()

    if verbose:
        name = getattr(self._ssh_client, "name", "")
        log.debug(f"Running bash script: {command.strip()} {'on ' + name if name else name}")

    stdin, stdout, stderr = self._ssh_client.exec_command(command, timeout=timeout)
    status = stdout.channel.recv_exit_status()
    output = stdout.readlines()
    output = "".join(output)

    if verbose and output:
        log.debug(f"SSH Execution output: \n{output}")

    if status != 0:
        e = RuntimeError(f"Failed executing, status '{status}', output was:\n{output} stderr \n{stderr.readlines()}")
        e.output = output
        raise e

    return output
def download_must_gather(kubeconfig: str, dest_dir: str):
    must_gather_dir = f"{dest_dir}/must-gather-dir"
    os.mkdir(must_gather_dir)

    log.info(f"Downloading must-gather to {must_gather_dir}, kubeconfig {kubeconfig}")
    command = (
        f"oc --insecure-skip-tls-verify --kubeconfig={kubeconfig} adm must-gather"
        f" --dest-dir {must_gather_dir} > {must_gather_dir}/must-gather.log"
    )
    try:
        run_command(command, shell=True, raise_errors=True)
    except RuntimeError as ex:
        log.warning(f"Failed to run must gather: {ex}")

    log.debug("Archiving %s...", must_gather_dir)
    with tarfile.open(f"{dest_dir}/must-gather.tar", "w:gz") as tar:
        tar.add(must_gather_dir, arcname=os.path.sep)

    log.debug("Removing must-gather directory %s after we archived it", must_gather_dir)
    shutil.rmtree(must_gather_dir)
def all_operators_available() -> bool:
    try:
        operator_statuses = get_clusteroperators_status(KUBE_CONFIG)
    except subprocess.SubprocessError:
        log.debug("Failed to get cluster operators status. This is usually due to API downtime. Retrying")
        return False

    if len(operator_statuses) == 0:
        log.debug("List of operators seems to still be empty... Retrying")
        return False

    if not all(available for available in operator_statuses.values()):
        log.debug(
            "Following operators are still down: %s",
            ", ".join(operator for operator, available in operator_statuses.items() if not available),
        )
        return False

    return True